From bb5766f959ffb9762e8de7e552f0bd2a9b6142f4 Mon Sep 17 00:00:00 2001 From: Kulikov Vladislav Date: Mon, 20 Jan 2025 10:31:06 +0300 Subject: [PATCH 1/3] [#295] fix OverlongHeaders Problem: xrefcheck may fail with OverlongHeaders making it impossible to check a given file. Solution: make it possible to configure max header length for responses that xrefcheck is handling. --- package.yaml | 3 ++- src/Xrefcheck/Command.hs | 14 +++++++++++--- src/Xrefcheck/Config.hs | 19 +++++++++++++++++++ src/Xrefcheck/Config/Default.hs | 7 +++++++ src/Xrefcheck/Verify.hs | 15 +++++++++++++-- tests/configs/github-config.yaml | 7 +++++++ 6 files changed, 59 insertions(+), 6 deletions(-) diff --git a/package.yaml b/package.yaml index 8b6692b5..12875664 100644 --- a/package.yaml +++ b/package.yaml @@ -95,7 +95,8 @@ library: - ftp-client - crypton-connection - Glob - - http-client + - http-client >= 0.7.17 + - http-client-tls - http-types - lens - modern-uri diff --git a/src/Xrefcheck/Command.hs b/src/Xrefcheck/Command.hs index 33ccdc03..4e9a6757 100644 --- a/src/Xrefcheck/Command.hs +++ b/src/Xrefcheck/Command.hs @@ -7,7 +7,9 @@ module Xrefcheck.Command ( defaultAction ) where -import Universum +import Universum hiding ((.~)) + +import Control.Lens ((.~)) import Data.Reflection (Given, give) import Data.Yaml (decodeFileEither, prettyPrintParseException) @@ -15,10 +17,12 @@ import Fmt (build, fmt, fmtLn) import System.Console.Pretty (supportsPretty) import System.Directory (doesFileExist) import Text.Interpolation.Nyan +import Network.HTTP.Client (newManager, managerSetMaxHeaderLength) +import Network.HTTP.Client.TLS (tlsManagerSettings) import Xrefcheck.CLI (Options (..), addExclusionOptions, addNetworkingOptions, defaultConfigPaths) import Xrefcheck.Config - (Config, Config' (..), ScannersConfig, ScannersConfig' (..), defConfig, overrideConfig) + (Config, Config' (..), NetworkingConfig' (..), ScannersConfig, ScannersConfig' (..), defConfig, overrideConfig, cNetworkingL, ncHttpManagerL) import Xrefcheck.Core (Flavor (..)) import Xrefcheck.Progress (allowRewrite) import Xrefcheck.Scan @@ -87,8 +91,12 @@ defaultAction Options{..} = do whenJust (nonEmpty $ sortBy (compare `on` seFile) scanErrs) reportScanErrs verifyRes <- allowRewrite showProgressBar $ \rw -> do - let fullConfig = config + let parsedConfig = config { cNetworking = addNetworkingOptions (cNetworking config) oNetworkingOptions } + + mgr <- newManager $ managerSetMaxHeaderLength (ncMaxHeaderLength (cNetworking parsedConfig)) tlsManagerSettings + let fullConfig = parsedConfig & cNetworkingL . ncHttpManagerL .~ Just mgr + verifyRepo rw fullConfig oMode repoInfo case verifyErrors verifyRes of diff --git a/src/Xrefcheck/Config.hs b/src/Xrefcheck/Config.hs index 0c130196..efe64400 100644 --- a/src/Xrefcheck/Config.hs +++ b/src/Xrefcheck/Config.hs @@ -18,6 +18,7 @@ import Data.Aeson (genericParseJSON) import Data.Yaml (FromJSON (..), decodeEither', prettyPrintParseException, withText) import Text.Regex.TDFA.Text () import Time (KnownRatName, Second, Time (..), unitsP) +import Network.HTTP.Client (Manager) import Xrefcheck.Config.Default import Xrefcheck.Core @@ -85,6 +86,19 @@ data NetworkingConfig' f = NetworkingConfig -- chain. , ncExternalRefRedirects :: Field f RedirectConfig -- ^ Rules to override the redirect behavior for external references. + , ncMaxHeaderLength :: Field f Int + -- ^ The maximum allowed total size of HTTP headers (in bytes) that can + -- be returned by the server. + -- + -- If the total size of the headers exceeds this value, the request will + -- fail with an error to prevent the processing of excessively large headers. + , ncHttpManager :: Field f (Maybe Manager) + -- ^ A custom HTTP Manager used for all HTTP requests. + -- + -- Using the same implicit global manager for provides maximal connection + -- sharing. + -- + -- If 'Nothing', a default manager will be used. } deriving stock (Generic) -- | A list of custom redirect rules. @@ -151,6 +165,8 @@ overrideConfig config , ncMaxTimeoutRetries = overrideField ncMaxTimeoutRetries , ncMaxRedirectFollows = overrideField ncMaxRedirectFollows , ncExternalRefRedirects = overrideField ncExternalRefRedirects + , ncMaxHeaderLength = overrideField ncMaxHeaderLength + , ncHttpManager = overrideField ncHttpManager } where overrideField :: (forall f. NetworkingConfig' f -> Field f a) -> a @@ -181,3 +197,6 @@ instance FromJSON (ScannersConfig) where instance FromJSON (ScannersConfig' Maybe) where parseJSON = genericParseJSON aesonConfigOption + +instance FromJSON Manager where + parseJSON _ = fail "Manager field is not configurable" diff --git a/src/Xrefcheck/Config/Default.hs b/src/Xrefcheck/Config/Default.hs index 86eec9e2..c587621f 100644 --- a/src/Xrefcheck/Config/Default.hs +++ b/src/Xrefcheck/Config/Default.hs @@ -105,6 +105,13 @@ networking: externalRefRedirects: #{interpolateIndentF 4 externalRefRedirects} + # The maximum allowed total size of HTTP headers (in bytes) that can + # be returned by the server. + # + # If the total size of the headers exceeds this value, the request will + # fail with an error to prevent the processing of excessively large headers. + maxHeaderLength: 4096 + # Parameters of scanners for various file types. scanners: # On 'anchor not found' error, how much similar anchors should be displayed as diff --git a/src/Xrefcheck/Verify.hs b/src/Xrefcheck/Verify.hs index 52ced5b9..106ad870 100644 --- a/src/Xrefcheck/Verify.hs +++ b/src/Xrefcheck/Verify.hs @@ -57,7 +57,7 @@ import Network.HTTP.Client import Network.HTTP.Req (AllowsBody, CanHaveBody (NoBody), GET (..), HEAD (..), HttpBodyAllowed, HttpConfig (httpConfigRedirectCount), HttpException (..), HttpMethod, NoReqBody (..), - defaultHttpConfig, ignoreResponse, req, runReq, useURI) + defaultHttpConfig, ignoreResponse, req, runReq, useURI, httpConfigAltManager) import Network.HTTP.Types.Header (hRetryAfter) import Network.HTTP.Types.Status (Status, statusCode, statusMessage) import Text.Interpolation.Nyan @@ -136,6 +136,7 @@ data VerifyError | RedirectMissingLocation RedirectChain | RedirectChainLimit RedirectChain | RedirectRuleError RedirectChain (Maybe RedirectRuleOn) + | MaxHeaderLengthError Int deriving stock (Show, Eq) data ResponseResult @@ -287,6 +288,11 @@ pprVerifyErr' rInfo = \case Just RROTemporary -> "Temporary redirect" Just (RROCode code) -> show code <> " redirect" + MaxHeaderLengthError len -> + [int|| + The total size of the response headers exceeds the limit of #{len} bytes. + |] <> pprLinkCtx rInfo + attachToRedirectChain :: RedirectChain -> Text -> Builder attachToRedirectChain chain attached = build chain <> build attachedText @@ -718,7 +724,10 @@ checkExternalResource followed config@Config{..} link _ -> makeHttpRequest uri GET 0.7 httpConfig :: HttpConfig - httpConfig = defaultHttpConfig { httpConfigRedirectCount = 0 } + httpConfig = defaultHttpConfig + { httpConfigRedirectCount = 0 + , httpConfigAltManager = ncHttpManager + } makeHttpRequest :: (HttpMethod method, HttpBodyAllowed (AllowsBody method) 'NoBody) @@ -812,6 +821,8 @@ checkExternalResource followed config@Config{..} link | Just (N.C.HostCannotConnect _ _) <- fromException e -> throwError ExternalResourceConnectionFailure + OverlongHeaders -> throwError $ MaxHeaderLengthError ncMaxHeaderLength + other -> throwError $ ExternalResourceSomeError $ show other where retryAfterInfo :: Response a -> Maybe RetryAfter diff --git a/tests/configs/github-config.yaml b/tests/configs/github-config.yaml index adf55523..07c7b4fc 100644 --- a/tests/configs/github-config.yaml +++ b/tests/configs/github-config.yaml @@ -94,6 +94,13 @@ networking: - on: permanent outcome: invalid + # The maximum allowed total size of HTTP headers (in bytes) that can + # be returned by the server. + # + # If the total size of the headers exceeds this value, the request will + # fail with an error to prevent the processing of excessively large headers. + maxHeaderLength: 4096 + # Parameters of scanners for various file types. scanners: # On 'anchor not found' error, how much similar anchors should be displayed as From 04ddbfe445a24f77e39af8a207c0cf71625735e1 Mon Sep 17 00:00:00 2001 From: Kulikov Vladislav Date: Mon, 20 Jan 2025 10:31:54 +0300 Subject: [PATCH 2/3] [#295] cli flag 'headers-limit' added --- src/Xrefcheck/CLI.hs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/Xrefcheck/CLI.hs b/src/Xrefcheck/CLI.hs index 25bce8a7..40909c13 100644 --- a/src/Xrefcheck/CLI.hs +++ b/src/Xrefcheck/CLI.hs @@ -103,12 +103,14 @@ addExclusionOptions ExclusionConfig{..} (ExclusionOptions ignore) = data NetworkingOptions = NetworkingOptions { noMaxRetries :: Maybe Int + , noMaxHeaderLength :: Maybe Int } addNetworkingOptions :: NetworkingConfig -> NetworkingOptions -> NetworkingConfig -addNetworkingOptions NetworkingConfig{..} (NetworkingOptions maxRetries) = +addNetworkingOptions NetworkingConfig{..} (NetworkingOptions maxRetries maxHeaderLength) = NetworkingConfig { ncMaxRetries = fromMaybe ncMaxRetries maxRetries + , ncMaxHeaderLength = fromMaybe ncMaxHeaderLength maxHeaderLength , .. } @@ -228,6 +230,13 @@ networkingOptionsParser = do value Nothing <> help "How many attempts to retry an external link after getting \ \a \"429 Too Many Requests\" response." + + noMaxHeaderLength <- option (Just <$> auto) $ + long "header-limit" <> + metavar "INT" <> + value Nothing <> + help "The maximum allowed total size of HTTP headers (in bytes) \ + \ that can be returned by the server." return NetworkingOptions{..} dumpConfigOptions :: Parser Command From 79403e88e323803b76fb9b139a16446cc2310892 Mon Sep 17 00:00:00 2001 From: Kulikov Vladislav Date: Mon, 20 Jan 2025 10:32:11 +0300 Subject: [PATCH 3/3] [#295] tests added --- package.yaml | 2 + tests/Test/Xrefcheck/MaxHeaderLengthSpec.hs | 74 +++++++++++++++++++++ 2 files changed, 76 insertions(+) create mode 100644 tests/Test/Xrefcheck/MaxHeaderLengthSpec.hs diff --git a/package.yaml b/package.yaml index 12875664..525db558 100644 --- a/package.yaml +++ b/package.yaml @@ -156,6 +156,7 @@ tests: - warp - scotty - http-types + - http-client - lens - modern-uri - nyan-interpolation @@ -165,6 +166,7 @@ tests: - tasty - tasty-hunit - tasty-quickcheck + - text - time - universum - uri-bytestring diff --git a/tests/Test/Xrefcheck/MaxHeaderLengthSpec.hs b/tests/Test/Xrefcheck/MaxHeaderLengthSpec.hs new file mode 100644 index 00000000..7fd4bacb --- /dev/null +++ b/tests/Test/Xrefcheck/MaxHeaderLengthSpec.hs @@ -0,0 +1,74 @@ +{- SPDX-FileCopyrightText: 2021 Serokell + - + - SPDX-License-Identifier: MPL-2.0 + -} + +module Test.Xrefcheck.MaxHeaderLengthSpec where + +import Universum hiding ((.~)) + +import Control.Lens ((.~)) +import Data.Set qualified as S +import Network.HTTP.Client (newManager, managerSetMaxHeaderLength, defaultManagerSettings) +import Network.HTTP.Types (ok200) +import Network.Wai qualified as Web +import Test.Tasty (TestTree, testGroup) +import Test.Tasty.HUnit (testCase) +import Web.Scotty qualified as Web +import qualified Data.Text as T +import qualified Data.Text.Lazy as TL + +import Test.Xrefcheck.UtilRequests +import Xrefcheck.Config +import Xrefcheck.Progress +import Xrefcheck.Verify + +mockHeader :: Int -> IO Web.Application +mockHeader size = Web.scottyApp $ do + Web.matchAny "/header" $ do + Web.setHeader "X-header" (TL.fromStrict $ T.replicate size "x") + Web.status ok200 + +test_maxHeaderLength :: TestTree +test_maxHeaderLength = testGroup "MaxHeaderLength tests" + [ testCase "Succeeds with small header" $ do + setRef <- newIORef S.empty + mgr <- newManager $ managerSetMaxHeaderLength mhl defaultManagerSettings + checkMultipleLinksWithServer + (5001, mockHeader (mhl `div` 2)) + setRef + [ VerifyLinkTestEntry + { vlteConfigModifier = \c -> c + & cNetworkingL . ncMaxHeaderLengthL .~ mhl + & cNetworkingL . ncHttpManagerL .~ Just mgr + , vlteLink = "http://127.0.0.1:5001/header" + , vlteExpectedProgress = mkProgressWithOneTask True + , vlteExpectationErrors = VerifyResult [] + } + ] + + , testCase "Fails with MaxHeaderLengthError" $ do + setRef <- newIORef S.empty + mgr <- newManager $ managerSetMaxHeaderLength mhl defaultManagerSettings + checkMultipleLinksWithServer + (5002, mockHeader (mhl*2)) + setRef + [ VerifyLinkTestEntry + { vlteConfigModifier = \c -> c + & cNetworkingL . ncMaxHeaderLengthL .~ mhl + & cNetworkingL . ncHttpManagerL .~ Just mgr + , vlteLink = "http://127.0.0.1:5002/header" + , vlteExpectedProgress = mkProgressWithOneTask False + , vlteExpectationErrors = VerifyResult [MaxHeaderLengthError mhl] + } + ] + ] + where + mhl = 4096 + + mkProgressWithOneTask shouldSucceed = report "" $ initProgress 1 + where + report = + if shouldSucceed + then reportSuccess + else reportError