mirror of https://github.com/hasura/graphql-engine.git, synced 2024-12-15 01:12:56 +03:00
server: refactor Hasura.Server.Compression for clarity/correctness
context: This is foundation work, before we change how the server chooses to compress or not
part of effort: #5518

-----

Prior to this change it was difficult to understand how the functionality in this module related to the semantics of Accept-Encoding. We also didn't correctly handle directives with qvalues.

After this change certain technical infelicities are called out without modifying the behavior of the server; for instance we continue to fall back to identity (no compression) in the case where technically we're supposed to return 406, and we also continue to treat `*` conservatively as meaning “use no compression”.

The only externally visible change is that `gzip;q=x.y` now results in a gzipped response.

PR-URL: https://github.com/hasura/graphql-engine-mono/pull/7213
GitOrigin-RevId: 1910ffd70d29f1ab8825c601f1bd998be70ceeeb
parent c0aa2e8773
commit 6451d9c9ce
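
To make the commit message's one externally visible change concrete, here is a minimal hypothetical check (not part of this commit): it assumes the refactored Hasura.Server.Compression shown in the diff below is importable, and the names hdrs, body and main are made up for the example.

{-# LANGUAGE ImportQualifiedPost #-}
{-# LANGUAGE OverloadedStrings #-}

import Data.ByteString.Lazy.Char8 qualified as BL8
import Hasura.Server.Compression

main :: IO ()
main = do
  -- A gzip directive carrying a qvalue, which previously fell back to identity:
  let hdrs = [("Accept-Encoding", "gzip;q=0.5")]
      body = BL8.pack (replicate 10000 'a')
      (compressed, chosenEncoding) = compressResponse hdrs body
  print (getAcceptedEncodings hdrs) -- fromList [Nothing,Just CTGZip]
  print chosenEncoding -- Just CTGZip: the qvalue no longer disables gzip
  print (BL8.length compressed < BL8.length body) -- True for a compressible body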
@@ -1147,6 +1147,7 @@ test-suite graphql-engine-tests
     Hasura.RQL.WebhookTransformsSpec
     Hasura.Server.Auth.JWTSpec
     Hasura.Server.AuthSpec
+    Hasura.Server.CompressionSpec
     Hasura.Server.InitSpec
     Hasura.Server.Init.ArgSpec
     Hasura.Server.Migrate.VersionSpec
@@ -378,11 +378,11 @@ mkSpockAction serverCtx@ServerCtx {..} qErrEncoder qErrModifier apiHandler = do
       let (respBytes, respHeaders) = case result of
             JSONResp (HttpResponse encJson h) -> (encJToLBS encJson, pure jsonHeader <> h)
             RawResp (HttpResponse rawBytes h) -> (rawBytes, h)
-          (compressedResp, mEncodingHeader, mCompressionType) = compressResponse (Wai.requestHeaders waiReq) respBytes
-          encodingHeader = onNothing mEncodingHeader []
+          (compressedResp, encodingType) = compressResponse (Wai.requestHeaders waiReq) respBytes
+          encodingHeader = maybeToList (contentEncodingHeader <$> encodingType)
           reqIdHeader = (requestIdHeader, txtToBs $ unRequestId reqId)
           allRespHeaders = pure reqIdHeader <> encodingHeader <> respHeaders <> authHdrs
-      lift $ logHttpSuccess scLogger scLoggingSettings userInfo reqId waiReq req respBytes compressedResp qTime mCompressionType reqHeaders httpLoggingMetadata
+      lift $ logHttpSuccess scLogger scLoggingSettings userInfo reqId waiReq req respBytes compressedResp qTime encodingType reqHeaders httpLoggingMetadata
       mapM_ setHeader allRespHeaders
       Spock.lazyBytes compressedResp
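
The only subtle change in the hunk above is replacing `onNothing mEncodingHeader []` with `maybeToList (contentEncodingHeader <$> encodingType)`. A small self-contained sketch of that header-assembly pattern, using stand-in types rather than the real server modules:

{-# LANGUAGE ImportQualifiedPost #-}
{-# LANGUAGE OverloadedStrings #-}

import Data.Maybe (maybeToList)
import Network.HTTP.Types.Header qualified as NH

-- Stand-ins for the real types, only to illustrate the pattern:
data CompressionType = CTGZip deriving (Show, Eq, Ord)

type EncodingType = Maybe CompressionType

contentEncodingHeader :: CompressionType -> NH.Header
contentEncodingHeader CTGZip = (NH.hContentEncoding, "gzip")

main :: IO ()
main = do
  -- Yields either a single Content-Encoding header or none at all:
  print (maybeToList (contentEncodingHeader <$> (Just CTGZip :: EncodingType)))
  print (maybeToList (contentEncodingHeader <$> (Nothing :: EncodingType))) -- []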
@@ -1,51 +1,114 @@
 module Hasura.Server.Compression
   ( compressResponse,
     CompressionType (..),
+    EncodingType,
+    identityEncoding,
+    contentEncodingHeader,
     compressionTypeToTxt,
+    compressFast,
+
+    -- * exported for testing
+    getAcceptedEncodings,
   )
 where

 import Codec.Compression.GZip qualified as GZ
 import Data.ByteString.Lazy qualified as BL
+import Data.Set qualified as Set
 import Data.Text qualified as T
 import Hasura.Prelude
 import Hasura.Server.Utils (gzipHeader)
 import Network.HTTP.Types.Header qualified as NH

 -- | Compressed encodings which hasura supports
 data CompressionType
   = CTGZip
-  deriving (Show, Eq)
+  deriving (Show, Eq, Ord)

+-- | Accept-Encoding directives (from client) which hasura supports. @Nothing@
+-- indicates identity (no compression)
+type EncodingType = Maybe CompressionType
+
+identityEncoding :: EncodingType
+identityEncoding = Nothing
+
 compressionTypeToTxt :: CompressionType -> Text
 compressionTypeToTxt CTGZip = "gzip"

--- | Maybe compress the response body
+-- | A map from Accept-Encoding directives to corresponding Content-Encoding
+-- headers (from server). NOTE: @identity@ is not a valid directive for this
+-- header.
+contentEncodingHeader :: CompressionType -> NH.Header
+contentEncodingHeader CTGZip = gzipHeader
+
+-- | Maybe compress the response body, based on the client's Accept-Encoding
+-- and our own judgement.
 compressResponse ::
   NH.RequestHeaders ->
   BL.ByteString ->
-  (BL.ByteString, Maybe NH.Header, Maybe CompressionType)
-compressResponse reqHeaders unCompressedResp =
-  let compressionTypeM = getAcceptedCompression reqHeaders
-      appendCompressionType (res, headerM) = (res, headerM, compressionTypeM)
-      gzipCompressionParams =
-        -- See Note [Compression ratios]
-        GZ.defaultCompressParams {GZ.compressLevel = GZ.compressionLevel 1}
-   in appendCompressionType $ case compressionTypeM of
-        Just CTGZip -> (GZ.compressWith gzipCompressionParams unCompressedResp, Just gzipHeader)
-        Nothing -> (unCompressedResp, Nothing)
+  -- | The response body (possibly compressed), and the encoding chosen
+  (BL.ByteString, EncodingType)
+compressResponse reqHeaders unCompressedResp
+  -- we have option to gzip:
+  | acceptedEncodings == Set.fromList [identityEncoding, Just CTGZip]
+      ||
+        -- we must gzip:
+        acceptedEncodings == Set.fromList [Just CTGZip] =
+      (compressFast CTGZip unCompressedResp, Just CTGZip)
+  -- we must only return an uncompressed response:
+  | acceptedEncodings == Set.fromList [identityEncoding] =
+      (unCompressedResp, identityEncoding)
+  -- this is technically a client error, but ignore for now (maintaining
+  -- current behavior); assume identity:
+  | otherwise =
+      (unCompressedResp, identityEncoding)
+  where
+    acceptedEncodings = getAcceptedEncodings reqHeaders

--- | Which, if any, compressed encodings can the client accept?
+-- | Compress using
+compressFast :: CompressionType -> BL.ByteString -> BL.ByteString
+compressFast = \case
+  CTGZip -> GZ.compressWith gzipCompressionParams
+  where
+    gzipCompressionParams =
+      -- See Note [Compression ratios]
+      GZ.defaultCompressParams {GZ.compressLevel = GZ.compressionLevel 1}
+
+-- | Which encodings can the client accept? The empty set returned here is an
+-- error condition and the server technically ought to return a 406.
 --
 -- https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Encoding
-getAcceptedCompression :: NH.RequestHeaders -> Maybe CompressionType
-getAcceptedCompression reqHeaders
-  | "gzip" `elem` acceptEncodingVals = Just CTGZip
-  | otherwise = Nothing
+getAcceptedEncodings :: NH.RequestHeaders -> Set.Set EncodingType
+getAcceptedEncodings reqHeaders = Set.fromList acceptedEncodingTypes
   where
-    acceptEncodingVals =
+    rawHeaderVals =
       concatMap (splitHeaderVal . snd) $
         filter (\h -> fst h == NH.hAcceptEncoding) reqHeaders
     splitHeaderVal bs = map T.strip $ T.splitOn "," $ bsToTxt bs
+    -- we'll ignore qvalues, except (crucially) to determine if 'identity' is rejected:
+    identityRejected =
+      -- ...if we're explicitly rejecting identity, or...
+      "identity;q=0" `elem` rawHeaderVals
+        ||
+          -- ...rejecting anything not listed and identity is not listed
+          ( "*;q=0" `elem` rawHeaderVals
+              && (not $ any ("identity" `T.isPrefixOf`) rawHeaderVals)
+          )
+    gzipAccepted =
+      any ("gzip" `T.isPrefixOf`) rawHeaderVals
+        && ("gzip;q=0" `notElem` rawHeaderVals)
+    -- AFAICT missing header, or *, implies “send whatever you want”
+    -- https://www.rfc-editor.org/rfc/rfc7231#section-5.3.4
+    anyEncodingTechnicallyAcceptable =
+      null rawHeaderVals || rawHeaderVals == ["*"]
+    acceptedEncodingTypes
+      -- \| anyEncodingTechnicallyAcceptable = [Just CTGZip, identityEncoding]
+      -- NOTE!: For now to be conservative and maintain historical behavior we
+      -- will treat this case as “only identity is acceptable”:
+      | anyEncodingTechnicallyAcceptable = [identityEncoding]
+      | otherwise =
+          (guard gzipAccepted $> Just CTGZip)
+            <> (guard (not identityRejected) $> identityEncoding)

 {-
 Note [Compression ratios]
@@ -73,4 +136,29 @@ context.
 I didn't test higher compression levels much, but `gzip -4` for the most part
 resulted in less than 10% smaller output on random json, and ~30% on our highly
 compressible benchmark output.
+
+UPDATE (12/5):
+~~~~~~~~~~~~~
+
+Some recent data on compression ratios for graphql responses (here as:
+compressed_size / uncompressed_size) taken from cloud:
+
+Aggregate across all responses where uncompressed > 700 bytes:
+
+    max: 0.891 (worst compression)
+    p99: 0.658
+    p95: 0.565
+    p75: 0.467
+    p50: 0.346
+    min: 0.005 (best compression)
+
+Aggregate across responses where uncompressed > 17K bytes (90th percentile):
+
+    max: 0.773
+    p99: 0.414
+    p95: 0.304
+    p75: 0.202
+    p50: 0.172
+    min: 0.005

 -}
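
The ratios quoted in the Note are easy to reproduce for any payload. A hypothetical sketch, not from the repository, that measures compressed_size / uncompressed_size using the same level-1 gzip parameters that compressFast uses:

{-# LANGUAGE ImportQualifiedPost #-}

import Codec.Compression.GZip qualified as GZ
import Data.ByteString.Lazy.Char8 qualified as BL8

-- Ratio as reported above: compressed_size / uncompressed_size.
compressionRatio :: BL8.ByteString -> Double
compressionRatio payload =
  fromIntegral (BL8.length compressed) / fromIntegral (BL8.length payload)
  where
    -- Same parameters as compressFast CTGZip: level 1, favoring speed.
    compressed =
      GZ.compressWith
        GZ.defaultCompressParams {GZ.compressLevel = GZ.compressionLevel 1}
        payload

main :: IO ()
main =
  -- Highly repetitive JSON-ish output, like the benchmark responses mentioned
  -- above, lands near the "best compression" end of the tables:
  print (compressionRatio (BL8.pack (concat (replicate 2000 "{\"name\":\"abc\",\"id\":1}"))))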
server/src-test/Hasura/Server/CompressionSpec.hs (new file, 49 lines)
@@ -0,0 +1,49 @@
module Hasura.Server.CompressionSpec (spec) where

import Data.Set qualified as Set
import Hasura.Prelude
import Hasura.Server.Compression
import Test.Hspec

spec :: Spec
spec = describe "serialized data compression" $ parallel do
  describe "getAcceptedEncodings" do
    it "detects gzip and not" do
      getAcceptedEncodings [("x", "x"), ("accept-encoding", "gzip")]
        `shouldBe` Set.fromList [Just CTGZip, identityEncoding]

      getAcceptedEncodings [("accept-encoding", "brotli, gzip;q=0.9")]
        `shouldBe` Set.fromList [Just CTGZip, identityEncoding]

      getAcceptedEncodings [("accept-encoding", "brotli")]
        `shouldBe` Set.fromList [identityEncoding]

      getAcceptedEncodings [("accept-encoding", "identity;q=0.42,brotli, gzip;q=0.9")]
        `shouldBe` Set.fromList [Just CTGZip, identityEncoding]

      getAcceptedEncodings [("accept-encoding", "identity;q=0.42,brotli, gzip;q=0")]
        `shouldBe` Set.fromList [identityEncoding]

    it "handles explicit rejection of identity" do
      getAcceptedEncodings [("accept-encoding", "identity;q=0,brotli, gzip;q=0.9")]
        `shouldBe` Set.fromList [Just CTGZip]

      -- strictly per spec this would result in a 406, but we'll likely
      -- just decide to return uncompressed (identity) higher up
      getAcceptedEncodings [("accept-encoding", "identity;q=0,brotli")]
        `shouldBe` Set.fromList []
      getAcceptedEncodings [("accept-encoding", "*;q=0,brotli")]
        `shouldBe` Set.fromList []

      getAcceptedEncodings [("accept-encoding", "gzip, *;q=0")]
        `shouldBe` Set.fromList [Just CTGZip]

    -- behaviors that might change if we decide it's worth it:
    it "arbitrary/historical behavior" do
      -- see Compression.hs for discussion
      getAcceptedEncodings [("accept-encoding", "*")]
        `shouldBe` Set.fromList [identityEncoding]
      getAcceptedEncodings []
        `shouldBe` Set.fromList [identityEncoding]
      getAcceptedEncodings [("accept-encoding", "")]
        `shouldBe` Set.fromList [identityEncoding]
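
For reference, a minimal hypothetical driver that would run just this spec with hspec (in the repository it runs as part of the graphql-engine-tests suite registered in the cabal hunk above):

{-# LANGUAGE ImportQualifiedPost #-}

module Main (main) where

import Hasura.Server.CompressionSpec qualified as CompressionSpec
import Test.Hspec (hspec)

main :: IO ()
main = hspec CompressionSpec.spec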