server: refactor Hasura.Server.Compression for clarity/correctness

context: This is foundation work, done before we change how the server chooses whether or not to compress a response
part of effort: #5518

-----

Prior to this change, it was difficult to understand how the functionality in this module related to the semantics of `Accept-Encoding`. We also didn't correctly handle directives with qvalues.

After this change, certain technical infelicities are called out without modifying the behavior of the server: for instance, we continue to fall back to identity (no compression) in the case where technically we're supposed to return a 406, and we also continue to treat `*` conservatively as meaning “use no compression”.

The only externally visible change here is that `gzip;q=x.y` now results in a gzipped response.
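
For illustration, a minimal sketch (not part of this change) of that external behavior, assuming the `Hasura.Server.Compression` API shown in the diff below; the header value and payload here are arbitrary:

```haskell
{-# LANGUAGE OverloadedStrings #-}

import qualified Data.ByteString.Lazy as BL
import Hasura.Server.Compression (CompressionType (..), compressResponse)

-- Sketch only: demonstrates the qvalue change described above.
demo :: IO ()
demo = do
  let body = BL.replicate 10000 120 -- a compressible dummy payload ('x' bytes)
      -- A qvalue on gzip previously caused the directive to be ignored;
      -- after this change it selects gzip compression:
      (respBody, chosenEncoding) =
        compressResponse [("Accept-Encoding", "gzip;q=0.5")] body
  print chosenEncoding                        -- Just CTGZip
  print (BL.length respBody < BL.length body) -- True
```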

PR-URL: https://github.com/hasura/graphql-engine-mono/pull/7213
GitOrigin-RevId: 1910ffd70d29f1ab8825c601f1bd998be70ceeeb
Authored by Brandon Simmons on 2022-12-09 01:06:48 -05:00; committed by hasura-bot
parent c0aa2e8773
commit 6451d9c9ce
4 changed files with 159 additions and 21 deletions


@@ -1147,6 +1147,7 @@ test-suite graphql-engine-tests
Hasura.RQL.WebhookTransformsSpec
Hasura.Server.Auth.JWTSpec
Hasura.Server.AuthSpec
Hasura.Server.CompressionSpec
Hasura.Server.InitSpec
Hasura.Server.Init.ArgSpec
Hasura.Server.Migrate.VersionSpec


@@ -378,11 +378,11 @@ mkSpockAction serverCtx@ServerCtx {..} qErrEncoder qErrModifier apiHandler = do
let (respBytes, respHeaders) = case result of
JSONResp (HttpResponse encJson h) -> (encJToLBS encJson, pure jsonHeader <> h)
RawResp (HttpResponse rawBytes h) -> (rawBytes, h)
(compressedResp, mEncodingHeader, mCompressionType) = compressResponse (Wai.requestHeaders waiReq) respBytes
encodingHeader = onNothing mEncodingHeader []
(compressedResp, encodingType) = compressResponse (Wai.requestHeaders waiReq) respBytes
encodingHeader = maybeToList (contentEncodingHeader <$> encodingType)
reqIdHeader = (requestIdHeader, txtToBs $ unRequestId reqId)
allRespHeaders = pure reqIdHeader <> encodingHeader <> respHeaders <> authHdrs
lift $ logHttpSuccess scLogger scLoggingSettings userInfo reqId waiReq req respBytes compressedResp qTime mCompressionType reqHeaders httpLoggingMetadata
lift $ logHttpSuccess scLogger scLoggingSettings userInfo reqId waiReq req respBytes compressedResp qTime encodingType reqHeaders httpLoggingMetadata
mapM_ setHeader allRespHeaders
Spock.lazyBytes compressedResp
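
For reference, a hedged sketch of how a caller can consume the new pair-shaped result and derive the `Content-Encoding` header, mirroring the mkSpockAction change above (the helper name is illustrative, not from the diff):

```haskell
import qualified Data.ByteString.Lazy as BL
import Data.Maybe (maybeToList)
import Hasura.Server.Compression (compressResponse, contentEncodingHeader)
import qualified Network.HTTP.Types.Header as NH

-- Illustrative only: compress a response body and compute the headers to
-- attach, following the same pattern as the call site above.
compressAndHeaders :: NH.RequestHeaders -> BL.ByteString -> (BL.ByteString, [NH.Header])
compressAndHeaders reqHeaders body =
  let (compressedBody, encodingType) = compressResponse reqHeaders body
      -- identity (Nothing) must not be advertised via Content-Encoding,
      -- hence the Maybe-to-list conversion:
      encodingHeaders = maybeToList (contentEncodingHeader <$> encodingType)
   in (compressedBody, encodingHeaders)
```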


@@ -1,51 +1,114 @@
module Hasura.Server.Compression
( compressResponse,
CompressionType (..),
EncodingType,
identityEncoding,
contentEncodingHeader,
compressionTypeToTxt,
compressFast,
-- * exported for testing
getAcceptedEncodings,
)
where
import Codec.Compression.GZip qualified as GZ
import Data.ByteString.Lazy qualified as BL
import Data.Set qualified as Set
import Data.Text qualified as T
import Hasura.Prelude
import Hasura.Server.Utils (gzipHeader)
import Network.HTTP.Types.Header qualified as NH
-- | Compressed encodings which hasura supports
data CompressionType
= CTGZip
deriving (Show, Eq)
deriving (Show, Eq, Ord)
-- | Accept-Encoding directives (from client) which hasura supports. @Nothing@
-- indicates identity (no compression)
type EncodingType = Maybe CompressionType
identityEncoding :: EncodingType
identityEncoding = Nothing
compressionTypeToTxt :: CompressionType -> Text
compressionTypeToTxt CTGZip = "gzip"
-- | Maybe compress the response body
-- | A map from Accept-Encoding directives to corresponding Content-Encoding
-- headers (from server). NOTE: @identity@ is not a valid directive for this
-- header.
contentEncodingHeader :: CompressionType -> NH.Header
contentEncodingHeader CTGZip = gzipHeader
-- | Maybe compress the response body, based on the client's Accept-Encoding
-- and our own judgement.
compressResponse ::
NH.RequestHeaders ->
BL.ByteString ->
(BL.ByteString, Maybe NH.Header, Maybe CompressionType)
compressResponse reqHeaders unCompressedResp =
let compressionTypeM = getAcceptedCompression reqHeaders
appendCompressionType (res, headerM) = (res, headerM, compressionTypeM)
gzipCompressionParams =
-- See Note [Compression ratios]
GZ.defaultCompressParams {GZ.compressLevel = GZ.compressionLevel 1}
in appendCompressionType $ case compressionTypeM of
Just CTGZip -> (GZ.compressWith gzipCompressionParams unCompressedResp, Just gzipHeader)
Nothing -> (unCompressedResp, Nothing)
-- | The response body (possibly compressed), and the encoding chosen
(BL.ByteString, EncodingType)
compressResponse reqHeaders unCompressedResp
-- we have the option to gzip:
| acceptedEncodings == Set.fromList [identityEncoding, Just CTGZip]
||
-- we must gzip:
acceptedEncodings == Set.fromList [Just CTGZip] =
(compressFast CTGZip unCompressedResp, Just CTGZip)
-- we must only return an uncompressed response:
| acceptedEncodings == Set.fromList [identityEncoding] =
(unCompressedResp, identityEncoding)
-- this is technically a client error, but ignore for now (maintaining
-- current behavior); assume identity:
| otherwise =
(unCompressedResp, identityEncoding)
where
acceptedEncodings = getAcceptedEncodings reqHeaders
-- | Which, if any, compressed encodings can the client accept?
-- | Compress using the given compression type, favoring speed over compression ratio.
compressFast :: CompressionType -> BL.ByteString -> BL.ByteString
compressFast = \case
CTGZip -> GZ.compressWith gzipCompressionParams
where
gzipCompressionParams =
-- See Note [Compression ratios]
GZ.defaultCompressParams {GZ.compressLevel = GZ.compressionLevel 1}
-- | Which encodings can the client accept? The empty set returned here is an
-- error condition and the server technically ought to return a 406.
--
-- https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Encoding
getAcceptedCompression :: NH.RequestHeaders -> Maybe CompressionType
getAcceptedCompression reqHeaders
| "gzip" `elem` acceptEncodingVals = Just CTGZip
| otherwise = Nothing
getAcceptedEncodings :: NH.RequestHeaders -> Set.Set EncodingType
getAcceptedEncodings reqHeaders = Set.fromList acceptedEncodingTypes
where
acceptEncodingVals =
rawHeaderVals =
concatMap (splitHeaderVal . snd) $
filter (\h -> fst h == NH.hAcceptEncoding) reqHeaders
splitHeaderVal bs = map T.strip $ T.splitOn "," $ bsToTxt bs
-- we'll ignore qvalues, except (crucially) to determine if 'identity' is rejected:
identityRejected =
-- ...if we're explicitly rejecting identity, or...
"identity;q=0" `elem` rawHeaderVals
||
-- ...rejecting anything not listed and identity is not listed
( "*;q=0" `elem` rawHeaderVals
&& (not $ any ("identity" `T.isPrefixOf`) rawHeaderVals)
)
gzipAccepted =
any ("gzip" `T.isPrefixOf`) rawHeaderVals
&& ("gzip;q=0" `notElem` rawHeaderVals)
-- AFAICT missing header, or *, implies “send whatever you want”
-- https://www.rfc-editor.org/rfc/rfc7231#section-5.3.4
anyEncodingTechnicallyAcceptable =
null rawHeaderVals || rawHeaderVals == ["*"]
acceptedEncodingTypes
-- \| anyEncodingTechnicallyAcceptable = [Just CTGZip, identityEncoding]
-- NOTE!: For now to be conservative and maintain historical behavior we
-- will treat this case as “only identity is acceptable”:
| anyEncodingTechnicallyAcceptable = [identityEncoding]
| otherwise =
(guard gzipAccepted $> Just CTGZip)
<> (guard (not identityRejected) $> identityEncoding)
{-
Note [Compression ratios]
@@ -73,4 +136,29 @@ context.
I didn't test higher compression levels much, but `gzip -4` for the most part
resulted in less than 10% smaller output on random json, and ~30% on our highly
compressible benchmark output.
UPDATE (12/5):
~~~~~~~~~~~~~
Some recent data on compression ratios for graphql responses (here as:
compressed_size / uncompressed_size) taken from cloud:
Aggregate across all responses where uncompressed > 700 bytes:
max: 0.891 (worst compression)
p99: 0.658
p95: 0.565
p75: 0.467
p50: 0.346
min: 0.005 (best compression)
Aggregate across responses where uncompressed > 17K bytes (90th percentile):
max: 0.773
p99: 0.414
p95: 0.304
p75: 0.202
p50: 0.172
min: 0.005
-}
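
As a rough illustration of the ratio quoted in the note (compressed_size / uncompressed_size), a sketch computing it with `compressFast` on an arbitrary, highly repetitive payload (not part of the change itself):

```haskell
{-# LANGUAGE OverloadedStrings #-}

import qualified Data.ByteString.Lazy as BL
import qualified Data.ByteString.Lazy.Char8 as BL8
import Hasura.Server.Compression (CompressionType (..), compressFast)

-- compressed_size / uncompressed_size, as in Note [Compression ratios].
compressionRatio :: BL.ByteString -> Double
compressionRatio body =
  fromIntegral (BL.length (compressFast CTGZip body))
    / fromIntegral (BL.length body)

main :: IO ()
main = do
  -- A very repetitive JSON-ish payload; expect a ratio well below the p50
  -- figures above (real responses vary, as the percentiles show).
  let sample = BL8.concat (replicate 500 "{\"name\":\"alice\",\"age\":30},")
  print (compressionRatio sample)
```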


@@ -0,0 +1,49 @@
module Hasura.Server.CompressionSpec (spec) where
import Data.Set qualified as Set
import Hasura.Prelude
import Hasura.Server.Compression
import Test.Hspec
spec :: Spec
spec = describe "serialized data compression" $ parallel do
describe "getAcceptedEncodings" do
it "detects gzip and not" do
getAcceptedEncodings [("x", "x"), ("accept-encoding", "gzip")]
`shouldBe` Set.fromList [Just CTGZip, identityEncoding]
getAcceptedEncodings [("accept-encoding", "brotli, gzip;q=0.9")]
`shouldBe` Set.fromList [Just CTGZip, identityEncoding]
getAcceptedEncodings [("accept-encoding", "brotli")]
`shouldBe` Set.fromList [identityEncoding]
getAcceptedEncodings [("accept-encoding", "identity;q=0.42,brotli, gzip;q=0.9")]
`shouldBe` Set.fromList [Just CTGZip, identityEncoding]
getAcceptedEncodings [("accept-encoding", "identity;q=0.42,brotli, gzip;q=0")]
`shouldBe` Set.fromList [identityEncoding]
it "handles explicit rejection of identity" do
getAcceptedEncodings [("accept-encoding", "identity;q=0,brotli, gzip;q=0.9")]
`shouldBe` Set.fromList [Just CTGZip]
-- strictly per spec this would result in a 406, but we'll likely
-- just decide to return uncompressed (identity) higher up
getAcceptedEncodings [("accept-encoding", "identity;q=0,brotli")]
`shouldBe` Set.fromList []
getAcceptedEncodings [("accept-encoding", "*;q=0,brotli")]
`shouldBe` Set.fromList []
getAcceptedEncodings [("accept-encoding", "gzip, *;q=0")]
`shouldBe` Set.fromList [Just CTGZip]
-- behaviors that might change if we decide it's worth it:
it "arbitrary/historical behavior" do
-- see Compression.hs for discussion
getAcceptedEncodings [("accept-encoding", "*")]
`shouldBe` Set.fromList [identityEncoding]
getAcceptedEncodings []
`shouldBe` Set.fromList [identityEncoding]
getAcceptedEncodings [("accept-encoding", "")]
`shouldBe` Set.fromList [identityEncoding]
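
If useful, a hypothetical standalone runner for the spec above (the real suite registers `Hasura.Server.CompressionSpec` in the `graphql-engine-tests` target, per the cabal diff at the top):

```haskell
module Main (main) where

import qualified Hasura.Server.CompressionSpec as CompressionSpec
import Test.Hspec (hspec)

main :: IO ()
main = hspec CompressionSpec.spec
```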