2019-09-19 15:54:40 +03:00
|
|
|
module Hasura.Server.Compression
|
2021-09-24 01:56:37 +03:00
|
|
|
( compressResponse,
|
|
|
|
CompressionType (..),
|
|
|
|
compressionTypeToTxt,
|
2019-09-19 15:54:40 +03:00
|
|
|
)
|
|
|
|
where
|
|
|
|
|
2021-09-24 01:56:37 +03:00
|
|
|
import Codec.Compression.GZip qualified as GZ
|
|
|
|
import Data.ByteString.Lazy qualified as BL
|
|
|
|
import Data.Text qualified as T
|
|
|
|
import Hasura.Prelude
|
|
|
|
import Hasura.Server.Utils (gzipHeader)
|
|
|
|
import Network.HTTP.Types.Header qualified as NH
|
2019-09-19 15:54:40 +03:00
|
|
|
|
|
|
|
-- | The response encodings this server knows how to produce. Currently
-- only gzip; the wire token for each constructor is given by
-- 'compressionTypeToTxt'.
data CompressionType
  = CTGZip
  deriving (Show, Eq)
|
|
|
|
|
2020-10-27 16:53:49 +03:00
|
|
|
-- | The standard @Content-Encoding@ token for a compression type.
compressionTypeToTxt :: CompressionType -> Text
compressionTypeToTxt ct = case ct of
  CTGZip -> "gzip"
|
2019-09-19 15:54:40 +03:00
|
|
|
|
2022-08-25 09:42:07 +03:00
|
|
|
-- | Maybe compress the response body, based on the encodings the client
-- advertised in its @Accept-Encoding@ request headers. Returns the
-- (possibly compressed) body, the @Content-Encoding@ response header to
-- attach (if any), and which compression was applied.
compressResponse ::
  NH.RequestHeaders ->
  BL.ByteString ->
  (BL.ByteString, Maybe NH.Header, Maybe CompressionType)
compressResponse reqHeaders uncompressedBody =
  case acceptedEncoding of
    Just CTGZip -> (gzippedBody, Just gzipHeader, acceptedEncoding)
    Nothing -> (uncompressedBody, Nothing, acceptedEncoding)
  where
    acceptedEncoding = getAcceptedCompression reqHeaders
    gzippedBody = GZ.compressWith gzipParams uncompressedBody
    gzipParams =
      -- favour speed over ratio; see Note [Compression ratios]
      GZ.defaultCompressParams {GZ.compressLevel = GZ.compressionLevel 1}
|
2019-09-19 15:54:40 +03:00
|
|
|
|
2022-08-25 09:42:07 +03:00
|
|
|
-- | Which, if any, compressed encodings can the client accept?
--
-- https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Encoding
getAcceptedCompression :: NH.RequestHeaders -> Maybe CompressionType
getAcceptedCompression headers =
  if "gzip" `elem` offeredEncodings
    then Just CTGZip
    else Nothing
  where
    -- every token from every Accept-Encoding header, comma-split and
    -- whitespace-trimmed
    offeredEncodings =
      [ T.strip encoding
        | (headerName, headerVal) <- headers,
          headerName == NH.hAcceptEncoding,
          encoding <- T.splitOn "," (bsToTxt headerVal)
      ]
|
2022-08-25 09:42:07 +03:00
|
|
|
|
|
|
|
{-
|
|
|
|
Note [Compression ratios]
|
|
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
|
|
|
|
I did some measurements of compression ratios at `gzip -1` (libc) of some
|
|
|
|
randomly generated json, real json datasets, and output from our benchmarked
|
|
|
|
chinook queries:
|
|
|
|
|
|
|
|
2552/6131 = 0.41
|
|
|
|
4666/8718 = 0.53
|
|
|
|
13921/27131 = 0.51
|
|
|
|
5895/8879 = 0.66 <----- completely random strings
|
|
|
|
8634/28261 = 0.30
|
|
|
|
70422/372466 = 0.18
|
|
|
|
|
|
|
|
200/600 = 0.33 <----| from chinook graphql benchmarks
|
|
|
|
3000/33000 = 0.09 <----|
|
|
|
|
13000/190000 = 0.07 <----'
|
|
|
|
|
|
|
|
Given these numbers I would suggest using a rule-of-thumb expected compression
|
|
|
|
ratio between 2:1 and 10:1, depending on what being conservative means in the
|
|
|
|
context.
|
|
|
|
|
|
|
|
I didn't test higher compression levels much, but `gzip -4` for the most part
|
|
|
|
resulted in less than 10% smaller output on random json, and ~30% on our highly
|
|
|
|
compressible benchmark output.
|
|
|
|
-}
|