2019-03-18 19:22:21 +03:00
|
|
|
-- A module for representing encoded json
|
|
|
|
-- and efficient operations to construct them
|
|
|
|
|
|
|
|
module Hasura.EncJSON
|
|
|
|
( EncJSON,
|
|
|
|
encJFromBuilder,
|
2019-03-25 15:29:52 +03:00
|
|
|
encJToLBS,
|
2019-04-18 15:15:07 +03:00
|
|
|
encJToBS,
|
2019-03-18 19:22:21 +03:00
|
|
|
encJFromJValue,
|
|
|
|
encJFromChar,
|
|
|
|
encJFromText,
|
2022-08-18 01:13:32 +03:00
|
|
|
encJFromNonEmptyText,
|
|
|
|
encJFromBool,
|
2019-03-18 19:22:21 +03:00
|
|
|
encJFromBS,
|
|
|
|
encJFromLBS,
|
|
|
|
encJFromList,
|
|
|
|
encJFromAssocList,
|
2020-10-07 13:23:17 +03:00
|
|
|
encJFromInsOrdHashMap,
|
2021-08-06 16:39:00 +03:00
|
|
|
encJFromOrderedValue,
|
2022-11-02 14:39:49 +03:00
|
|
|
encJFromBsWithoutSoh,
|
|
|
|
encJFromLbsWithoutSoh,
|
2019-03-18 19:22:21 +03:00
|
|
|
)
|
|
|
|
where
|
2021-09-24 01:56:37 +03:00
|
|
|
|
2021-08-06 16:39:00 +03:00
|
|
|
import Data.Aeson qualified as J
|
2022-04-05 22:10:37 +03:00
|
|
|
import Data.Aeson.Encoding qualified as J
|
2021-08-06 16:39:00 +03:00
|
|
|
import Data.Aeson.Ordered qualified as JO
|
|
|
|
import Data.ByteString qualified as B
|
|
|
|
import Data.ByteString.Builder qualified as BB
|
2022-12-07 08:59:52 +03:00
|
|
|
import Data.ByteString.Builder.Extra qualified as BB
|
|
|
|
import Data.ByteString.Builder.Internal qualified as BB
|
2021-08-06 16:39:00 +03:00
|
|
|
import Data.ByteString.Lazy qualified as BL
|
|
|
|
import Data.HashMap.Strict.InsOrd qualified as OMap
|
|
|
|
import Data.Text.Encoding qualified as TE
|
2022-08-18 01:13:32 +03:00
|
|
|
import Data.Text.NonEmpty (NonEmptyText)
|
|
|
|
import Data.Text.NonEmpty qualified as NET
|
2021-08-06 16:39:00 +03:00
|
|
|
import Data.Vector qualified as V
|
2022-11-02 14:39:49 +03:00
|
|
|
import Data.Word (Word8)
|
Import `pg-client-hs` as `PG`
Result of executing the following commands:
```shell
# replace "as Q" imports with "as PG" (in retrospect this didn't need a regex)
git grep -lE 'as Q($|[^a-zA-Z])' -- '*.hs' | xargs sed -i -E 's/as Q($|[^a-zA-Z])/as PG\1/'
# replace " Q." with " PG."
git grep -lE ' Q\.' -- '*.hs' | xargs sed -i 's/ Q\./ PG./g'
# replace "(Q." with "(PG."
git grep -lE '\(Q\.' -- '*.hs' | xargs sed -i 's/(Q\./(PG./g'
# ditto, but for [, |, { and !
git grep -lE '\[Q\.' -- '*.hs' | xargs sed -i 's/\[Q\./\[PG./g'
git grep -l '|Q\.' -- '*.hs' | xargs sed -i 's/|Q\./|PG./g'
git grep -l '{Q\.' -- '*.hs' | xargs sed -i 's/{Q\./{PG./g'
git grep -l '!Q\.' -- '*.hs' | xargs sed -i 's/!Q\./!PG./g'
```
(Doing the `grep -l` before the `sed`, instead of `sed` on the entire codebase, reduces the number of `mtime` updates, and so reduces how many times a file gets recompiled while checking intermediate results.)
Finally, I manually removed a broken and unused `Arbitrary` instance in `Hasura.RQL.Network`. (It used an `import Test.QuickCheck.Arbitrary as Q` statement, which was erroneously caught by the first find-replace command.)
After this PR, `Q` is no longer used as an import qualifier. That was not the goal of this PR, but perhaps it's a useful fact for future efforts.
PR-URL: https://github.com/hasura/graphql-engine-mono/pull/5933
GitOrigin-RevId: 8c84c59d57789111d40f5d3322c5a885dcfbf40e
2022-09-20 22:54:43 +03:00
|
|
|
import Database.PG.Query qualified as PG
|
2019-03-18 19:22:21 +03:00
|
|
|
import Hasura.Prelude
|
|
|
|
|
|
|
|
-- | A JSON value represented by its already-serialized form: a bytestring
-- 'BB.Builder'. Values are produced with the @encJFrom*@ smart constructors
-- exported above and rendered with 'encJToLBS' \/ 'encJToBS'. The constructor
-- wraps the builder verbatim, so whoever constructs one is responsible for it
-- containing valid JSON.
newtype EncJSON = EncJSON {unEncJSON :: BB.Builder}
|
2022-04-05 22:10:37 +03:00
|
|
|
|
2022-11-02 14:39:49 +03:00
|
|
|
-- | JSONB bytestrings start with a @SOH@ header @\x01@ and then
-- follow with a valid JSON string, therefore we should check for this
-- and remove if necessary before decoding as normal
instance PG.FromCol EncJSON where
  -- A SQL NULL column decodes to JSON null; any actual bytes have a
  -- possible leading SOH byte stripped and are wrapped verbatim.
  fromCol = Right . maybe (encJFromJValue J.Null) encJFromBsWithoutSoh
|
|
|
|
|
|
|
|
-- | Construct an 'EncJSON' from a strict bytestring of serialized JSON,
-- dropping the single @SOH@ (@\x01@) header byte that JSONB output is
-- prefixed with, if it is present.
encJFromBsWithoutSoh :: B.ByteString -> EncJSON
encJFromBsWithoutSoh bytes = encJFromBS (removeSOH B.uncons bytes)
|
|
|
|
|
|
|
|
-- | Construct an 'EncJSON' from a lazy bytestring of serialized JSON,
-- dropping the single @SOH@ (@\x01@) header byte that JSONB output is
-- prefixed with, if it is present.
encJFromLbsWithoutSoh :: BL.ByteString -> EncJSON
encJFromLbsWithoutSoh bytes = encJFromLBS (removeSOH BL.uncons bytes)
|
|
|
|
|
|
|
|
-- | Strip a single leading @SOH@ byte (@\x01@) if — and only if — it is the
-- first byte of the input; otherwise return the input unchanged. Abstracted
-- over @uncons@ so it works for both strict and lazy bytestrings.
removeSOH :: (bs -> Maybe (Word8, bs)) -> bs -> bs
removeSOH uncons input =
  case uncons input of
    -- literal pattern: matches exactly the SOH byte
    Just (1, rest) -> rest
    -- empty input, or a first byte that is not SOH
    _ -> input
|
2022-09-05 18:03:18 +03:00
|
|
|
|
2023-01-20 17:51:11 +03:00
|
|
|
-- NB: this is somewhat wasteful, because the design of the `FromJSON` type
-- class forces that the incoming `ByteString` value is first parsed to an
-- `aeson` `Value`. But we then immediately re-serialize it here into an
-- `EncJSON`.
instance J.FromJSON EncJSON where
  -- Re-serialization cannot fail, so parsing always succeeds.
  parseJSON value = pure (encJFromJValue value)
|
|
|
|
|
2022-09-05 18:03:18 +03:00
|
|
|
-- No other instances for `EncJSON`. In particular, because:
|
2022-08-11 17:17:53 +03:00
|
|
|
--
|
|
|
|
-- - Having a `Semigroup` or `Monoid` instance allows constructing semantically
|
|
|
|
-- illegal values of type `EncJSON`. To drive this point home: the derived
|
|
|
|
-- `Semigroup` and `Monoid` instances always produce illegal JSON. It is
|
|
|
|
-- merely through an abuse of these APIs that legal JSON can be created.
|
|
|
|
--
|
|
|
|
-- - `IsString` would be a footgun because it's not clear what its expected
|
|
|
|
-- behavior is: does it construct serialized JSON from a serialized `String`,
|
|
|
|
-- or does it serialize a given `String` into a JSON-encoded string value?
|
|
|
|
--
|
|
|
|
-- - `Eq` would also be a footgun: does it compare two serialized values, or
|
|
|
|
-- does it compare values semantically?
|
|
|
|
--
|
|
|
|
-- - `Show`: unused.
|
2019-04-17 12:48:41 +03:00
|
|
|
|
2019-03-18 19:22:21 +03:00
|
|
|
-- | Render an 'EncJSON' to a lazy 'BL.ByteString', using a buffer
-- allocation strategy tuned for typical response-body sizes (see the
-- NOTE below for the observed size distribution).
encJToLBS :: EncJSON -> BL.ByteString
{-# INLINE encJToLBS #-}
encJToLBS = BB.toLazyByteStringWith outputAllocationStrategy mempty . unEncJSON
  where
    -- this is a modification of 'untrimmedStrategy' tuned for typical request sizes.
    -- There's no point to trimming; that's just going to create more garbage
    -- that will be collected immediately.
    outputAllocationStrategy =
      -- the @\_ _ -> False@ predicate means: never trim a chunk
      BB.customStrategy nextBuffer bufSize0 (\_ _ -> False)
      where
        -- NOTE: on cloud we see uncompressed response body sizes like:
        -- P50: 150 bytes
        -- P75: 1200
        -- P90: 17000
        -- P99: 95000
        bufSize0 = 200 -- bytes
        bufGrowthFactor = 5

        {-# INLINE nextBuffer #-}
        -- first buffer: start small, matching the common (P50) case
        nextBuffer Nothing = BB.newBuffer bufSize0
        -- FYI: minSize == bufSize0, except e.g. where `ensureFree` is used (and maybe other situations)
        nextBuffer (Just (prevBuf, minSize)) =
          -- nextBufSize grows exponentially, up to defaultChunkSize; but always at least minSize
          let nextBufSize = max minSize (min BB.defaultChunkSize (BB.bufferSize prevBuf * bufGrowthFactor))
           in BB.newBuffer nextBufSize
|
2019-03-18 19:22:21 +03:00
|
|
|
|
2019-04-18 15:15:07 +03:00
|
|
|
-- | Render an 'EncJSON' to a strict 'B.ByteString', via the lazy renderer.
encJToBS :: EncJSON -> B.ByteString
encJToBS json = BL.toStrict (encJToLBS json)
{-# INLINE encJToBS #-}
|
|
|
|
|
2019-03-18 19:22:21 +03:00
|
|
|
-- | Wrap an already-serialized JSON 'BB.Builder'. The caller is responsible
-- for the builder containing valid JSON.
encJFromBuilder :: BB.Builder -> EncJSON
encJFromBuilder builder = EncJSON builder
{-# INLINE encJFromBuilder #-}
|
|
|
|
|
|
|
|
-- | Wrap a strict bytestring assumed to already contain serialized JSON.
encJFromBS :: B.ByteString -> EncJSON
encJFromBS bytes = EncJSON (BB.byteString bytes)
{-# INLINE encJFromBS #-}
|
|
|
|
|
|
|
|
-- | Wrap a lazy bytestring assumed to already contain serialized JSON.
encJFromLBS :: BL.ByteString -> EncJSON
encJFromLBS bytes = EncJSON (BB.lazyByteString bytes)
{-# INLINE encJFromLBS #-}
|
|
|
|
|
|
|
|
-- | Serialize any 'J.ToJSON' value into an 'EncJSON', going through its
-- 'J.toEncoding' (i.e. without building an intermediate 'J.Value').
encJFromJValue :: J.ToJSON a => a -> EncJSON
encJFromJValue v = encJFromBuilder (J.fromEncoding (J.toEncoding v))
{-# INLINE encJFromJValue #-}
|
|
|
|
|
|
|
|
-- | Emit a single character, UTF-8 encoded. Note: no JSON quoting or
-- escaping is performed — the character is written verbatim.
encJFromChar :: Char -> EncJSON
encJFromChar c = EncJSON (BB.charUtf8 c)
{-# INLINE encJFromChar #-}
|
|
|
|
|
|
|
|
-- | Emit a 'Text' value, UTF-8 encoded. Note: no JSON quoting or escaping
-- is performed — the text is written verbatim.
encJFromText :: Text -> EncJSON
encJFromText t = encJFromBuilder (TE.encodeUtf8Builder t)
{-# INLINE encJFromText #-}
|
|
|
|
|
2022-08-18 01:13:32 +03:00
|
|
|
-- | Emit a 'NonEmptyText' value, UTF-8 encoded (via 'encJFromText');
-- no JSON quoting or escaping is performed.
encJFromNonEmptyText :: NonEmptyText -> EncJSON
encJFromNonEmptyText net = encJFromText (NET.unNonEmptyText net)
{-# INLINE encJFromNonEmptyText #-}
|
|
|
|
|
|
|
|
-- | Serialize a 'Bool' to the JSON literals @true@ \/ @false@.
encJFromBool :: Bool -> EncJSON
encJFromBool b = encJFromText (if b then "true" else "false")
{-# INLINE encJFromBool #-}
|
|
|
|
|
2019-03-18 19:22:21 +03:00
|
|
|
-- | Build a serialized JSON array from already-serialized elements,
-- joining them with commas inside square brackets.
encJFromList :: [EncJSON] -> EncJSON
encJFromList elems =
  encJFromBuilder $ case elems of
    [] -> "[]"
    e : es -> "[" <> unEncJSON e <> foldr prependComma "]" es
  where
    -- each further element is preceded by a comma separator
    prependComma e acc = "," <> unEncJSON e <> acc
|
2019-03-18 19:22:21 +03:00
|
|
|
|
|
|
|
-- | Build a serialized JSON object from an association list of keys and
-- already-serialized values, preserving the given order. Keys are
-- JSON-escaped via aeson's 'J.text' encoding; duplicate keys are written
-- as-is, without deduplication.
encJFromAssocList :: [(Text, EncJSON)] -> EncJSON
encJFromAssocList pairs =
  encJFromBuilder $ case pairs of
    [] -> "{}"
    p : ps -> "{" <> renderPair p <> foldr prependComma "}" ps
  where
    -- each further pair is preceded by a comma separator
    prependComma p acc = "," <> renderPair p <> acc
    -- renders a single "key":value entry
    renderPair (k, v) = J.fromEncoding (J.text k) <> ":" <> unEncJSON v
|
2020-10-07 13:23:17 +03:00
|
|
|
|
|
|
|
-- | Build a serialized JSON object from an insertion-ordered hashmap,
-- preserving its insertion order.
encJFromInsOrdHashMap :: InsOrdHashMap Text EncJSON -> EncJSON
encJFromInsOrdHashMap m = encJFromAssocList (OMap.toList m)
|
2021-08-06 16:39:00 +03:00
|
|
|
|
|
|
|
-- | Encode a 'JO.Value' as 'EncJSON', preserving the key order of objects.
encJFromOrderedValue :: JO.Value -> EncJSON
encJFromOrderedValue = go
  where
    go value = case value of
      -- recurse into each value while keeping the ordered key list
      JO.Object obj -> encJFromAssocList [(k, go v) | (k, v) <- JO.toList obj]
      JO.Array vec -> encJFromList (go <$> V.toList vec)
      -- scalar leaves delegate to the plain aeson encoding
      JO.String s -> encJFromJValue s
      JO.Number sci -> encJFromJValue sci
      JO.Bool b -> encJFromJValue b
      JO.Null -> encJFromJValue J.Null
|