graphql-engine/server/src-test/Hasura/Generator.hs
Antoine Leblanc 52d91e3e8d Update GraphQL Parser version to fix text encoding issue (fix #1965)
### A long tale about encoding

GraphQL has an [introspection system](http://spec.graphql.org/June2018/#sec-Introspection), which allows its schema to be introspected. This is what we use to introspect [remote schemas](41383e1f88/server/src-rsr/introspection.json). There is one place in the introspection where we might find GraphQL values: the default value of an argument.

```json
{
  "fields": [
    {
      "name": "echo",
      "args": [
        {
          "name": "msg",
          "defaultValue": "\"Hello\\nWorld!\""
        }
      ]
    }
  ]
}
```

Note that GraphQL's introspection is transport agnostic: the default value isn't returned as a JSON value, but as a _string-encoded GraphQL Value_. In this case, the value is the GraphQL String `"Hello\nWorld!"`. Embedded into a string, it is encoded as: `"\"Hello\\nWorld!\""`.

When we [parse that value](41383e1f88/server/src-lib/Hasura/GraphQL/RemoteServer.hs (L351)), we first extract that JSON string, to get its content, `"Hello\nWorld!"`, then use our [GraphQL Parser library](21c1ddfb41/src/Language/GraphQL/Draft/Parser.hs (L200)) to interpret this: we find the double quote, understand that the content is a String, unescape the backslashes, and end up with the desired string value: `['H', 'e', 'l', 'l', 'o', '\n', 'W', 'o', 'r', 'l', 'd', '!']`. This all works fine.

However, there was a bug in the _printer_ part of our parser library: when printing back a String value, we would not re-escape characters properly. In practice, this meant that the GraphQL String `"Hello\nWorld"` would be encoded in JSON as `"\"Hello\nWorld!\""`. Note how the `\n` is not properly double-escaped. This led to a variety of problems, as described in #1965:
- we would successfully parse a remote schema containing such characters in its default values, but then would print those erroneous JSON values in our introspection, which would _crash the console_
- we would inject those default values in queries sent to remote schemas, and print them wrong doing so, sending invalid values to remote schemas and getting errors in result

It turns out that this bug had been lurking in the code for a long time: I combed through the history of [the parser library](https://github.com/hasura/graphql-parser-hs), and as far as I can tell, this bug has always been there. So why was it never caught? After all, we do have [round trip tests](21c1ddfb41/test/Spec.hs (L52)) that print + parse arbitrary values and check that we get the same value as a result. They do use any arbitrary unicode character in their generated strings. So... that should have covered it, right?

Well... it turns out that [the tests were ignoring errors](7678066c49/test/Spec.hs (L45)), and would always return "SUCCESS" in CI, even if they failed... Furthermore, the sample size was small enough that, most of the time, _they would not hit such characters_. Running the tests locally on a loop, I only got errors ~10% of the time...

This was all fixed in hasura/graphql-parser-hs#44. This was probably one of Hasura's longest standing bugs? ^^'

### Description

This PR bumps the version of graphql-parser-hs in the engine, and switches some of our own arbitrary tests to use unicode characters in text rather than alphanumeric values. It turns out those tests were much better at hitting "bad" values, and that they consistently failed when generating arbitrary unicode characters.

https://github.com/hasura/graphql-engine-mono/pull/2031

GitOrigin-RevId: 54fa48270386a67336e5544351691619e0684559
2021-08-06 11:54:45 +00:00

239 lines
7.7 KiB
Haskell

{-# OPTIONS_GHC -fno-warn-orphans #-}
module Hasura.Generator () where
import Hasura.Prelude
import qualified Data.HashMap.Strict as Map
import qualified Data.Text as T
import qualified Language.GraphQL.Draft.Syntax as G
import Data.Containers.ListUtils (nubOrd)
import Data.Ratio ((%))
import Test.QuickCheck
import Hasura.RQL.Types.RemoteSchema
import Hasura.RQL.Types.SchemaCache (IntrospectionResult (..))
import Hasura.Server.Utils
import Hasura.Session
-- Quickcheck helpers
distinct :: (Arbitrary a, Ord a) => Gen [a]
distinct = nubOrd <$> arbitrary
distinct1 :: (Arbitrary a, Ord a) => Gen [a]
distinct1 = nubOrd <$> listOf1 arbitrary
arbitraryExcluding :: (Arbitrary a, Eq a) => [a] -> Gen a
arbitraryExcluding exclusions = arbitrary `suchThat` (`notElem` exclusions)
distinctExcluding :: (Arbitrary a, Ord a) => [a] -> Gen [a]
distinctExcluding = fmap nubOrd . listOf . arbitraryExcluding
distinctExcluding1 :: (Arbitrary a, Ord a) => [a] -> Gen [a]
distinctExcluding1 = fmap nubOrd . listOf1 . arbitraryExcluding
sublistOf1 :: [a] -> Gen [a]
sublistOf1 xs = sublistOf xs `suchThat` (not . null)
-- Third party instances
instance Arbitrary Text where
arbitrary = T.pack <$> listOf arbitraryUnicodeChar
instance (Arbitrary k, Eq k, Hashable k, Arbitrary v) => Arbitrary (HashMap k v) where
arbitrary = Map.fromList <$> arbitrary
-- GraphQL syntax instances
instance Arbitrary G.Name where
arbitrary = G.unsafeMkName . T.pack <$> listOf1 (elements ['a'..'z'])
instance Arbitrary G.Description where
arbitrary = G.Description <$> arbitrary
instance Arbitrary G.EnumValue where
arbitrary = G.EnumValue <$> arbitrary
instance Arbitrary G.EnumValueDefinition where
arbitrary = G.EnumValueDefinition
<$> arbitrary
<*> arbitrary
<*> pure []
instance Arbitrary G.Nullability where
arbitrary = G.Nullability <$> arbitrary
instance Arbitrary (G.Value Void) where
arbitrary = oneof
[ pure G.VNull
, G.VInt <$> arbitrary
, G.VFloat <$> arbitraryScientific
, G.VString <$> arbitrary
, G.VBoolean <$> arbitrary
, G.VEnum <$> arbitrary
-- reduce the internal size factor at every level, so that this
-- recursion is guaranteed to terminate
, G.VList <$> scale (`div` 2) arbitrary
, G.VObject <$> scale (`div` 2) arbitrary
]
where
arbitraryScientific = do
-- fromRational can create invalid repeating values that loop forever
-- we avoid this by creating known good ratios
num :: Integer <- arbitrary
dem :: Integer <- elements [1..32]
pure $ fromRational $ num % (10 ^ dem)
-- Hasura instances
instance Arbitrary SessionVariable where
arbitrary = do
name <- arbitrary
pure $ mkSessionVariable $ sessionVariablePrefix <> name
instance Arbitrary IntrospectionResult where
arbitrary = do
-- first, generate distinct names for each kind of object
scalarTypeNames <- distinct
objectTypeNames <- distinctExcluding1 scalarTypeNames
interfaceTypeNames <- distinctExcluding $ scalarTypeNames ++ objectTypeNames
unionTypeNames <- distinctExcluding $ scalarTypeNames ++ objectTypeNames ++ interfaceTypeNames
enumTypeNames <- distinctExcluding $ scalarTypeNames ++ objectTypeNames ++ interfaceTypeNames ++ unionTypeNames
let outputTypeNames = scalarTypeNames ++ objectTypeNames ++ interfaceTypeNames ++ unionTypeNames ++ enumTypeNames
inputObjectTypeNames <- distinctExcluding outputTypeNames
let inputTypeNames = scalarTypeNames ++ enumTypeNames ++ inputObjectTypeNames
let inputValues = case inputTypeNames of
[] -> pure []
_ -> listOf $ genRemoteSchemaInputValueDefinition inputTypeNames
-- then, create a matching definition for each name
scalarTypeDefinitions <- for scalarTypeNames $
genScalarTypeDefinition
objectTypeDefinitions <- for objectTypeNames $
genObjectTypeDefinition inputValues outputTypeNames interfaceTypeNames
interfaceTypeDefinitions <- for interfaceTypeNames $
genInterfaceTypeDefinition inputValues outputTypeNames
unionTypeDefinitions <- for unionTypeNames $
genUnionTypeDefinition objectTypeNames
enumTypeDefinitions <- for enumTypeNames $
genEnumTypeDefinition
inputObjectTypeDefinitions <- for inputObjectTypeNames $
genInputObjectTypeDefinition inputValues
-- finally, create an IntrospectionResult from the aggregated definitions
let irDoc = RemoteSchemaIntrospection $ concat
[ G.TypeDefinitionScalar <$> scalarTypeDefinitions
, G.TypeDefinitionObject <$> objectTypeDefinitions
, G.TypeDefinitionInterface <$> interfaceTypeDefinitions
, G.TypeDefinitionUnion <$> unionTypeDefinitions
, G.TypeDefinitionEnum <$> enumTypeDefinitions
, G.TypeDefinitionInputObject <$> inputObjectTypeDefinitions
]
irQueryRoot <- elements objectTypeNames
let maybeObjectTypeName = elements $ Nothing : (Just <$> objectTypeNames)
irMutationRoot <- maybeObjectTypeName
irSubscriptionRoot <- maybeObjectTypeName
pure $ IntrospectionResult {..}
-- Generator helpers
genGType :: [G.Name] -> Gen G.GType
genGType typeNames = frequency
-- bias towards avoiding deeply nested lists
[ (7, G.TypeNamed <$> arbitrary <*> elements typeNames)
, (3, G.TypeList <$> arbitrary <*> genGType typeNames)
]
genInputValueDefinition :: [G.Name] -> Gen G.InputValueDefinition
genInputValueDefinition inputTypeNames = G.InputValueDefinition
<$> arbitrary
<*> arbitrary
<*> genGType inputTypeNames
<*> arbitrary
<*> pure []
genScalarTypeDefinition :: G.Name -> Gen G.ScalarTypeDefinition
genScalarTypeDefinition name = G.ScalarTypeDefinition
<$> arbitrary
<*> pure name
<*> pure []
genEnumTypeDefinition :: G.Name -> Gen G.EnumTypeDefinition
genEnumTypeDefinition name = G.EnumTypeDefinition
<$> arbitrary
<*> pure name
<*> pure []
<*> listOf1 arbitrary
genUnionTypeDefinition :: [G.Name] -> G.Name -> Gen G.UnionTypeDefinition
genUnionTypeDefinition objectTypeNames name = G.UnionTypeDefinition
<$> arbitrary
<*> pure name
<*> pure []
<*> sublistOf1 objectTypeNames
genFieldDefinition
:: Gen [inputType]
-> [G.Name]
-> G.Name
-> Gen (G.FieldDefinition inputType)
genFieldDefinition inputTypes outputTypeNames name = G.FieldDefinition
<$> arbitrary
<*> pure name
<*> inputTypes
<*> genGType outputTypeNames
<*> pure []
genObjectTypeDefinition
:: Gen [inputType]
-> [G.Name]
-> [G.Name]
-> G.Name
-> Gen (G.ObjectTypeDefinition inputType)
genObjectTypeDefinition inputTypes outputTypeNames interfaceTypeNames name = G.ObjectTypeDefinition
<$> arbitrary
<*> pure name
<*> sublistOf interfaceTypeNames
<*> pure []
<*> fields
where
fields = distinct1 >>= traverse (genFieldDefinition inputTypes outputTypeNames)
genInterfaceTypeDefinition
:: Arbitrary possibleType
=> Gen [inputType]
-> [G.Name]
-> G.Name
-> Gen (G.InterfaceTypeDefinition [possibleType] inputType)
genInterfaceTypeDefinition inputTypes outputTypeNames name = G.InterfaceTypeDefinition
<$> arbitrary
<*> pure name
<*> pure []
<*> fields
<*> listOf1 arbitrary
where
fields = distinct1 >>= traverse (genFieldDefinition inputTypes outputTypeNames)
genInputObjectTypeDefinition
:: Gen [inputType]
-> G.Name
-> Gen (G.InputObjectTypeDefinition inputType)
genInputObjectTypeDefinition values name = G.InputObjectTypeDefinition
<$> arbitrary
<*> pure name
<*> pure []
<*> values
genRemoteSchemaInputValueDefinition :: [G.Name] -> Gen RemoteSchemaInputValueDefinition
genRemoteSchemaInputValueDefinition inputTypeNames = RemoteSchemaInputValueDefinition
<$> genInputValueDefinition inputTypeNames
<*> pure Nothing