graphql-engine/server/src-lib/Hasura/Backends/Postgres/SQL/RenameIdentifiers.hs
Samir Talwar 342391f39d Upgrade Ormolu to v0.5.
This upgrades the version of Ormolu required by the HGE repository to v0.5.0.1, and reformats all code accordingly.

Ormolu v0.5 reformats code that uses infix operators. This is mostly useful, adding newlines and indentation to make it clear which operators are applied first, but in some cases, it's unpleasant. To make this easier on the eyes, I had to do the following:

* Add a few fixity declarations (search for `infix`)
* Add parentheses to make precedence clear, allowing Ormolu to keep everything on one line
* Rename `relevantEq` to `(==~)` in #6651 and set it to `infix 4`
* Add a few _.ormolu_ files (thanks to @hallettj for helping me get started), mostly for Autodocodec operators that don't have explicit fixity declarations

In general, I think these changes are quite reasonable. They mostly affect indentation.

PR-URL: https://github.com/hasura/graphql-engine-mono/pull/6675
GitOrigin-RevId: cd47d87f1d089fb0bc9dcbbe7798dbceedcd7d83
2022-11-02 20:55:13 +00:00

422 lines
17 KiB
Haskell

-- | Postgres SQL Rename Identifiers
--
-- 1. Prefix table names with underscores to avoid issues where column names and tables conflict.
-- This can happen because we give columns and tables the name @root@ for some reason,
-- and that can trip up @row_to_json@.
-- See <https://github.com/PostgREST/postgrest/issues/993#issuecomment-340377813>.
-- An alternative solution would be to not create a @TableAlias@ with the name @root@,
-- but that seemed a bit complicated for me to do at the time.
--
-- 2. Bypass the Postgres limitation of truncating identifiers to 63 characters
-- by prepending the identifier's md5 hash when it is longer than 63 characters.
--
-- We do both operations in the same traversal for performance reasons, but a simpler
-- implementation of (2) would be @transformBi prefixHash@ from the uniplate or the
-- generic-plate package.
--
-- See Postgres docs:
-- <https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS>
module Hasura.Backends.Postgres.SQL.RenameIdentifiers
( -- * Exported API
renameIdentifiers,
renameIdentifiersSelectWith,
)
where
import Crypto.Hash.MD5 qualified as MD5
import Data.ByteString qualified as BS
import Data.ByteString.Base16 qualified as Base16
import Data.HashSet qualified as Set
import Data.Text qualified as T
import Data.Text.Encoding qualified as T
import Hasura.Backends.Postgres.SQL.DML qualified as S
import Hasura.Backends.Postgres.SQL.Types (Identifier (..), TableIdentifier (..), identifierToTableIdentifier, tableIdentifierToIdentifier)
import Hasura.Prelude
{- Note [Postgres identifier length limitations]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Postgres truncates identifiers to a maximum of 63 bytes by default (see
https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS).
-}
------------------------------------------------
-- * API
-- | Rename the identifiers of a 'S.Select': table names are prefixed with
-- underscores and long identifiers are prefixed with their md5 hash.
renameIdentifiers :: S.Select -> S.Select
renameIdentifiers select = renameTablesAndLongIdentifiers select
-- | Same as 'renameIdentifiers', but for a @WITH@ statement wrapping a
-- 'S.Select': table names are prefixed with underscores and long identifiers
-- are prefixed with their md5 hash.
renameIdentifiersSelectWith :: S.SelectWithG S.Select -> S.SelectWithG S.Select
renameIdentifiersSelectWith selectWith = renameTablesAndLongIdentifiersWith selectWith
------------------------------------------------
-- * Prefix long identifiers
-- $prefix_md5_implementation
--
-- We traverse the query and transform the 'Identifier's in it that are
-- longer than 63 characters by prefixing them with their md5 hash.
--
-- This works because:
--
-- 1. Database table references and column references also use `Identifier`, but they
-- cannot be more than 63 characters long, so we will never transform those cases.
-- 2. The md5 hash is a 32-character-long deterministic representation of the identifier,
-- so even when truncated by postgres, it will always be enough to identify the identifier.
-- 3. It is possible in theory for two identifiers to produce the same hash, but extremely
-- unlikely and I don't think we'll ever run into such a case.
--
-- /Note/ that we could in theory replace the identifier with a hash,
-- but we prefix the hash instead for our benefit as developers - if we need
-- to read the query at some point we can look at the rest of the identifier
-- ignoring the hash for a readable representation.
---------------------------------------------------
-- | Prefix an identifier with its md5 hash if Postgres would truncate it.
--
-- Postgres keeps at most 63 /bytes/ (@NAMEDATALEN - 1@) of an identifier, not
-- 63 characters, so the length is measured on the UTF-8 encoding of the name.
-- A name made of multi-byte characters can exceed the limit while still being
-- at most 63 characters long; measuring characters would leave such a name
-- untouched and let Postgres truncate it silently.
-- NOTE(review): this assumes identifiers reach Postgres UTF-8 encoded.
--
-- Names within the limit are returned unchanged. Longer names are returned as
-- @md5_@, followed by the hex-encoded md5 digest of the name, an underscore
-- and the original name.
--
-- We assume (rightly) that identifiers exceeding the limit are not database
-- tables or columns, and are made by us using aliases, so we should be free to
-- rename them.
prefixHash :: Text -> Text
prefixHash name
  | BS.length encodedName > maxIdentifierBytes =
      "md5_" <> hexDigest <> "_" <> name
  | otherwise = name
  where
    -- Number of bytes of an identifier that Postgres keeps by default.
    maxIdentifierBytes :: Int
    maxIdentifierBytes = 63
    -- The encoding is shared by the length check and the digest.
    encodedName = T.encodeUtf8 name
    hexDigest = T.decodeUtf8 . Base16.encode $ MD5.hash encodedName
-- | Apply 'prefixHash' to the text wrapped by an 'Identifier'.
identifierPrefixHash :: Identifier -> Identifier
identifierPrefixHash = Identifier . prefixHash . getIdenTxt
------------------------------------------------
-- * Prefix table names with underscore
-- \$prefix_md5_implementation
-- | Prefix table names with underscores to avoid issues where column names and tables conflict.
-- This can happen because we give columns and tables the name @root@ for some reason,
-- and that can trip up @row_to_json@.
-- See <https://github.com/PostgREST/postgrest/issues/993#issuecomment-340377813>.
-- An alternative solution would be to not create a @TableAlias@ with the name @root@,
-- but that seemed a bit complicated for me to do at the time.
-- ** API
-- | Run the renaming traversal ('uSelect') over a 'S.Select', starting with no
-- table names in scope.
renameTablesAndLongIdentifiers :: S.Select -> S.Select
renameTablesAndLongIdentifiers select = runMyState (uSelect select)
-- | Run the renaming traversal ('uSelectWith') over a @WITH@ statement,
-- starting with no table names in scope.
renameTablesAndLongIdentifiersWith :: S.SelectWithG S.Select -> S.SelectWithG S.Select
renameTablesAndLongIdentifiersWith selectWith = runMyState (uSelectWith selectWith)
------------------------------------------------
-- ** Data types
-- | Evaluate a renaming action with an initially empty set of table names,
-- returning its result and dropping the final state.
runMyState :: MyState a -> a
runMyState action = evalState action noTables
-- | The initial state: no table names are in scope.
noTables :: TableNames
noTables = TableNames Set.empty
-- | The table names that are currently in scope.
--
-- Aliases are inserted by 'addAliasAndPrefixHash'; references are looked up
-- in this set by 'getTableIdentifierAndPrefixHash'.
newtype TableNames = TableNames {_tables :: Set.HashSet TableIdentifier}
  deriving (Eq, Show)
-- | The monad the renaming traversal runs in: a 'State' carrying the
-- 'TableNames' currently in scope.
type MyState = State TableNames
------------------------------------------------
-- ** Utilities
-- | Build the renamed form of a table name: an underscore is prepended, and
-- the result goes through 'prefixHash' in case it has become too long.
mkPrefixedTableName :: Text -> Text
mkPrefixedTableName = prefixHash . T.cons '_'
-- | Register a table alias as being in scope and return its renamed form.
--
-- The /original/ alias is what gets stored in the set, so that later
-- references (which still use the original name) can be found and renamed
-- the same way.
addAliasAndPrefixHash :: S.TableAlias -> MyState S.TableAlias
addAliasAndPrefixHash tableAlias@(S.TableAlias (Identifier aliasName)) = do
  modify' $ \scope ->
    scope {_tables = Set.insert (S.tableAliasToIdentifier tableAlias) (_tables scope)}
  pure . S.TableAlias . Identifier $ mkPrefixedTableName aliasName
-- | 'Identifier' flavour of 'getTableIdentifierAndPrefixHash': the identifier
-- is converted to a 'TableIdentifier', renamed if it is in the table names
-- set (left as is otherwise), and converted back.
getTableNameAndPrefixHash :: Identifier -> MyState Identifier
getTableNameAndPrefixHash =
  fmap tableIdentifierToIdentifier
    . getTableIdentifierAndPrefixHash
    . identifierToTableIdentifier
-- | Look a table identifier up in the table names set.
--
-- If it is in scope, the renamed identifier (see 'mkPrefixedTableName') is
-- returned; otherwise the identifier is returned untouched. The state is only
-- read, never modified.
getTableIdentifierAndPrefixHash :: TableIdentifier -> MyState TableIdentifier
getTableIdentifierAndPrefixHash identifier = do
  inScope <- _tables <$> get
  pure $ rename inScope
  where
    rename inScope
      | identifier `Set.member` inScope =
          TableIdentifier . mkPrefixedTableName $ unTableIdentifier identifier
      | otherwise = identifier
-- | Run an action in a local scope: whatever the action does to the table
-- names set is undone once it has finished, and only its result is kept.
restoringTables :: MyState a -> MyState a
restoringTables action = do
  -- 'TableNames' only holds the set, so saving the whole state saves the set.
  saved <- get
  result <- action
  put saved
  pure result
------------------------------------------------
-- ** Algorithm
-- | Rename identifiers in a @WITH@ statement.
--
-- Every CTE is processed on its own and the table names it brings into scope
-- are discarded afterwards. The CTE aliases themselves only get the md5 prefix
-- when too long. The main select is processed last, and the table names it
-- introduces are left in the state.
uSelectWith :: S.SelectWithG S.Select -> MyState (S.SelectWithG S.Select)
uSelectWith (S.SelectWith ctes baseSelect) = do
  renamedCTEs <- forM ctes $ \(alias, cte) -> do
    renamedCTE <- restoringTables (uSelect cte)
    pure (prefixHashTableAlias alias, renamedCTE)
  renamedBase <- uSelect baseSelect
  pure $ S.SelectWith renamedCTEs renamedBase
-- | Rename identifiers in every component of a 'S.Select'.
--
-- The CTEs and the @FROM@ clause are processed first, as those are the
-- components that bring new table names into scope; the other clauses are
-- then renamed against that scope. The table names registered along the way
-- are left in the state when this function returns.
uSelect :: S.Select -> MyState S.Select
uSelect (S.Select ctes distinctM extrs fromM whereM groupByM havingM orderByM limitM offsetM) = do
  -- A CTE alias may be referenced by the following CTEs and by the main
  -- select, hence CTEs come first.
  renamedCTEs <- forM ctes $ \(alias, cte) -> do
    renamedAlias <- addAliasAndPrefixHash alias
    renamedCTE <- uSelect cte
    pure (renamedAlias, renamedCTE)
  -- The from items may register aliases too, so they precede the rest.
  renamedFrom <- traverse uFromExp fromM
  renamedWhere <- traverse uWhere whereM
  renamedGroupBy <- traverse uGroupBy groupByM
  renamedHaving <- traverse uHaving havingM
  renamedOrderBy <- traverse uOrderBy orderByM
  renamedDistinct <- traverse uDistinct distinctM
  renamedExtrs <- traverse uExtractor extrs
  renamedLimit <- traverse uLimit limitM
  renamedOffset <- traverse uOffset offsetM
  pure $
    S.Select
      renamedCTEs
      renamedDistinct
      renamedExtrs
      renamedFrom
      renamedWhere
      renamedGroupBy
      renamedHaving
      renamedOrderBy
      renamedLimit
      renamedOffset
  where
    uWhere (S.WhereFrag cond) = S.WhereFrag <$> uBoolExp cond
    uGroupBy (S.GroupByExp exprs) = S.GroupByExp <$> traverse uSqlExp exprs
    uHaving (S.HavingExp cond) = S.HavingExp <$> uBoolExp cond
    uDistinct S.DistinctSimple = pure S.DistinctSimple
    uDistinct (S.DistinctOn exprs) = S.DistinctOn <$> traverse uSqlExp exprs
    -- Column aliases never refer to tables; they only need the md5 prefix.
    uExtractor (S.Extractor expr alias) = do
      renamedExpr <- uSqlExp expr
      pure $ S.Extractor renamedExpr (prefixHashColumnAlias <$> alias)
    uLimit (S.LimitExp expr) = S.LimitExp <$> uSqlExp expr
    uOffset (S.OffsetExp expr) = S.OffsetExp <$> uSqlExp expr
-- | Rename identifiers in each @from_item@ of a @FROM@ clause, in order.
-- This may bring new table aliases into scope.
uFromExp :: S.FromExp -> MyState S.FromExp
uFromExp (S.FromExp fromItems) =
  fmap S.FromExp (traverse uFromItem fromItems)
-- | Rename identifiers in a single @from_item@.
--
-- Most kinds of from item carry a table alias; that alias is registered as
-- being in scope (see 'addAliasAndPrefixHash') so that later references to it
-- are renamed consistently.
uFromItem :: S.FromItem -> MyState S.FromItem
uFromItem = \case
  -- The qualified table is a database table, so it is left alone; only the
  -- optional alias is registered and renamed.
  S.FISimple qualifiedTable maybeAlias -> do
    renamedAlias <- traverse addAliasAndPrefixHash maybeAlias
    pure $ S.FISimple qualifiedTable renamedAlias
  -- A reference to a table name: renamed only if that name is in scope.
  S.FIIdentifier identifier ->
    fmap S.FIIdentifier (getTableIdentifierAndPrefixHash identifier)
  S.FIFunc funcExp ->
    fmap S.FIFunc (uFunctionExp funcExp)
  -- Both the arguments and the resulting table alias are transformed; the
  -- alias is registered.
  S.FIUnnest args tableAlias columnAliases -> do
    renamedArgs <- traverse uSqlExp args
    renamedAlias <- addAliasAndPrefixHash tableAlias
    pure $ S.FIUnnest renamedArgs renamedAlias (map prefixHashColumnAlias columnAliases)
  -- The sub-select gets a local scope and only its alias is registered.
  -- NOTE: a sub-select that is not part of a lateral join should not have
  -- access to the tables exposed previously, but the scope is not reset for
  -- that case here.
  S.FISelect isLateral select alias ->
    S.FISelect isLateral
      <$> restoringTables (uSelect select)
      <*> addAliasAndPrefixHash alias
  -- The alias is registered after the statement itself has been processed.
  S.FISelectWith isLateral selectWith alias ->
    S.FISelectWith isLateral
      <$> uSelectWith selectWith
      <*> addAliasAndPrefixHash alias
  -- The aliases of a @VALUES@ item only get the md5 prefix when too long;
  -- the table alias is not registered.
  S.FIValues (S.ValuesExp tuples) alias maybeColumns -> do
    renamedTuples <- traverse renameTuple tuples
    pure $
      S.FIValues
        (S.ValuesExp renamedTuples)
        (prefixHashTableAlias alias)
        (map prefixHashColumnAlias <$> maybeColumns)
  -- Registers whatever aliases both sides of the join introduce.
  S.FIJoin joinExp ->
    fmap S.FIJoin (uJoinExp joinExp)
  where
    renameTuple (S.TupleExp exprs) = S.TupleExp <$> traverse uSqlExp exprs
-- | Rename identifiers in a function call expression: the arguments first,
-- then the optional alias. The function name itself is left untouched.
uFunctionExp :: S.FunctionExp -> MyState S.FunctionExp
uFunctionExp (S.FunctionExp functionName args maybeAlias) = do
  renamedArgs <- uFunctionArgs args
  renamedAlias <- traverse uFunctionAlias maybeAlias
  pure $ S.FunctionExp functionName renamedArgs renamedAlias
-- | Rename identifiers in the arguments of a function call, positional
-- arguments first and named arguments second.
uFunctionArgs :: S.FunctionArgs -> MyState S.FunctionArgs
uFunctionArgs (S.FunctionArgs positional named) = do
  renamedPositional <- traverse uSqlExp positional
  renamedNamed <- traverse uSqlExp named
  pure $ S.FunctionArgs renamedPositional renamedNamed
-- | Rename a function call alias.
--
-- The table alias is registered as being in scope and renamed; the column
-- aliases of the optional definition list only get the md5 prefix when they
-- are too long, and their types are kept as they are.
uFunctionAlias :: S.FunctionAlias -> MyState S.FunctionAlias
uFunctionAlias (S.FunctionAlias alias definitionList) = do
  renamedAlias <- addAliasAndPrefixHash alias
  pure $ S.FunctionAlias renamedAlias (map renameItem <$> definitionList)
  where
    renameItem (S.FunctionDefinitionListItem columnAlias typ) =
      S.FunctionDefinitionListItem (prefixHashColumnAlias columnAlias) typ
-- | Rename identifiers in a join expression.
--
-- The left item is processed first, then the right one, and the join
-- condition last, so that the condition sees the aliases introduced by both
-- sides. Those aliases remain in scope afterwards.
uJoinExp :: S.JoinExpr -> MyState S.JoinExpr
uJoinExp (S.JoinExpr left joinType right joinCond) =
  S.JoinExpr
    <$> uFromItem left
    <*> pure joinType
    <*> uFromItem right
    <*> uJoinCond joinCond
-- | Rename identifiers in a join condition.
--
-- An @ON@ condition might contain references to table names and aliases, so
-- it is renamed as a boolean expression. The columns of a @USING@ condition
-- only get the md5 prefix when they are too long.
uJoinCond :: S.JoinCond -> MyState S.JoinCond
uJoinCond = \case
  S.JoinOn cond -> fmap S.JoinOn (uBoolExp cond)
  S.JoinUsing columns -> pure (S.JoinUsing (identifierPrefixHash <$> columns))
-- | Rename identifiers in a boolean expression.
--
-- The boolean expression structure itself does not currently contain table
-- names, so we descend to the 'S.SQLExp's (and sub-selects) it is made of, as
-- those may contain table names and aliases.
--
-- Table names that might be brought into scope while doing so are discarded,
-- because they are not used outside of the boolean expression.
uBoolExp :: S.BoolExp -> MyState S.BoolExp
uBoolExp boolExp =
  restoringTables $ case boolExp of
    S.BELit value -> pure (S.BELit value)
    S.BEBin op lhs rhs -> do
      renamedLhs <- uBoolExp lhs
      renamedRhs <- uBoolExp rhs
      pure $ S.BEBin op renamedLhs renamedRhs
    S.BENot inner -> fmap S.BENot (uBoolExp inner)
    S.BECompare op lhs rhs -> do
      renamedLhs <- uSqlExp lhs
      renamedRhs <- uSqlExp rhs
      pure $ S.BECompare op renamedLhs renamedRhs
    S.BECompareAny op lhs rhs -> do
      renamedLhs <- uSqlExp lhs
      renamedRhs <- uSqlExp rhs
      pure $ S.BECompareAny op renamedLhs renamedRhs
    S.BENull expr -> fmap S.BENull (uSqlExp expr)
    S.BENotNull expr -> fmap S.BENotNull (uSqlExp expr)
    S.BEExists select -> fmap S.BEExists (uSelect select)
    S.BEIN lhs candidates -> do
      renamedLhs <- uSqlExp lhs
      renamedCandidates <- traverse uSqlExp candidates
      pure $ S.BEIN renamedLhs renamedCandidates
    S.BEExp expr -> fmap S.BEExp (uSqlExp expr)
-- | Rename identifiers in a SQL expression.
--
-- References to table names and aliases are renamed when the name is in
-- scope, and long identifiers get the md5 prefix. SQL expressions do not
-- introduce new table aliases for their surroundings, so any alias registered
-- while processing the expression is discarded afterwards.
uSqlExp :: S.SQLExp -> MyState S.SQLExp
uSqlExp sqlExp =
  restoringTables $ case sqlExp of
    -- Expressions that contain nothing to rename.
    S.SEPrep position -> pure (S.SEPrep position)
    S.SENull -> pure S.SENull
    S.SELit literal -> pure (S.SELit literal)
    S.SEUnsafe raw -> pure (S.SEUnsafe raw)
    S.SEExcluded name -> pure (S.SEExcluded name)
    S.SECount countType -> pure (S.SECount countType)
    -- Identifiers and qualified identifiers.
    S.SEStar qualifier -> fmap S.SEStar (traverse renameQualifier qualifier)
    S.SEIdentifier identifier -> pure (S.SEIdentifier (identifierPrefixHash identifier))
    -- A row expression refers to a table name, so it is looked up in scope.
    S.SERowIdentifier identifier ->
      fmap S.SERowIdentifier (getTableNameAndPrefixHash identifier)
    -- The qualifier is renamed if it is a table alias in scope; the column
    -- part only gets the md5 prefix when it is too long.
    S.SEQIdentifier (S.QIdentifier qualifier identifier) -> do
      renamedQualifier <- renameQualifier qualifier
      pure . S.SEQIdentifier $
        S.QIdentifier renamedQualifier (identifierPrefixHash identifier)
    -- Composite expressions: rename every sub-expression.
    S.SESelect select -> fmap S.SESelect (uSelect select)
    S.SEFnApp fn args orderBy -> do
      renamedArgs <- traverse uSqlExp args
      renamedOrderBy <- traverse uOrderBy orderBy
      pure $ S.SEFnApp fn renamedArgs renamedOrderBy
    S.SEOpApp op args -> fmap (S.SEOpApp op) (traverse uSqlExp args)
    S.SETyAnn expr ty -> do
      renamedExpr <- uSqlExp expr
      pure $ S.SETyAnn renamedExpr ty
    S.SECond cond onTrue onFalse -> do
      renamedCond <- uBoolExp cond
      renamedOnTrue <- uSqlExp onTrue
      renamedOnFalse <- uSqlExp onFalse
      pure $ S.SECond renamedCond renamedOnTrue renamedOnFalse
    S.SEBool cond -> fmap S.SEBool (uBoolExp cond)
    S.SEArray elements -> fmap S.SEArray (traverse uSqlExp elements)
    S.SEArrayIndex arrayExp indexExp -> do
      renamedArray <- uSqlExp arrayExp
      renamedIndex <- uSqlExp indexExp
      pure $ S.SEArrayIndex renamedArray renamedIndex
    S.SETuple (S.TupleExp elements) ->
      fmap (S.SETuple . S.TupleExp) (traverse uSqlExp elements)
    S.SENamedArg arg value -> fmap (S.SENamedArg arg) (uSqlExp value)
    S.SEFunction funcExp -> fmap S.SEFunction (uFunctionExp funcExp)
  where
    -- Rename the table alias of a qualifier if needed.
    renameQualifier (S.QualifiedIdentifier identifier typeAnnotation) = do
      renamedIdentifier <- getTableIdentifierAndPrefixHash identifier
      pure $ S.QualifiedIdentifier renamedIdentifier typeAnnotation
    -- Refers to a database table, so it is left untouched.
    renameQualifier (S.QualTable table) = pure (S.QualTable table)
    renameQualifier (S.QualVar var) = pure (S.QualVar var)
-- | Rename identifiers in an @ORDER BY@ clause.
--
-- Only the expression of each item is transformed; its ordering and nulls
-- order are kept. @ORDER BY@ does not introduce new aliases, and 'uSqlExp'
-- already discards any name that might be added while it runs.
uOrderBy :: S.OrderByExp -> MyState S.OrderByExp
uOrderBy (S.OrderByExp ordByItems) =
  fmap S.OrderByExp (traverse renameItem ordByItems)
  where
    renameItem (S.OrderByItem expr ordering nullsOrder) =
      (\renamedExpr -> S.OrderByItem renamedExpr ordering nullsOrder) <$> uSqlExp expr
-- | Prefix a table alias with its md5 hash if 'prefixHash' deems it too long.
-- Unlike 'addAliasAndPrefixHash', no underscore is added and the alias is not
-- registered as being in scope.
prefixHashTableAlias :: S.TableAlias -> S.TableAlias
prefixHashTableAlias (S.TableAlias identifier) =
  S.TableAlias $ identifierPrefixHash identifier
-- | Prefix a column alias with its md5 hash if 'prefixHash' deems it too long.
prefixHashColumnAlias :: S.ColumnAlias -> S.ColumnAlias
prefixHashColumnAlias (S.ColumnAlias identifier) =
  S.ColumnAlias $ identifierPrefixHash identifier