graphql-engine/server/src-lib/Hasura/Backends/Postgres/SQL/RenameIdentifiers.hs
Gil Mizrahi e626d87a3c server/postgres: Long identifiers in insert with parameters
PR-URL: https://github.com/hasura/graphql-engine-mono/pull/5466
Co-authored-by: Brandon Martin <40686+codedmart@users.noreply.github.com>
GitOrigin-RevId: dbc028529081de6257cc24acd1a5bfde8b39d057
2022-08-12 00:27:31 +00:00

413 lines
16 KiB
Haskell

-- | Postgres SQL Rename Identifiers
--
-- 1. Prefix table names with underscores to avoid issues where column names and tables conflict.
-- This can happen because we give columns and tables the name @root@ for some reason,
-- and that can trip up @row_to_json@.
-- See <https://github.com/PostgREST/postgrest/issues/993#issuecomment-340377813>.
-- An alternative solution would be to not create a @TableAlias@ with the name @root@,
-- but that seemed a bit complicated for me to do at the time.
--
-- 2. Bypass the Postgres limitation of truncating identifiers to 63 characters long
-- by prepending the identifier's md5 hash when it is longer than 63 characters.
--
-- We do both operations in the same traversal for performance reasons, but a simpler
-- implementation of (2) would be @transformBi prefixHash@ from the uniplate or the
-- generic-plate package.
--
-- See Postgres docs:
-- <https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS>
module Hasura.Backends.Postgres.SQL.RenameIdentifiers
( -- * Exported API
renameIdentifiers,
renameIdentifiersSelectWith,
)
where
import Crypto.Hash.MD5 qualified as MD5
import Data.ByteString qualified as BS
import Data.ByteString.Base16 qualified as Base16
import Data.HashSet qualified as Set
import Data.Text qualified as T
import Data.Text.Encoding qualified as T
import Hasura.Backends.Postgres.SQL.DML qualified as S
import Hasura.Backends.Postgres.SQL.Types (Identifier (..))
import Hasura.Prelude
{- Note [Postgres identifier length limitations]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Postgres truncates identifiers to a maximum of 63 characters by default (see
https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS).
-}
------------------------------------------------
-- * API
-- | Rename the identifiers of a 'S.Select': table aliases get an underscore
-- prefix and over-long identifiers get an md5 hash prefix.
renameIdentifiers :: S.Select -> S.Select
renameIdentifiers select = renameTablesAndLongIdentifiers select
-- | Same as 'renameIdentifiers', for a @WITH ... SELECT@ statement: table
-- aliases get an underscore prefix and over-long identifiers get an md5 hash
-- prefix.
renameIdentifiersSelectWith :: S.SelectWithG S.Select -> S.SelectWithG S.Select
renameIdentifiersSelectWith selectWith = renameTablesAndLongIdentifiersWith selectWith
------------------------------------------------
-- * Prefix long identifiers
-- $prefix_md5_implementation
--
-- We traverse the query and transform the 'Identifier's that are
-- longer than 63 characters by prefixing them with their md5 hash.
--
-- This works because:
--
-- 1. Database table references and column references also use `Identifier`, but they
-- cannot be more than 63 characters long, so we will never transform those cases.
-- 2. The md5 hash is a 32 characters long deterministic representation of the identifier,
-- so even when truncated by postgres, it will always be enough to identify the identifiers.
-- 3. It is possible in theory for two identifiers to produce the same hash, but extremely
-- unlikely and I don't think we'll ever run into such a case.
--
-- /Note/ that we could in theory replace the identifier with a hash,
-- but we prefix the hash instead for our benefit as developers - if we need
-- to read the query at some point we can look at the rest of the identifier
-- ignoring the hash for a readable representation.
---------------------------------------------------
-- | Prefix an identifier with its md5 hash when Postgres would truncate it.
--
-- Postgres keeps at most 63 /bytes/ of an identifier (@NAMEDATALEN - 1@), so
-- the length is measured on the UTF-8 encoding of the name and not on its
-- number of characters: a name containing multi-byte characters can exceed
-- the limit while being shorter than 64 characters.
--
-- Names that fit within the limit are returned unchanged. Longer names become
-- @md5_\<hex digest\>_\<original name\>@; the @md5_@ marker and the 32
-- character digest come first, so they survive the truncation.
--
-- We assume that identifiers over the limit are not database tables or
-- columns (Postgres could not have stored such names), and are aliases made
-- by us, so we are free to rename them.
prefixHash :: Identifier -> Identifier
prefixHash (Identifier name)
  | BS.length encodedName > maxIdentifierBytes =
      Identifier $ "md5_" <> hash <> "_" <> name
  | otherwise = Identifier name
  where
    -- The default Postgres limit, in bytes.
    maxIdentifierBytes :: Int
    maxIdentifierBytes = 63
    -- Encoded once; used both for the length check and for hashing.
    encodedName = T.encodeUtf8 name
    -- Hex-encoded digest; it is plain ASCII so decoding cannot fail.
    hash = T.decodeUtf8 . Base16.encode . MD5.hash $ encodedName
------------------------------------------------
-- * Prefix table names with underscore
-- $prefix_md5_implementation
-- | Prefix table names with underscores to avoid issues where column names and tables conflict.
-- This can happen because we give columns and tables the name @root@ for some reason,
-- and that can trip up @row_to_json@.
-- See <https://github.com/PostgREST/postgrest/issues/993#issuecomment-340377813>.
-- An alternative solution would be to not create a @TableAlias@ with the name @root@,
-- but that seemed a bit complicated for me to do at the time.
-- ** API
-- Run the renaming traversal over a select, starting with no tables in scope.
renameTablesAndLongIdentifiers :: S.Select -> S.Select
renameTablesAndLongIdentifiers select = runMyState (uSelect select)
-- | Run the renaming traversal over a @WITH ... SELECT@ statement, starting
-- with no tables in scope.
renameTablesAndLongIdentifiersWith :: S.SelectWithG S.Select -> S.SelectWithG S.Select
renameTablesAndLongIdentifiersWith selectWith = runMyState (uSelectWith selectWith)
------------------------------------------------
-- ** Data types
-- | Evaluate a traversal from the empty set of table names and keep only its
-- result; the final set of names is dropped.
runMyState :: MyState a -> a
runMyState action = evalState action noTables
-- | The initial state: no table names are in scope.
noTables :: TableNames
noTables = TableNames {_tables = mempty}
-- | The state threaded through the traversal: the tables in scope.
newtype TableNames = TableNames
  { -- | Table aliases registered so far, stored under their original
    -- (un-prefixed) name.
    _tables :: Set.HashSet Identifier
  }
  deriving (Show, Eq)
-- | The monad of the traversal: a 'State' over the 'TableNames' in scope.
type MyState = State TableNames
------------------------------------------------
-- ** Utilities
-- | Prepend an underscore to an identifier, then pass the result through
-- 'prefixHash'.
mkPrefixedName :: Identifier -> Identifier
mkPrefixedName identifier = prefixHash underscored
  where
    underscored = Identifier "_" <> identifier
-- | Register a table alias as being in scope and return its prefixed form.
-- The set stores the original identifier; only the returned alias is renamed.
addAliasAndPrefixHash :: S.TableAlias -> MyState S.TableAlias
addAliasAndPrefixHash (S.TableAlias identifier) = do
  modify' $ \s -> s {_tables = Set.insert identifier (_tables s)}
  pure $ S.TableAlias (mkPrefixedName identifier)
-- | Look an identifier up in the set of table names in scope. A known table
-- name is returned in its prefixed form; anything else is returned untouched.
--
-- NOTE(review): identifiers that are not in the set are returned without going
-- through 'prefixHash' - confirm that aliases renamed only via
-- 'prefixHashTableAlias' never need to be resolved here.
getTableNameAndPrefixHash :: Identifier -> MyState Identifier
getTableNameAndPrefixHash identifier = rename <$> gets _tables
  where
    rename knownTables
      | Set.member identifier knownTables = mkPrefixedName identifier
      | otherwise = identifier
-- | Run an action in a local scope: whatever table names the action adds to
-- the set are forgotten once it is done, only its result is kept.
restoringTables :: MyState a -> MyState a
restoringTables action = do
  -- snapshot of the names in scope before the action
  saved <- get
  -- run the action, then put the snapshot back
  action <* put saved
------------------------------------------------
-- ** Algorithm
-- | Transform a @WITH ... SELECT@ statement.
--
-- Each CTE is transformed in its own scope (the table names it introduces are
-- discarded) and its alias is only hash-prefixed. The main select is
-- transformed last, and the table names it brings into scope are kept.
uSelectWith :: S.SelectWithG S.Select -> MyState (S.SelectWithG S.Select)
uSelectWith (S.SelectWith ctes baseSelect) = do
  newCTEs <- for ctes $ \(alias, cte) -> do
    newCTE <- restoringTables (uSelect cte)
    pure (prefixHashTableAlias alias, newCTE)
  newBaseSelect <- uSelect baseSelect
  pure $ S.SelectWith newCTEs newBaseSelect
-- | Transform a 'S.Select', component by component.
--
-- The CTEs and the @FROM@ clause are visited first because they are the ones
-- bringing new table names into scope; the other clauses are visited after
-- them. The table names found along the way are left in the state.
uSelect :: S.Select -> MyState S.Select
uSelect (S.Select ctes distinctM extrs fromM whereM groupByM havingM orderByM limitM offsetM) = do
  -- A CTE alias can be referenced by the following CTEs and by the main
  -- select, so the CTEs are handled before anything else. The alias is
  -- registered before its own body is visited.
  renamedCTEs <- for ctes $ \(alias, cte) -> do
    renamedAlias <- addAliasAndPrefixHash alias
    renamedCTE <- uSelect cte
    pure (renamedAlias, renamedCTE)
  -- The FROM clause can introduce aliases used by all the remaining clauses.
  renamedFrom <- traverse uFromExp fromM
  renamedWhere <- traverse uWhere whereM
  renamedGroupBy <- traverse uGroupBy groupByM
  renamedHaving <- traverse uHaving havingM
  renamedOrderBy <- traverse uOrderBy orderByM
  renamedDistinct <- traverse uDistinct distinctM
  renamedExtrs <- traverse uExtractor extrs
  renamedLimit <- traverse uLimit limitM
  renamedOffset <- traverse uOffset offsetM
  pure $
    S.Select
      renamedCTEs
      renamedDistinct
      renamedExtrs
      renamedFrom
      renamedWhere
      renamedGroupBy
      renamedHaving
      renamedOrderBy
      renamedLimit
      renamedOffset
  where
    uWhere (S.WhereFrag be) = S.WhereFrag <$> uBoolExp be
    uGroupBy (S.GroupByExp exprs) = S.GroupByExp <$> traverse uSqlExp exprs
    uHaving (S.HavingExp be) = S.HavingExp <$> uBoolExp be
    uDistinct distinct = case distinct of
      S.DistinctSimple -> pure S.DistinctSimple
      S.DistinctOn exprs -> S.DistinctOn <$> traverse uSqlExp exprs
    -- the column alias of an extractor is only hash-prefixed
    uExtractor (S.Extractor expr alias) = do
      renamedExpr <- uSqlExp expr
      pure $ S.Extractor renamedExpr (prefixHashColumnAlias <$> alias)
    uLimit (S.LimitExp expr) = S.LimitExp <$> uSqlExp expr
    uOffset (S.OffsetExp expr) = S.OffsetExp <$> uSqlExp expr
-- | Transform a @FROM@ clause by transforming each of its @from_item@s in
-- order. Every item may bring a new alias into scope.
uFromExp :: S.FromExp -> MyState S.FromExp
uFromExp (S.FromExp fromItems) = do
  renamedItems <- traverse uFromItem fromItems
  pure $ S.FromExp renamedItems
-- | Transform one @from_item@.
-- Most of the cases bring a new table alias into scope.
uFromItem :: S.FromItem -> MyState S.FromItem
uFromItem = \case
  -- The qualified table is a database table and is left as is; only the
  -- optional alias is registered and renamed.
  S.FISimple qualifiedTable maybeAlias ->
    S.FISimple qualifiedTable <$> traverse addAliasAndPrefixHash maybeAlias
  -- A reference by name: renamed only if the name is a table in scope.
  S.FIIdentifier identifier ->
    S.FIIdentifier <$> getTableNameAndPrefixHash identifier
  S.FIFunc funcExp ->
    S.FIFunc <$> uFunctionExp funcExp
  -- The arguments are transformed first, then the resulting table alias is
  -- registered; the column aliases are only hash-prefixed.
  S.FIUnnest args tableAlias columnAliases -> do
    renamedArgs <- traverse uSqlExp args
    renamedAlias <- addAliasAndPrefixHash tableAlias
    pure $ S.FIUnnest renamedArgs renamedAlias (map prefixHashColumnAlias columnAliases)
  -- The sub-select is transformed in its own scope, then its alias is
  -- registered. Correlation is not taken into account here: a select that is
  -- not part of a lateral join should not see the tables exposed previously,
  -- but the set of names is not reset for it.
  S.FISelect isLateral select alias ->
    S.FISelect isLateral
      <$> restoringTables (uSelect select)
      <*> addAliasAndPrefixHash alias
  -- Same as above for a @WITH ... SELECT@; the names are not restored here.
  S.FISelectWith isLateral selectWith alias ->
    S.FISelectWith isLateral
      <$> uSelectWith selectWith
      <*> addAliasAndPrefixHash alias
  -- The table and column aliases of a @VALUES@ list are only hash-prefixed;
  -- the table alias is not registered.
  S.FIValues (S.ValuesExp tups) alias mCols -> do
    renamedTups <- for tups $ \(S.TupleExp ts) ->
      S.TupleExp <$> traverse uSqlExp ts
    pure $
      S.FIValues
        (S.ValuesExp renamedTups)
        (prefixHashTableAlias alias)
        (map prefixHashColumnAlias <$> mCols)
  S.FIJoin joinExp ->
    S.FIJoin <$> uJoinExp joinExp
-- | Transform a function call: its arguments first, then its optional alias.
-- The function name itself is kept as is.
uFunctionExp :: S.FunctionExp -> MyState S.FunctionExp
uFunctionExp (S.FunctionExp functionName args maybeAlias) = do
  renamedArgs <- uFunctionArgs args
  renamedAlias <- traverse uFunctionAlias maybeAlias
  pure $ S.FunctionExp functionName renamedArgs renamedAlias
-- | Transform the arguments of a function call: the positional ones first,
-- then the named ones.
uFunctionArgs :: S.FunctionArgs -> MyState S.FunctionArgs
uFunctionArgs (S.FunctionArgs positional named) = do
  renamedPositional <- traverse uSqlExp positional
  renamedNamed <- traverse uSqlExp named
  pure $ S.FunctionArgs renamedPositional renamedNamed
-- | Transform the alias of a function call. The table alias is registered and
-- renamed; the column aliases of the definition list are only hash-prefixed
-- and their types are kept.
uFunctionAlias :: S.FunctionAlias -> MyState S.FunctionAlias
uFunctionAlias (S.FunctionAlias alias definitionList) = do
  renamedAlias <- addAliasAndPrefixHash alias
  pure $ S.FunctionAlias renamedAlias (map renameItem <$> definitionList)
  where
    renameItem (S.FunctionDefinitionListItem columnAlias typ) =
      S.FunctionDefinitionListItem (prefixHashColumnAlias columnAlias) typ
-- | Transform a join: the left item, then the right item, then the join
-- condition. Both items may bring new aliases into scope.
uJoinExp :: S.JoinExpr -> MyState S.JoinExpr
uJoinExp (S.JoinExpr left joinType right joinCond) =
  rebuild
    <$> uFromItem left
    <*> uFromItem right
    <*> uJoinCond joinCond
  where
    rebuild newLeft newRight newCond = S.JoinExpr newLeft joinType newRight newCond
-- | Transform a join condition. An @ON@ condition may reference table names
-- and aliases, so its boolean expression is traversed; the columns of a
-- @USING@ condition are only hash-prefixed.
uJoinCond :: S.JoinCond -> MyState S.JoinCond
uJoinCond = \case
  S.JoinOn be -> S.JoinOn <$> uBoolExp be
  S.JoinUsing cols -> pure (S.JoinUsing (prefixHash <$> cols))
-- | Transform a boolean expression.
--
-- A boolean expression holds no table name by itself, so the work consists in
-- reaching the 'S.SQLExp's (and sub-selects) it contains and transforming
-- those.
--
-- The whole expression runs in a local scope: table names introduced inside
-- of it are not used outside, so they are discarded.
uBoolExp :: S.BoolExp -> MyState S.BoolExp
uBoolExp boolExp = restoringTables $ case boolExp of
  -- nothing to rename
  literal@(S.BELit _) -> pure literal
  -- nested boolean expressions
  S.BENot inner -> S.BENot <$> uBoolExp inner
  S.BEBin op left right -> do
    newLeft <- uBoolExp left
    newRight <- uBoolExp right
    pure $ S.BEBin op newLeft newRight
  -- comparisons between SQL expressions
  S.BECompare op left right -> do
    newLeft <- uSqlExp left
    newRight <- uSqlExp right
    pure $ S.BECompare op newLeft newRight
  S.BECompareAny op left right -> do
    newLeft <- uSqlExp left
    newRight <- uSqlExp right
    pure $ S.BECompareAny op newLeft newRight
  S.BEIN left exps -> do
    newLeft <- uSqlExp left
    newExps <- traverse uSqlExp exps
    pure $ S.BEIN newLeft newExps
  -- wrappers around a single SQL expression or select
  S.BENull e -> S.BENull <$> uSqlExp e
  S.BENotNull e -> S.BENotNull <$> uSqlExp e
  S.BEExists sel -> S.BEExists <$> uSelect sel
  S.BEExp e -> S.BEExp <$> uSqlExp e
-- | Transform a SQL expression: references to tables in scope are renamed and
-- over-long identifiers are hash-prefixed.
--
-- A SQL expression does not bring new table aliases into the surrounding
-- scope, so it runs in a local scope and the names it may add are discarded.
uSqlExp :: S.SQLExp -> MyState S.SQLExp
uSqlExp sqlExp = restoringTables $ case sqlExp of
  -- Leaves: nothing to rename, returned as they are.
  leaf@(S.SEPrep _) -> pure leaf
  S.SENull -> pure S.SENull
  leaf@(S.SELit _) -> pure leaf
  leaf@(S.SEUnsafe _) -> pure leaf
  leaf@(S.SEExcluded _) -> pure leaf
  leaf@(S.SECount _) -> pure leaf
  -- Identifiers.
  S.SEStar qual -> S.SEStar <$> traverse uQual qual
  S.SEIdentifier identifier -> pure (S.SEIdentifier (prefixHash identifier))
  -- a row expression names a table
  S.SERowIdentifier identifier ->
    S.SERowIdentifier <$> getTableNameAndPrefixHash identifier
  -- the qualifier may be a table alias, the identifier is only hash-prefixed
  S.SEQIdentifier (S.QIdentifier qualifier identifier) -> do
    renamedQualifier <- uQual qualifier
    pure $ S.SEQIdentifier (S.QIdentifier renamedQualifier (prefixHash identifier))
  -- Compound expressions: transform the sub-expressions in order.
  S.SESelect s -> S.SESelect <$> uSelect s
  S.SEFnApp fn args orderBy -> do
    renamedArgs <- traverse uSqlExp args
    renamedOrderBy <- traverse uOrderBy orderBy
    pure $ S.SEFnApp fn renamedArgs renamedOrderBy
  S.SEOpApp op args -> S.SEOpApp op <$> traverse uSqlExp args
  S.SETyAnn e ty -> do
    renamedExp <- uSqlExp e
    pure $ S.SETyAnn renamedExp ty
  S.SECond be onTrue onFalse -> do
    renamedCond <- uBoolExp be
    renamedTrue <- uSqlExp onTrue
    renamedFalse <- uSqlExp onFalse
    pure $ S.SECond renamedCond renamedTrue renamedFalse
  S.SEBool be -> S.SEBool <$> uBoolExp be
  S.SEArray l -> S.SEArray <$> traverse uSqlExp l
  S.SEArrayIndex arrayExp indexExp -> do
    renamedArray <- uSqlExp arrayExp
    renamedIndex <- uSqlExp indexExp
    pure $ S.SEArrayIndex renamedArray renamedIndex
  S.SETuple (S.TupleExp l) -> do
    renamedItems <- traverse uSqlExp l
    pure $ S.SETuple (S.TupleExp renamedItems)
  S.SENamedArg arg val -> S.SENamedArg arg <$> uSqlExp val
  S.SEFunction funcExp -> S.SEFunction <$> uFunctionExp funcExp
  where
    -- Rename a qualifier when it names a table alias in scope.
    uQual qual = case qual of
      S.QualifiedIdentifier identifier typeAnnotation -> do
        renamedIdentifier <- getTableNameAndPrefixHash identifier
        pure $ S.QualifiedIdentifier renamedIdentifier typeAnnotation
      -- refers to a database table
      tableQual@(S.QualTable _) -> pure tableQual
      varQual@(S.QualVar _) -> pure varQual
-- | Transform an @ORDER BY@ clause by transforming the expression of each of
-- its items; the ordering and the nulls order are kept.
-- An @ORDER BY@ does not introduce aliases; names that might be added while
-- visiting an item are already discarded by 'uSqlExp'.
uOrderBy :: S.OrderByExp -> MyState S.OrderByExp
uOrderBy (S.OrderByExp ordByItems) =
  S.OrderByExp <$> for ordByItems renameItem
  where
    renameItem (S.OrderByItem expr ordering nullsOrder) =
      (\renamedExpr -> S.OrderByItem renamedExpr ordering nullsOrder) <$> uSqlExp expr
-- | Apply 'prefixHash' to the identifier of a table alias.
-- The alias is not registered as a table in scope.
prefixHashTableAlias :: S.TableAlias -> S.TableAlias
prefixHashTableAlias (S.TableAlias identifier) =
  S.TableAlias $ prefixHash identifier
-- | Apply 'prefixHash' to the identifier of a column alias.
prefixHashColumnAlias :: S.ColumnAlias -> S.ColumnAlias
prefixHashColumnAlias (S.ColumnAlias identifier) =
  S.ColumnAlias $ prefixHash identifier