Refactor to factor out type error SrcLoc (#1294)

Some refactoring related to type errors:

- Change the type of `addLocToTypeErr` to something more sensible, and get rid of some redundant calls
- Instead of storing a `SrcLoc` in every single `TypeErr` constructor, factor it out into a new type `ContextualTypeErr`.  This also paves the way for additional contextual information which will help generate better type error messages.

This is 100% refactoring; there should be no behavioral changes.
This commit is contained in:
Brent Yorgey 2023-06-01 14:59:51 -05:00 committed by GitHub
parent 67871a59d2
commit 4509a3b378
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 105 additions and 89 deletions

View File

@ -14,6 +14,7 @@ module Swarm.Language.Pipeline (
processParsedTerm,
processTerm',
processParsedTerm',
prettyTypeErr,
showTypeErrorPos,
processTermEither,
) where
@ -72,7 +73,7 @@ processTerm :: Text -> Either Text (Maybe ProcessedTerm)
processTerm = processTerm' empty empty
-- | Like 'processTerm', but use a term that has already been parsed.
processParsedTerm :: Syntax -> Either TypeErr ProcessedTerm
processParsedTerm :: Syntax -> Either ContextualTypeErr ProcessedTerm
processParsedTerm = processParsedTerm' empty empty
-- | Like 'processTerm', but use explicit starting contexts.
@ -81,26 +82,26 @@ processTerm' ctx capCtx txt = do
mt <- readTerm txt
first (prettyTypeErr txt) $ traverse (processParsedTerm' ctx capCtx) mt
prettyTypeErr :: Text -> TypeErr -> Text
prettyTypeErr code te = teLoc <> prettyText te
prettyTypeErr :: Text -> ContextualTypeErr -> Text
prettyTypeErr code (CTE l te) = teLoc <> prettyText te
where
teLoc = case getTypeErrSrcLoc te of
Just (SrcLoc s e) -> (into @Text . showLoc . fst $ getLocRange code (s, e)) <> ": "
_anyOtherLoc -> ""
teLoc = case l of
SrcLoc s e -> (into @Text . showLoc . fst $ getLocRange code (s, e)) <> ": "
NoLoc -> ""
showLoc (r, c) = show r ++ ":" ++ show c
showTypeErrorPos :: Text -> TypeErr -> ((Int, Int), (Int, Int), Text)
showTypeErrorPos code te = (minusOne start, minusOne end, msg)
showTypeErrorPos :: Text -> ContextualTypeErr -> ((Int, Int), (Int, Int), Text)
showTypeErrorPos code (CTE l te) = (minusOne start, minusOne end, msg)
where
minusOne (x, y) = (x - 1, y - 1)
(start, end) = case getTypeErrSrcLoc te of
Just (SrcLoc s e) -> getLocRange code (s, e)
_anyOtherLoc -> ((1, 1), (65535, 65535)) -- unknown loc spans the whole document
(start, end) = case l of
SrcLoc s e -> getLocRange code (s, e)
NoLoc -> ((1, 1), (65535, 65535)) -- unknown loc spans the whole document
msg = prettyText te
-- | Like 'processTerm'', but use a term that has already been parsed.
processParsedTerm' :: TCtx -> ReqCtx -> Syntax -> Either TypeErr ProcessedTerm
processParsedTerm' :: TCtx -> ReqCtx -> Syntax -> Either ContextualTypeErr ProcessedTerm
processParsedTerm' ctx capCtx t = do
m <- inferTop ctx t
let (caps, capCtx') = requirements capCtx (t ^. sTerm)

View File

@ -9,7 +9,6 @@ import Language.Haskell.TH qualified as TH
import Language.Haskell.TH.Quote
import Swarm.Language.Parse
import Swarm.Language.Pipeline
import Swarm.Language.Pretty (prettyText)
import Swarm.Language.Syntax
import Swarm.Language.Types (Polytype)
import Swarm.Util (failT, liftText)
@ -42,7 +41,7 @@ quoteTermExp s = do
)
parsed <- runParserTH pos parseTerm s
case processParsedTerm parsed of
Left errMsg -> failT [prettyText errMsg]
Left err -> failT [prettyTypeErr (from s) err]
Right ptm -> dataToExpQ ((fmap liftText . cast) `extQ` antiTermExp) ptm
antiTermExp :: Term' Polytype -> Maybe TH.ExpQ

View File

@ -202,28 +202,28 @@ appliedTermPrec (TApp f _) = case f of
appliedTermPrec _ = 10
instance PrettyPrec TypeErr where
prettyPrec _ (UnifyErr _ ty1 ty2) =
prettyPrec _ (UnifyErr ty1 ty2) =
"Can't unify" <+> ppr ty1 <+> "and" <+> ppr ty2
prettyPrec _ (Mismatch _ _mt ty1 ty2) =
prettyPrec _ (Mismatch _mt ty1 ty2) =
"Type mismatch: expected" <+> ppr ty1 <> ", but got" <+> ppr ty2
prettyPrec _ (LambdaArgMismatch _ ty1 ty2) =
prettyPrec _ (LambdaArgMismatch ty1 ty2) =
"Lambda argument has type annotation" <+> ppr ty2 <> ", but expected argument type" <+> ppr ty1
prettyPrec _ (FieldsMismatch _ _fs1 _fs2) = "Fields mismatch!!"
prettyPrec _ (EscapedSkolem _ x) =
prettyPrec _ (FieldsMismatch _fs1 _fs2) = "Fields mismatch!!"
prettyPrec _ (EscapedSkolem x) =
"Skolem variable" <+> pretty x <+> "would escape its scope"
prettyPrec _ (UnboundVar _ x) =
prettyPrec _ (UnboundVar x) =
"Unbound variable" <+> pretty x
prettyPrec _ (Infinite x uty) =
"Infinite type:" <+> ppr x <+> "=" <+> ppr uty
prettyPrec _ (DefNotTopLevel _ t) =
prettyPrec _ (DefNotTopLevel t) =
"Definitions may only be at the top level:" <+> ppr t
prettyPrec _ (CantInfer _ t) =
prettyPrec _ (CantInfer t) =
"Couldn't infer the type of term (this shouldn't happen; please report this as a bug!):" <+> ppr t
prettyPrec _ (CantInferProj _ t) =
prettyPrec _ (CantInferProj t) =
"Can't infer the type of a record projection:" <+> ppr t
prettyPrec _ (UnknownProj _ x t) =
prettyPrec _ (UnknownProj x t) =
"Record does not have a field with name" <+> pretty x <> ":" <+> ppr t
prettyPrec _ (InvalidAtomic _ reason t) =
prettyPrec _ (InvalidAtomic reason t) =
"Invalid atomic block:" <+> ppr reason <> ":" <+> ppr t
instance PrettyPrec InvalidAtomicReason where

View File

@ -13,9 +13,9 @@
-- https://byorgey.wordpress.com/2021/09/08/implementing-hindley-milner-with-the-unification-fd-library/ .
module Swarm.Language.Typecheck (
-- * Type errors
ContextualTypeErr (..),
TypeErr (..),
InvalidAtomicReason (..),
getTypeErrSrcLoc,
-- * Inference monad
TC,
@ -78,11 +78,11 @@ import Prelude hiding (lookup)
-- monad transformer provided by the @unification-fd@ library which
-- supports various operations such as generating fresh variables
-- and unifying things.
type TC = ReaderT UCtx (ExceptT TypeErr (IntBindingT TypeF Identity))
type TC = ReaderT UCtx (ExceptT ContextualTypeErr (IntBindingT TypeF Identity))
-- | Run a top-level inference computation, returning either a
-- 'TypeErr' or a fully resolved 'TModule'.
runTC :: TCtx -> TC UModule -> Either TypeErr TModule
runTC :: TCtx -> TC UModule -> Either ContextualTypeErr TModule
runTC ctx =
(>>= applyBindings)
>>> ( >>=
@ -103,14 +103,15 @@ runTC ctx =
lookup :: SrcLoc -> Var -> TC UType
lookup loc x = do
ctx <- ask
maybe (throwError $ UnboundVar loc x) instantiate (Ctx.lookup x ctx)
maybe (throwTypeErr loc $ UnboundVar x) instantiate (Ctx.lookup x ctx)
-- | Add a source location to a type error and re-throw it.
addLocToTypeErr :: Syntax' ty -> TypeErr -> TC a
addLocToTypeErr s te = case te of
UnifyErr NoLoc a b -> throwError $ UnifyErr (s ^. sLoc) a b
Mismatch NoLoc mt a b -> throwError $ Mismatch (s ^. sLoc) mt a b
_ -> throwError te
-- | Catch any thrown type errors and re-throw them with an added source
-- location.
addLocToTypeErr :: SrcLoc -> TC a -> TC a
addLocToTypeErr l m =
m `catchError` \case
CTE NoLoc te -> throwTypeErr l te
te -> throwError te
------------------------------------------------------------
-- Dealing with variables: free variables, fresh variables,
@ -164,7 +165,17 @@ noSkolems l (Forall xs upty) = do
(\case TyVarF v -> S.singleton v; f -> fold f)
upty'
ftyvs = tyvs `S.difference` S.fromList xs
forM_ (S.lookupMin ftyvs) $ throwError . EscapedSkolem l
forM_ (S.lookupMin ftyvs) $ throwTypeErr l . EscapedSkolem
-- ~~~~ Note [lookupMin to get an arbitrary element]
--
-- `S.lookupMin :: Set a -> Maybe a` returns the smallest
-- element of a set, or Nothing if the set is empty. We don't
-- actually care about getting the *smallest* type variable, but
-- lookupMin is a convenient way to say "just get one element if
-- any exist". The forM_ is actually over the Maybe so it represents
-- doing the throwTypeErr either zero or one time, depending on
-- whether lookupMin returns Nothing or Just.
------------------------------------------------------------
-- Lifted stuff from unification-fd
@ -176,7 +187,7 @@ infix 4 =:=
-- same.
expect :: Maybe Syntax -> UType -> UType -> TC UType
expect ms expected actual = case unifyCheck expected actual of
Apart -> throwError $ Mismatch NoLoc ms expected actual
Apart -> throwTypeErr NoLoc $ Mismatch ms expected actual
Equal -> return expected
MightUnify -> lift $ expected U.=:= actual
@ -267,6 +278,26 @@ generalize uty = do
------------------------------------------------------------
-- Type errors
-- | A type error along with various contextual information to help us
-- generate better error messages. For now there is only a SrcLoc
-- but there will be additional context in the future, such as a
-- stack of stuff we were in the middle of doing, relevant names in
-- scope, etc. (#1297).
data ContextualTypeErr = CTE {cteSrcLoc :: SrcLoc, cteTypeErr :: TypeErr}
deriving (Show)
-- | Create a raw 'ContextualTypeErr' with no context information.
mkRawTypeErr :: TypeErr -> ContextualTypeErr
mkRawTypeErr = CTE NoLoc
-- | Create a 'ContextualTypeErr' value from a 'TypeErr' and context.
mkTypeErr :: SrcLoc -> TypeErr -> ContextualTypeErr
mkTypeErr = CTE
-- | Throw a 'ContextualTypeErr'.
throwTypeErr :: SrcLoc -> TypeErr -> TC a
throwTypeErr l te = throwError (mkTypeErr l te)
-- | Errors that can occur during type checking. The idea is that
-- each error carries information that can be used to help explain
-- what went wrong (though the amount of information carried can and
@ -274,35 +305,35 @@ generalize uty = do
-- separately be pretty-printed to display them to the user.
data TypeErr
= -- | An undefined variable was encountered.
UnboundVar SrcLoc Var
UnboundVar Var
| -- | A Skolem variable escaped its local context.
EscapedSkolem SrcLoc Var
EscapedSkolem Var
| -- | Occurs check failure, i.e. infinite type.
Infinite IntVar UType
| -- | Error generated by the unifier.
UnifyErr SrcLoc (TypeF UType) (TypeF UType)
UnifyErr (TypeF UType) (TypeF UType)
| -- | Type mismatch caught by 'unifyCheck'. The given term was
-- expected to have a certain type, but has a different type
-- instead.
Mismatch SrcLoc (Maybe Syntax) UType UType -- expected, actual
Mismatch (Maybe Syntax) UType UType -- expected, actual
| -- | Lambda argument type mismatch.
LambdaArgMismatch SrcLoc UType UType -- expected, actual
LambdaArgMismatch UType UType -- expected, actual
| -- | Record field mismatch, i.e. based on the expected type we
-- were expecting a record with certain fields, but found one with
-- a different field set.
FieldsMismatch SrcLoc (Set Var) (Set Var) -- expected fields, actual
FieldsMismatch (Set Var) (Set Var) -- expected fields, actual
| -- | A definition was encountered not at the top level.
DefNotTopLevel SrcLoc Term
DefNotTopLevel Term
| -- | A term was encountered which we cannot infer the type of.
-- This should never happen.
CantInfer SrcLoc Term
CantInfer Term
| -- | We can't infer the type of a record projection @r.x@ if we
-- don't concretely know the type of the record @r@.
CantInferProj SrcLoc Term
CantInferProj Term
| -- | An attempt to project out a nonexistent field
UnknownProj SrcLoc Var Term
UnknownProj Var Term
| -- | An invalid argument was provided to @atomic@.
InvalidAtomic SrcLoc InvalidAtomicReason Term
InvalidAtomic InvalidAtomicReason Term
deriving (Show)
-- | Various reasons the body of an @atomic@ might be invalid.
@ -319,24 +350,9 @@ data InvalidAtomicReason
LongConst
deriving (Show)
instance Fallible TypeF IntVar TypeErr where
occursFailure = Infinite
mismatchFailure = UnifyErr NoLoc
getTypeErrSrcLoc :: TypeErr -> Maybe SrcLoc
getTypeErrSrcLoc te = case te of
UnboundVar l _ -> Just l
EscapedSkolem l _ -> Just l
Infinite _ _ -> Nothing
UnifyErr l _ _ -> Just l
Mismatch l _ _ _ -> Just l
LambdaArgMismatch l _ _ -> Just l
FieldsMismatch l _ _ -> Just l
DefNotTopLevel l _ -> Just l
CantInfer l _ -> Just l
CantInferProj l _ -> Just l
UnknownProj l _ _ -> Just l
InvalidAtomic l _ _ -> Just l
instance Fallible TypeF IntVar ContextualTypeErr where
occursFailure v t = mkRawTypeErr (Infinite v t)
mismatchFailure t1 t2 = mkRawTypeErr (UnifyErr t1 t2)
------------------------------------------------------------
-- Type decomposition
@ -381,13 +397,13 @@ decomposeProdTy ty = do
-- | Top-level type inference function: given a context of definition
-- types and a top-level term, either return a type error or its
-- type as a 'TModule'.
inferTop :: TCtx -> Syntax -> Either TypeErr TModule
inferTop :: TCtx -> Syntax -> Either ContextualTypeErr TModule
inferTop ctx = runTC ctx . inferModule
-- | Infer the signature of a top-level expression which might
-- contain definitions.
inferModule :: Syntax -> TC UModule
inferModule s@(Syntax l t) = (`catchError` addLocToTypeErr s) $ case t of
inferModule s@(Syntax l t) = addLocToTypeErr l $ case t of
-- For definitions with no type signature, make up a fresh type
-- variable for the body, infer the body under an extended context,
-- and unify the two. Then generalize the type and return an
@ -457,7 +473,7 @@ inferModule s@(Syntax l t) = (`catchError` addLocToTypeErr s) $ case t of
-- For most everything else we prefer 'check' because it can often
-- result in better and more localized type error messages.
infer :: Syntax -> TC (Syntax' UType)
infer s@(Syntax l t) = (`catchError` addLocToTypeErr s) $ case t of
infer s@(Syntax l t) = addLocToTypeErr l $ case t of
-- Primitives, i.e. things for which we immediately know the only
-- possible correct type, and knowing an expected type would provide
-- no extra information.
@ -479,7 +495,7 @@ infer s@(Syntax l t) = (`catchError` addLocToTypeErr s) $ case t of
-- We should never encounter a TRef since they do not show up in
-- surface syntax, only as values while evaluating (*after*
-- typechecking).
TRef _ -> throwError $ CantInfer l t
TRef _ -> throwTypeErr l $ CantInfer t
-- Just look up variables in the context.
TVar x -> Syntax' l (TVar x) <$> lookup l x
-- Need special case here for applying 'atomic' or 'instant' so we
@ -504,7 +520,7 @@ infer s@(Syntax l t) = (`catchError` addLocToTypeErr s) $ case t of
(argTy, resTy) <- decomposeFunTy (f' ^. sType)
-- Then check that the argument has the right type.
x' <- check x argTy `catchError` addLocToTypeErr x
x' <- check x argTy
return $ Syntax' l (SApp f' x') resTy
-- We handle binds in inference mode for a similar reason to
@ -524,8 +540,8 @@ infer s@(Syntax l t) = (`catchError` addLocToTypeErr s) $ case t of
case t1' ^. sType of
UTyRcd m -> case M.lookup x m of
Just xTy -> return $ Syntax' l (SProj t1' x) xTy
Nothing -> throwError $ UnknownProj l x (SProj t1 x)
_ -> throwError $ CantInferProj l (SProj t1 x)
Nothing -> throwTypeErr l $ UnknownProj x (SProj t1 x)
_ -> throwTypeErr l $ CantInferProj (SProj t1 x)
-- See Note [Checking and inference for record literals]
SRcd m -> do
@ -539,7 +555,7 @@ infer s@(Syntax l t) = (`catchError` addLocToTypeErr s) $ case t of
let upty = toU pty
-- Typecheck against skolemized polytype.
uty <- skolemize upty
_ <- check c uty `catchError` addLocToTypeErr c
_ <- check c uty
-- Make sure no skolem variables have escaped.
ask >>= mapM_ (noSkolems l)
-- If check against skolemized polytype is successful,
@ -547,7 +563,7 @@ infer s@(Syntax l t) = (`catchError` addLocToTypeErr s) $ case t of
-- Free variables should be able to unify with anything in
-- following typechecking steps.
iuty <- instantiate upty
c' <- check c iuty `catchError` addLocToTypeErr c
c' <- check c iuty
return $ Syntax' l (SAnnotate c' pty) (c' ^. sType)
-- Fallback: to infer the type of anything else, make up a fresh unification
@ -666,7 +682,7 @@ inferConst c = case c of
-- We try to stay in checking mode as far as possible, decomposing
-- the expected type as we go and pushing it through the recursion.
check :: Syntax -> UType -> TC (Syntax' UType)
check s@(Syntax l t) expected = (`catchError` addLocToTypeErr s) $ case t of
check s@(Syntax l t) expected = addLocToTypeErr l $ case t of
-- if t : ty, then {t} : {ty}.
-- Note that in theory, if the @Maybe Var@ component of the @SDelay@
-- is @Just@, we should typecheck the body under a context extended
@ -698,7 +714,7 @@ check s@(Syntax l t) expected = (`catchError` addLocToTypeErr s) $ case t of
-- on a lambda doesn't match the expected type,
-- e.g. (\x:int. x + 2) : text -> int, since the usual
-- "expected/but got" language would probably be confusing.
Apart -> throwError $ LambdaArgMismatch l argTy xTy
Apart -> throwTypeErr l $ LambdaArgMismatch argTy xTy
-- Otherwise, make sure to unify the annotation with the
-- expected argument type.
_ -> void $ argTy =:= xTy
@ -744,7 +760,7 @@ check s@(Syntax l t) expected = (`catchError` addLocToTypeErr s) $ case t of
Just pty -> do
let upty = toU pty
uty <- skolemize upty
t1' <- withBinding (lvVar x) upty $ check t1 uty `catchError` addLocToTypeErr t1
t1' <- withBinding (lvVar x) upty $ check t1 uty
return (upty, t1')
-- Now check the type of the body.
@ -757,7 +773,7 @@ check s@(Syntax l t) expected = (`catchError` addLocToTypeErr s) $ case t of
return $ Syntax' l (SLet r x mxTy t1' t2') expected
-- Definitions can only occur at the top level.
SDef {} -> throwError $ DefNotTopLevel l t
SDef {} -> throwTypeErr l $ DefNotTopLevel t
-- To check a record, ensure the expected type is a record type,
-- ensure all the right fields are present, and push the expected
-- types of all the fields down into recursive checks.
@ -773,8 +789,8 @@ check s@(Syntax l t) expected = (`catchError` addLocToTypeErr s) $ case t of
let expectedFields = M.keysSet tyMap
actualFields = M.keysSet fields
when (actualFields /= expectedFields) $
throwError $
FieldsMismatch NoLoc expectedFields actualFields
throwTypeErr l $
FieldsMismatch expectedFields actualFields
m' <- itraverse (\x ms -> check (fromMaybe (STerm (TVar x)) ms) (tyMap ! x)) fields
return $ Syntax' l (SRcd (Just <$> m')) expected
@ -830,7 +846,7 @@ check s@(Syntax l t) expected = (`catchError` addLocToTypeErr s) $ case t of
validAtomic :: Syntax -> TC ()
validAtomic s@(Syntax l t) = do
n <- analyzeAtomic S.empty s
when (n > 1) $ throwError (InvalidAtomic l (TooManyTicks n) t)
when (n > 1) $ throwTypeErr l $ InvalidAtomic (TooManyTicks n) t
-- | Analyze an argument to @atomic@: ensure it contains no nested
-- atomic blocks and no references to external variables, and count
@ -853,10 +869,10 @@ analyzeAtomic locals (Syntax l t) = case t of
-- Constants.
TConst c
-- Nested 'atomic' is not allowed.
| c == Atomic -> throwError $ InvalidAtomic l NestedAtomic t
| c == Atomic -> throwTypeErr l $ InvalidAtomic NestedAtomic t
-- We cannot allow long commands (commands that may require more
-- than one tick to execute) since that could freeze the game.
| isLong c -> throwError $ InvalidAtomic l LongConst t
| isLong c -> throwTypeErr l $ InvalidAtomic LongConst t
-- Otherwise, return 1 or 0 depending on whether the command is
-- tangible.
| otherwise -> return $ if isTangible c then 1 else 0
@ -907,15 +923,15 @@ analyzeAtomic locals (Syntax l t) = case t of
xTy' <- applyBindings xTy
if isSimpleUPolytype xTy'
then return 0
else throwError (InvalidAtomic l (NonSimpleVarType x xTy') t)
else throwTypeErr l $ InvalidAtomic (NonSimpleVarType x xTy') t
-- No lambda, `let` or `def` allowed!
SLam {} -> throwError (InvalidAtomic l AtomicDupingThing t)
SLet {} -> throwError (InvalidAtomic l AtomicDupingThing t)
SDef {} -> throwError (InvalidAtomic l AtomicDupingThing t)
SLam {} -> throwTypeErr l $ InvalidAtomic AtomicDupingThing t
SLet {} -> throwTypeErr l $ InvalidAtomic AtomicDupingThing t
SDef {} -> throwTypeErr l $ InvalidAtomic AtomicDupingThing t
-- We should never encounter a TRef since they do not show up in
-- surface syntax, only as values while evaluating (*after*
-- typechecking).
TRef {} -> throwError (CantInfer l t)
TRef {} -> throwTypeErr l $ CantInfer t
-- An explicit type annotation doesn't change atomicity
SAnnotate s _ -> analyzeAtomic locals s