From 915379af0e707e32c666da277122447d13172e74 Mon Sep 17 00:00:00 2001 From: Trevor Elliott Date: Wed, 10 Jun 2015 18:30:10 -0700 Subject: [PATCH] Lex qualified names and operators --- src/Cryptol/Parser.y | 96 ++++++++++++++++--------------- src/Cryptol/Parser/Lexer.x | 11 +++- src/Cryptol/Parser/LexerUtils.hs | 40 +++++++++++-- src/Cryptol/Parser/ParserUtils.hs | 25 ++++---- 4 files changed, 105 insertions(+), 67 deletions(-) diff --git a/src/Cryptol/Parser.y b/src/Cryptol/Parser.y index 03617323..11b0676a 100644 --- a/src/Cryptol/Parser.y +++ b/src/Cryptol/Parser.y @@ -36,10 +36,12 @@ import Paths_cryptol %token NUM { $$@(Located _ (Token (Num {}) _))} - IDENT { $$@(Located _ (Token (Ident {}) _))} STRLIT { $$@(Located _ (Token (StrLit {}) _))} CHARLIT { $$@(Located _ (Token (ChrLit {}) _))} + IDENT { $$@(Located _ (Token (Ident [] _) _))} + QIDENT { $$@(Located _ (Token Ident{} _))} + 'include' { Located $$ (Token (KW KW_include) _)} 'import' { Located $$ (Token (KW KW_import) _)} 'as' { Located $$ (Token (KW KW_as) _)} @@ -81,7 +83,6 @@ import Paths_cryptol '=' { Located $$ (Token (Sym EqDef ) _)} '`' { Located $$ (Token (Sym BackTick) _)} ':' { Located $$ (Token (Sym Colon ) _)} - '::' { Located $$ (Token (Sym ColonColon) _)} '->' { Located $$ (Token (Sym ArrR ) _)} '=>' { Located $$ (Token (Sym FatArrR ) _)} '\\' { Located $$ (Token (Sym Lambda ) _)} @@ -101,7 +102,8 @@ import Paths_cryptol '#' { Located $$ (Token (Op Hash) _)} - OP { $$@(Located _ (Token (Op {}) _))} + OP { $$@(Located _ (Token (Op (Other [] _)) _))} + QOP { $$@(Located _ (Token (Op Other{} ) _))} DOC { $$@(Located _ (Token (White DocStr) _)) } @@ -136,7 +138,7 @@ import Paths_cryptol %left '*' '/' '%' %right '^^' %right NEG '~' -%left OP +%left OP QOP %% @@ -234,8 +236,8 @@ top_decl :: { [TopDecl] } | prim_bind { $1 } prim_bind :: { [TopDecl] } - : mbDoc 'primitive' name ':' schema { mkPrimDecl $1 False (fmap mkUnqual $3) $5 } - | mbDoc 'primitive' '(' op ')' ':' schema { mkPrimDecl $1 True $4 $7 } + : mbDoc 'primitive' name ':' schema { mkPrimDecl $1 False $3 $5 } + | mbDoc 'primitive' '(' op ')' ':' schema { mkPrimDecl $1 True $4 $7 } doc :: { Located String } : DOC { mkDoc (fmap tokenText $1) } @@ -261,7 +263,7 @@ decl :: { Decl } } } | apat op apat '=' expr { at ($1,$5) $ - DBind $ Bind { bName = $2 + DBind $ Bind { bName = fmap mkUnqual $2 , bParams = [$1,$3] , bDef = at $5 (Located emptyRange (DExpr $5)) , bSignature = Nothing @@ -310,7 +312,7 @@ vars_comma :: { [ LName ] } var :: { LName } : name { $1 } - | '(' op ')' { fmap unqual $2 } + | '(' op ')' { $2 } apats :: { [Pattern] } : apat { [$1] } @@ -338,7 +340,7 @@ repl :: { ReplInput } expr :: { Expr } - : iexpr { $1 } + : cexpr { $1 } | expr 'where' '{' '}' { at ($1,$4) $ EWhere $1 [] } | expr 'where' '{' decls '}' { at ($1,$5) $ EWhere $1 (reverse $4) } | expr 'where' 'v{' 'v}' { at ($1,$2) $ EWhere $1 [] } @@ -351,33 +353,43 @@ ifBranches :: { [(Expr, Expr)] } ifBranch :: { (Expr, Expr) } : expr 'then' expr { ($1, $3) } +cexpr :: { Expr } + : sig_expr { $1 } + | 'if' ifBranches 'else' iexpr { at ($1,$4) $ mkIf $2 $4 } + | '\\' apats '->' iexpr { at ($1,$4) $ EFun (reverse $2) $4 } + +sig_expr :: { Expr } + : iexpr { $1 } + | iexpr ':' type { at ($1,$3) $ ETyped $1 $3 } + iexpr :: { Expr } : expr10 { $1 } - | iexpr op expr10 { binOp $1 $2 $3 } - | iexpr ':' type { at ($1,$3) $ ETyped $1 $3 } + | iexpr qop expr10 { binOp $1 $2 $3 } expr10 :: { Expr } : aexprs { mkEApp $1 } - | 'if' ifBranches 'else' iexpr { at ($1,$4) $ mkIf $2 $4 } - | '\\' apats '->' iexpr { at ($1,$4) $ EFun (reverse $2) $4 } - | '-' expr10 %prec NEG { at ($1,$2) $ EApp (at $1 (EVar (mkUnqual (Name "negate")))) $2 } | '~' expr10 { at ($1,$2) $ EApp (at $1 (EVar (mkUnqual (Name "complement")))) $2 } -op :: { LQName } - : OP { let Token (Op (Other str)) _ = thing $1 - in mkUnqual (Name str) A.<$ $1 } +qop :: { LQName } + : op { fmap mkUnqual $1 } + | QOP { let Token (Op (Other ns i)) _ = thing $1 + in mkQual (ModName ns) (Name i) A.<$ $1 } + +op :: { LName } + : OP { let Token (Op (Other [] str)) _ = thing $1 + in Name str A.<$ $1 } -- special cases for operators that are re-used elsewhere - | '*' { Located $1 $ mkUnqual (Name "*" ) } - | '+' { Located $1 $ mkUnqual (Name "+" ) } - | '-' { Located $1 $ mkUnqual (Name "-" ) } - | '~' { Located $1 $ mkUnqual (Name "~" ) } - | '^^' { Located $1 $ mkUnqual (Name "^^") } - | '#' { Located $1 $ mkUnqual (Name "#" ) } + | '*' { Located $1 $ Name "*" } + | '+' { Located $1 $ Name "+" } + | '-' { Located $1 $ Name "-" } + | '~' { Located $1 $ Name "~" } + | '^^' { Located $1 $ Name "^^" } + | '#' { Located $1 $ Name "#" } -ops :: { [LQName] } +ops :: { [LName] } : op { [$1] } | ops ',' op { $3 : $1 } @@ -402,7 +414,7 @@ aexpr :: { Expr } | '`' tick_ty { at ($1,$2) $ ETypeVal $2 } | aexpr '.' selector { at ($1,$3) $ ESel $1 (thing $3) } - | '(' op ')' { at ($1,$3) $ EVar $ thing $2 } + | '(' qop ')' { at ($1,$3) $ EVar $ thing $2 } | '<|' '|>' {% mkPoly (rComb $1 $2) [] } | '<|' poly_terms '|>' {% mkPoly (rComb $1 $3) $2 } @@ -536,10 +548,10 @@ tysyn_params :: { [TParam] } : tysyn_param { [$1] } | tysyn_params tysyn_param { $2 : $1 } -type :: { Type } - : app_type '->' type { at ($1,$3) $ TFun $1 $3 } - | type op app_type { at ($1,$3) $ TInfix $1 $2 $3 } - | app_type { $1 } +type :: { Type } + : app_type '->' type { at ($1,$3) $ TFun $1 $3 } + | type op app_type { at ($1,$3) $ TInfix $1 (fmap mkUnqual $2) $3 } + | app_type { $1 } app_type :: { Type } -- : 'lg2' atype { at ($1,$2) $ TApp TCLg2 [$2] } @@ -587,10 +599,6 @@ field_types :: { [Named Type] } | field_types ',' field_type { $3 : $1 } -qname_parts :: { [LName] } -- Reversed! - : name { [$1] } - | qname_parts '::' name { $3 : $1 } - name :: { LName } : IDENT { $1 { thing = getName $1 } } | 'x' { Located { srcRange = $1, thing = Name "x" }} @@ -600,23 +608,17 @@ name :: { LName } modName :: { Located ModName } - : qname_parts { mkModName $1 } + : qname { mkModName $1 } qname :: { Located QName } - : qname_parts { mkQName $1 } + : name { fmap mkUnqual $1 } + | QIDENT { let Token (Ident ns i) _ = thing $1 + in mkQual (ModName ns) (Name i) A.<$ $1 } -help_name :: { Located QName } - : help_name_parts { mkQName $1 } - -help_name_parts :: { [LName] } - : name { [$1] } - | qname_parts '::' name { $3:$1 } - - | op { [fmap unqual $1] } - | qname_parts '::' op { fmap unqual $3:$1 } - - | '(' op ')' { [fmap unqual $2] } - | '(' qname_parts '::' op ')' { fmap unqual $4:$2 } +help_name :: { Located QName } + : qname { $1 } + | qop { $1 } + | '(' qop ')' { $2 } {- The types that can come after a back-tick: either a type demotion, or an explicit type application. Explicit type applications are converted diff --git a/src/Cryptol/Parser/Lexer.x b/src/Cryptol/Parser/Lexer.x index 37c4550d..c98d51e8 100644 --- a/src/Cryptol/Parser/Lexer.x +++ b/src/Cryptol/Parser/Lexer.x @@ -31,9 +31,12 @@ $unitick = \x7 @id_next = [a-zA-Z0-9_'] | $unilower | $uniupper | $unidigit | $unitick @id = @id_first @id_next* - @op = ([\!\@\#\$\%\^\&\*\~\>\<\?\+\=\|\/] | $unisymbol)+ +@qual = (@id $white* :: $white*)+ +@qual_id = @qual @id +@qual_op = @qual @op + @num2 = "0b" [0-1]+ @num8 = "0o" [0-7]+ @num10 = [0-9]+ @@ -75,6 +78,9 @@ $unitick = \x7 $white+ { emit $ White Space } "//" .* { emit $ White LineComment } +@qual_id { mkQualIdent } +@qual_op { mkQualOp } + -- Please update the docs, if you add new entries. "else" { emit $ KW KW_else } "extern" { emit $ KW KW_extern } @@ -119,7 +125,6 @@ $white+ { emit $ White Space } ";" { emit $ Sym Semi } "." { emit $ Sym Dot } ":" { emit $ Sym Colon } -"::" { emit $ Sym ColonColon } "`" { emit $ Sym BackTick } ".." { emit $ Sym DotDot } "..." { emit $ Sym DotDotDot } @@ -149,7 +154,7 @@ $white+ { emit $ White Space } "~" { emit (Op Complement) } -- all other operators -@op { emitS (Op . Other) } +@op { emitS (Op . Other []) } } diff --git a/src/Cryptol/Parser/LexerUtils.hs b/src/Cryptol/Parser/LexerUtils.hs index b2cf9ddc..6c72a897 100644 --- a/src/Cryptol/Parser/LexerUtils.hs +++ b/src/Cryptol/Parser/LexerUtils.hs @@ -15,7 +15,7 @@ import Cryptol.Parser.Unlit(PreProc(None)) import Cryptol.Utils.PP import Cryptol.Utils.Panic -import Data.Char(toLower,generalCategory,isAscii,ord) +import Data.Char(toLower,generalCategory,isAscii,ord,isSpace) import qualified Data.Char as Char import Data.List(foldl') import Data.Text.Lazy (Text) @@ -152,7 +152,21 @@ mkIdent :: Action mkIdent cfg p s z = (Just Located { srcRange = r, thing = Token t s }, z) where r = Range { from = p, to = moves p s, source = cfgSource cfg } - t = Ident (T.unpack s) + t = Ident [] (T.unpack s) + +mkQualIdent :: Action +mkQualIdent cfg p s z = (Just Located { srcRange = r, thing = Token t s}, z) + where + r = Range { from = p, to = moves p s, source = cfgSource cfg } + t = Ident (map T.unpack ns) (T.unpack i) + (ns,i) = splitQual s + +mkQualOp :: Action +mkQualOp cfg p s z = (Just Located { srcRange = r, thing = Token t s}, z) + where + r = Range { from = p, to = moves p s, source = cfgSource cfg } + t = Op (Other (map T.unpack ns) (T.unpack i)) + (ns,i) = splitQual s emit :: TokenT -> Action emit t cfg p s z = (Just Located { srcRange = r, thing = Token t s }, z) @@ -163,6 +177,23 @@ emitS :: (String -> TokenT) -> Action emitS t cfg p s z = emit (t (T.unpack s)) cfg p s z +-- | Split out the prefix and name part of an identifier/operator. +splitQual :: T.Text -> ([T.Text], T.Text) +splitQual t = + case splitNS (T.filter (not . isSpace) t) of + [] -> panic "[Lexer] mkQualIdent" ["invalid qualified name", show t] + [i] -> ([], i) + xs -> (init xs, last xs) + + where + + -- split on the namespace separator, `::` + splitNS s = + case T.breakOn "::" s of + (l,r) | T.null r -> [l] + | otherwise -> l : splitNS (T.drop 2 r) + + -------------------------------------------------------------------------------- numToken :: Integer -> String -> TokenT @@ -355,7 +386,7 @@ data TokenKW = KW_Arith data TokenOp = Plus | Minus | Mul | Div | Exp | Mod | Equal | LEQ | GEQ | Complement | Hash - | Other String + | Other [String] String deriving (Eq,Show) data TokenSym = Bar @@ -368,7 +399,6 @@ data TokenSym = Bar | DotDot | DotDotDot | Colon - | ColonColon | BackTick | ParenL | ParenR | BracketL | BracketR @@ -387,7 +417,7 @@ data TokenErr = UnterminatedComment data TokenT = Num Integer Int Int -- ^ value, base, number of digits | ChrLit Char -- ^ character literal - | Ident String -- ^ identifier + | Ident [String] String -- ^ (qualified) identifier | StrLit String -- ^ string literal | KW TokenKW -- ^ keyword | Op TokenOp -- ^ operator diff --git a/src/Cryptol/Parser/ParserUtils.hs b/src/Cryptol/Parser/ParserUtils.hs index 469a9e4f..78d279eb 100644 --- a/src/Cryptol/Parser/ParserUtils.hs +++ b/src/Cryptol/Parser/ParserUtils.hs @@ -108,12 +108,11 @@ errorMessage r x = P $ \_ _ _ -> Left (HappyErrorMsg r x) customError :: String -> Located Token -> ParseM a customError x t = P $ \_ _ _ -> Left (HappyErrorMsg (srcRange t) x) -mkModName :: {-reversed-} [LName] -> Located ModName -mkModName xs = Located { srcRange = rComb (srcRange f) (srcRange l) - , thing = ModName [ x | Name x <- map thing ns ] - } - where l : _ = xs - ns@(f : _) = reverse xs +mkModName :: LQName -> Located ModName +mkModName = fmap $ \ (QName mb (Name n)) -> + case mb of + Just (ModName ns) -> ModName (ns ++ [n]) + Nothing -> ModName [n] mkQName :: {-reversed-} [LName] -> Located QName mkQName [x] = fmap mkUnqual x @@ -130,7 +129,7 @@ mkSchema xs ps t = Forall xs ps t Nothing getName :: Located Token -> Name getName l = case thing l of - Token (Ident x) _ -> Name x + Token (Ident [] x) _ -> Name x _ -> panic "[Parser] getName" ["not an Ident:", show l] getNum :: Located Token -> Integer @@ -159,12 +158,12 @@ intVal tok = Num x _ _ -> return x _ -> errorMessage (srcRange tok) "Expected an integer" -mkFixity :: Assoc -> Located Token -> [LQName] -> ParseM Decl +mkFixity :: Assoc -> Located Token -> [LName] -> ParseM Decl mkFixity assoc tok qns = do l <- intVal tok unless (l >= 1 && l <= 100) (errorMessage (srcRange tok) "Fixity levels must be between 0 and 20") - return (DFixity (Fixity assoc (fromInteger l)) qns) + return (DFixity (Fixity assoc (fromInteger l)) (map (fmap mkUnqual) qns)) mkTupleSel :: Range -> Integer -> ParseM (Located Selector) mkTupleSel pos n @@ -343,10 +342,10 @@ mkIf ifThens theElse = foldr addIfThen theElse ifThens -- pass. This is also the reason we add the doc to the TopLevel constructor, -- instead of just place it on the binding directly. A better solution might be -- to just have a different constructor for primitives. -mkPrimDecl :: Maybe (Located String) -> Bool -> LQName -> Schema -> [TopDecl] +mkPrimDecl :: Maybe (Located String) -> Bool -> LName -> Schema -> [TopDecl] mkPrimDecl mbDoc isInfix n sig = [ exportDecl mbDoc Public - $ DBind Bind { bName = n + $ DBind Bind { bName = qname , bParams = [] , bDef = at sig (Located emptyRange DPrim) , bSignature = Nothing @@ -357,8 +356,10 @@ mkPrimDecl mbDoc isInfix n sig = , bDoc = Nothing } , exportDecl Nothing Public - $ DSignature [n] sig + $ DSignature [qname] sig ] + where + qname = fmap mkUnqual n -- | Fix-up the documentation strings by removing the comment delimiters on each -- end, and stripping out common prefixes on all the remaining lines.