Merge pull request #1186 from GaloisInc/layout

Layout
This commit is contained in:
Iavor S. Diatchki 2021-05-10 14:12:04 -07:00 committed by GitHub
commit 142567d04c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 396 additions and 218 deletions

View File

@ -81,6 +81,8 @@ library
Exposed-modules: Cryptol.Parser,
Cryptol.Parser.Lexer,
Cryptol.Parser.Token,
Cryptol.Parser.Layout,
Cryptol.Parser.AST,
Cryptol.Parser.Position,
Cryptol.Parser.Names,

View File

@ -35,6 +35,7 @@ import Control.Monad(liftM2,msum)
import Cryptol.Parser.AST
import Cryptol.Parser.Position
import Cryptol.Parser.LexerUtils hiding (mkIdent)
import Cryptol.Parser.Token
import Cryptol.Parser.ParserUtils
import Cryptol.Parser.Unlit(PreProc(..), guessPreProc)
import Cryptol.Utils.Ident(paramInstModName)

View File

@ -0,0 +1,237 @@
{-# Language BlockArguments #-}
{-# Language OverloadedStrings #-}
module Cryptol.Parser.Layout where
import Cryptol.Utils.Panic(panic)
import Cryptol.Parser.Position
import Cryptol.Parser.Token
{-
We assume the existence of an explicit EOF token at the end of the input. This token is *less* indented
than all other tokens (i.e., it is at column 0)
Explicit Layout Blocks
* The symbols `(`, `{`, and `[` start an explicit layout block.
* While in an explicit layout block we pass through tokens, except:
- We may start new implicit or explicit layout blocks
- A `,` terminates any *nested* layout blocks
- We terminate the current layout block if we encounter the matching
closing symbol `)`, `}`, `]`
Implicit Layout Blocks
* The keywords `where`, `private`, and `parameter` start an implicit
layout block.
* The layout block starts at the column of the *following* token and we
insert "virtual start block" between the current and the following tokens.
* While in an implicit layout block:
- We may start new implicit or explicit layout blocks
- We insert a "virtual separator" before tokens starting at the same
column as the layout block, EXCEPT:
* we do not insert a separator if the previous token was a
"documentation comment"
* we do not insert a separator before the first token in the block
- The implicit layout block is ended by:
* a token than is less indented that the block, or
* `)`, `}`, `]`, or
* ',' but only if there is an outer paren block
block's column.
- When an implicit layout block ends, we insert a "virtual end block"
token just before the token that caused the block to end.
Examples:
f = x where x = 0x1 -- end implicit layout by layout
g = 0x3 -- (`g` is less indented than `x`)
f (x where x = 2) -- end implicit layout by `)`
[ x where x = 2, 3 ] -- end implicit layout by `,`
module A where -- two implicit layout blocks with the
private -- *same* indentation (`where` and `private`)
x = 0x2
-}
layout :: Bool -> [Located Token] -> [Located Token]
layout isMod ts0
-- Star an implicit layout block at the top of the module
| let t = head ts0
rng = srcRange t
blockCol = max 1 (col (from rng)) -- see startImplicitBlock
, isMod && tokenType (thing t) /= KW KW_module =
virt rng VCurlyL : go [ Virtual blockCol ] blockCol True ts0
| otherwise =
go [] 0 False ts0
where
{- State parameters for `go`:
stack:
The stack of implicit and explicit blocks
lastVirt:
The indentation of the outer most implicit block, or 0 if none.
This can be computed from the stack but we cache
it here as we need to check it on each token.
noVirtSep:
Do not emit a virtual separator even if token matches block alignment.
This is enabled at the beginning of a block, or after a doc string,
or if we just emitted a separtor, but have not yet consumed the
next token.
tokens:
remaining tokens to process
-}
go stack lastVirt noVirtSep tokens
-- End implicit layout due to indentation. If the outermost block
-- is a lyout block we just end it. If the outermost block is an
-- explicit layout block we report a lexical error.
| col curLoc < lastVirt =
endImplictBlock
-- End implicit layout block due to a symbol
| Just (Virtual {}) <- curBlock, endsLayout curTokTy =
endImplictBlock
-- End implicit layout block due to a comma
| Just (Virtual {}) <- curBlock
, Sym Comma <- curTokTy
, not (null [ () | Explicit _ <- popStack ]) =
endImplictBlock
-- Insert a virtual separator
| Just (Virtual {}) <- curBlock
, col curLoc == lastVirt && not noVirtSep =
virt curRange VSemi : go stack lastVirt True tokens
-- Start a new implicit layout. Advances token position.
| startsLayout curTokTy = startImplicitBlock
-- Start a paren block. Advances token position
| Just close <- startsParenBlock curTokTy =
curTok : go (Explicit close : stack) lastVirt False advanceTokens
-- End a paren block. Advances token position
| Just (Explicit close) <- curBlock, close == curTokTy =
curTok : go popStack lastVirt False advanceTokens
-- Disable virtual separator after doc string. Advances token position
| White DocStr <- curTokTy =
curTok : go stack lastVirt True advanceTokens
-- Check to see if we are done. Note that if we got here, implicit layout
-- blocks should have already been closed, as `EOF` is less indented than
-- all other tokens
| EOF <- curTokTy =
[curTok]
-- Any other token, just emit. Advances token position
| otherwise =
curTok : go stack lastVirt False advanceTokens
where
curTok : advanceTokens = tokens
curTokTy = tokenType (thing curTok)
curRange = srcRange curTok
curLoc = from curRange
(curBlock,popStack) =
case stack of
a : b -> (Just a,b)
[] -> (Nothing, panic "layout" ["pop empty stack"])
startImplicitBlock =
let nextRng = srcRange (head advanceTokens)
nextLoc = from nextRng
blockCol = max 1 (col nextLoc)
-- the `max` ensuraes that indentation is always at least 1,
-- in case we are starting a block at the very end of the input
in curTok
: virt nextRng VCurlyL
: go (Virtual blockCol : stack) blockCol True advanceTokens
endImplictBlock =
case curBlock of
Just (Virtual {}) ->
virt curRange VCurlyR
: go popStack newVirt False tokens
where newVirt = case [ n | Virtual n <- popStack ] of
n : _ -> n
_ -> 0
Just (Explicit c) ->
errTok curRange (InvalidIndentation c) : advanceTokens
Nothing -> panic "layout" ["endImplictBlock with empty stack"]
--------------------------------------------------------------------------------
data Block =
Virtual Int -- ^ Virtual layout block
| Explicit TokenT -- ^ An explicit layout block, expecting this ending token.
deriving (Show)
-- | These tokens start an implicit layout block
startsLayout :: TokenT -> Bool
startsLayout ty =
case ty of
KW KW_where -> True
KW KW_private -> True
KW KW_parameter -> True
_ -> False
-- | These tokens end an implicit layout block
endsLayout :: TokenT -> Bool
endsLayout ty =
case ty of
Sym BracketR -> True
Sym ParenR -> True
Sym CurlyR -> True
_ -> False
-- | These tokens start an explicit "paren" layout block.
-- If so, the result contains the corresponding closing paren.
startsParenBlock :: TokenT -> Maybe TokenT
startsParenBlock ty =
case ty of
Sym BracketL -> Just (Sym BracketR)
Sym ParenL -> Just (Sym ParenR)
Sym CurlyL -> Just (Sym CurlyR)
_ -> Nothing
--------------------------------------------------------------------------------
-- | Make a virtual token of the given type
virt :: Range -> TokenV -> Located Token
virt rng x = Located { srcRange = rng { to = from rng }, thing = t }
where
t = Token (Virt x)
case x of
VCurlyL -> "beginning of layout block"
VCurlyR -> "end of layout block"
VSemi -> "layout block separator"
errTok :: Range -> TokenErr -> Located Token
errTok rng x = Located { srcRange = rng { to = from rng }, thing = t }
where
t = Token { tokenType = Err x, tokenText = "" }

View File

@ -18,10 +18,13 @@ module Cryptol.Parser.Lexer
, Located(..)
, Config(..)
, defaultConfig
, dbgLex
) where
import Cryptol.Parser.Position
import Cryptol.Parser.Token
import Cryptol.Parser.LexerUtils
import qualified Cryptol.Parser.Layout as L
import Cryptol.Parser.Unlit(unLit)
import Data.Text (Text)
import qualified Data.Text as Text
@ -195,7 +198,7 @@ stateToInt (InChar {}) = char
-- This stream is fed to the parser.
lexer :: Config -> Text -> ([Located Token], Position)
lexer cfg cs = ( case cfgLayout cfg of
Layout -> layout cfg lexemes
Layout -> L.layout (cfgModuleScope cfg) lexemes
NoLayout -> lexemes
, finalPos
)
@ -253,5 +256,10 @@ primLexer cfg cs = run inp Normal
(rest,pos) = run i' $! s'
in (mtok ++ rest, pos)
dbgLex file =
do txt <- readFile file
let (ts,_) = lexer defaultConfig (Text.pack txt)
mapM_ (print . thing) ts
-- vim: ft=haskell
}

View File

@ -5,19 +5,9 @@
-- Maintainer : cryptol@galois.com
-- Stability : provisional
-- Portability : portable
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE PatternGuards #-}
{-# LANGUAGE BlockArguments #-}
module Cryptol.Parser.LexerUtils where
import Cryptol.Parser.Position
import Cryptol.Parser.Unlit(PreProc(None))
import Cryptol.Utils.PP
import Cryptol.Utils.Panic
import Control.Monad(guard)
import Data.Char(toLower,generalCategory,isAscii,ord,isSpace,
isAlphaNum,isAlpha)
@ -27,8 +17,12 @@ import qualified Data.Text as T
import qualified Data.Text.Read as T
import Data.Word(Word8)
import GHC.Generics (Generic)
import Control.DeepSeq
import Cryptol.Utils.Panic
import Cryptol.Parser.Position
import Cryptol.Parser.Token
import Cryptol.Parser.Unlit(PreProc(None))
data Config = Config
{ cfgSource :: !FilePath -- ^ File that we are working on
@ -362,210 +356,6 @@ dropWhite = filter (notWhite . tokenType . thing)
notWhite _ = True
data Block = Virtual Int -- ^ Virtual layout block
| Explicit TokenT -- ^ An explicit layout block, expecting this ending
-- token.
deriving (Show)
isExplicit :: Block -> Bool
isExplicit Explicit{} = True
isExplicit Virtual{} = False
startsLayout :: TokenT -> Bool
startsLayout (KW KW_where) = True
startsLayout (KW KW_private) = True
startsLayout (KW KW_parameter) = True
startsLayout _ = False
-- Add separators computed from layout
layout :: Config -> [Located Token] -> [Located Token]
layout cfg ts0 = loop False implicitScope [] ts0
where
(_pos0,implicitScope) = case ts0 of
t : _ -> (from (srcRange t), cfgModuleScope cfg && tokenType (thing t) /= KW KW_module)
_ -> (start,False)
loop :: Bool -> Bool -> [Block] -> [Located Token] -> [Located Token]
loop afterDoc startBlock stack (t : ts)
| startsLayout ty = toks ++ loop False True stack' ts
-- We don't do layout within these delimeters
| Sym ParenL <- ty = toks ++ loop False False (Explicit (Sym ParenR) : stack') ts
| Sym CurlyL <- ty = toks ++ loop False False (Explicit (Sym CurlyR) : stack') ts
| Sym BracketL <- ty = toks ++ loop False False (Explicit (Sym BracketR) : stack') ts
| EOF <- ty = toks
| White DocStr <- ty = toks ++ loop True False stack' ts
| otherwise = toks ++ loop False False stack' ts
where
ty = tokenType (thing t)
pos = srcRange t
(toks,offStack)
| afterDoc = ([t], stack)
| otherwise = offsides startToks t stack
-- add any block start tokens, and push a level on the stack
(startToks,stack')
| startBlock && ty == EOF = ( [ virt cfg (to pos) VCurlyR
, virt cfg (to pos) VCurlyL ]
, offStack )
| startBlock = ( [ virt cfg (to pos) VCurlyL ], Virtual (col (from pos)) : offStack )
| otherwise = ( [], offStack )
loop _ _ _ [] = panic "[Lexer] layout" ["Missing EOF token"]
offsides :: [Located Token] -> Located Token -> [Block] -> ([Located Token], [Block])
offsides startToks t = go startToks
where
go virts stack = case stack of
-- delimit or close a layout block
Virtual c : rest
-- commas only close to an explicit marker, so if there is none, the
-- comma doesn't close anything
| Sym Comma == ty ->
if any isExplicit rest
then go (virt cfg (to pos) VCurlyR : virts) rest
else done virts stack
| closingToken -> go (virt cfg (to pos) VCurlyR : virts) rest
| col (from pos) == c -> done (virt cfg (to pos) VSemi : virts) stack
| col (from pos) < c -> go (virt cfg (to pos) VCurlyR : virts) rest
-- close an explicit block
Explicit close : rest | close == ty -> done virts rest
| Sym Comma == ty -> done virts stack
_ -> done virts stack
ty = tokenType (thing t)
pos = srcRange t
done ts s = (reverse (t:ts), s)
closingToken = ty `elem` [ Sym ParenR, Sym BracketR, Sym CurlyR ]
virt :: Config -> Position -> TokenV -> Located Token
virt cfg pos x = Located { srcRange = Range
{ from = pos
, to = pos
, source = cfgSource cfg
}
, thing = t }
where t = Token (Virt x) $ case x of
VCurlyL -> "beginning of layout block"
VCurlyR -> "end of layout block"
VSemi -> "layout block separator"
--------------------------------------------------------------------------------
data Token = Token { tokenType :: !TokenT, tokenText :: !Text }
deriving (Show, Generic, NFData)
-- | Virtual tokens, inserted by layout processing.
data TokenV = VCurlyL| VCurlyR | VSemi
deriving (Eq, Show, Generic, NFData)
data TokenW = BlockComment | LineComment | Space | DocStr
deriving (Eq, Show, Generic, NFData)
data TokenKW = KW_else
| KW_extern
| KW_fin
| KW_if
| KW_private
| KW_include
| KW_inf
| KW_lg2
| KW_lengthFromThen
| KW_lengthFromThenTo
| KW_max
| KW_min
| KW_module
| KW_submodule
| KW_newtype
| KW_pragma
| KW_property
| KW_then
| KW_type
| KW_where
| KW_let
| KW_x
| KW_import
| KW_as
| KW_hiding
| KW_infixl
| KW_infixr
| KW_infix
| KW_primitive
| KW_parameter
| KW_constraint
| KW_Prop
deriving (Eq, Show, Generic, NFData)
-- | The named operators are a special case for parsing types, and 'Other' is
-- used for all other cases that lexed as an operator.
data TokenOp = Plus | Minus | Mul | Div | Exp | Mod
| Equal | LEQ | GEQ
| Complement | Hash | At
| Other [T.Text] T.Text
deriving (Eq, Show, Generic, NFData)
data TokenSym = Bar
| ArrL | ArrR | FatArrR
| Lambda
| EqDef
| Comma
| Semi
| Dot
| DotDot
| DotDotDot
| DotDotLt
| Colon
| BackTick
| ParenL | ParenR
| BracketL | BracketR
| CurlyL | CurlyR
| TriL | TriR
| Lt
| Underscore
deriving (Eq, Show, Generic, NFData)
data TokenErr = UnterminatedComment
| UnterminatedString
| UnterminatedChar
| InvalidString
| InvalidChar
| LexicalError
| MalformedLiteral
| MalformedSelector
deriving (Eq, Show, Generic, NFData)
data SelectorType = RecordSelectorTok Text | TupleSelectorTok Int
deriving (Eq, Show, Generic, NFData)
data TokenT = Num !Integer !Int !Int -- ^ value, base, number of digits
| Frac !Rational !Int -- ^ value, base.
| ChrLit !Char -- ^ character literal
| Ident ![T.Text] !T.Text -- ^ (qualified) identifier
| StrLit !String -- ^ string literal
| Selector !SelectorType -- ^ .hello or .123
| KW !TokenKW -- ^ keyword
| Op !TokenOp -- ^ operator
| Sym !TokenSym -- ^ symbol
| Virt !TokenV -- ^ virtual token (for layout)
| White !TokenW -- ^ white space token
| Err !TokenErr -- ^ error token
| EOF
deriving (Eq, Show, Generic, NFData)
instance PP Token where
ppPrec _ (Token _ s) = text (T.unpack s)
-- | Collapse characters into a single Word8, identifying ASCII, and classes of
-- unicode. This came from:

View File

@ -35,7 +35,7 @@ import Prelude.Compat
import Cryptol.Parser.AST
import Cryptol.Parser.Lexer
import Cryptol.Parser.LexerUtils(SelectorType(..))
import Cryptol.Parser.Token(SelectorType(..))
import Cryptol.Parser.Position
import Cryptol.Parser.Utils (translateExprToNumT,widthIdent)
import Cryptol.Utils.Ident(packModName,packIdent,modNameChunks)
@ -81,6 +81,12 @@ lexerP k = P $ \cfg p s ->
T.unpack (tokenText it)
MalformedSelector -> "malformed selector: " ++
T.unpack (tokenText it)
InvalidIndentation c -> "invalid indentation, unmatched " ++
case c of
Sym CurlyR -> "{ ... } "
Sym ParenR -> "( ... )"
Sym BracketR -> "[ ... ]"
_ -> show c -- basically panic
]
where it = thing t

116
src/Cryptol/Parser/Token.hs Normal file
View File

@ -0,0 +1,116 @@
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveGeneric #-}
module Cryptol.Parser.Token where
import Data.Text(Text)
import qualified Data.Text as Text
import Control.DeepSeq
import GHC.Generics
import Cryptol.Utils.PP
data Token = Token { tokenType :: !TokenT, tokenText :: !Text }
deriving (Show, Generic, NFData)
-- | Virtual tokens, inserted by layout processing.
data TokenV = VCurlyL| VCurlyR | VSemi
deriving (Eq, Show, Generic, NFData)
data TokenW = BlockComment | LineComment | Space | DocStr
deriving (Eq, Show, Generic, NFData)
data TokenKW = KW_else
| KW_extern
| KW_fin
| KW_if
| KW_private
| KW_include
| KW_inf
| KW_lg2
| KW_lengthFromThen
| KW_lengthFromThenTo
| KW_max
| KW_min
| KW_module
| KW_submodule
| KW_newtype
| KW_pragma
| KW_property
| KW_then
| KW_type
| KW_where
| KW_let
| KW_x
| KW_import
| KW_as
| KW_hiding
| KW_infixl
| KW_infixr
| KW_infix
| KW_primitive
| KW_parameter
| KW_constraint
| KW_Prop
deriving (Eq, Show, Generic, NFData)
-- | The named operators are a special case for parsing types, and 'Other' is
-- used for all other cases that lexed as an operator.
data TokenOp = Plus | Minus | Mul | Div | Exp | Mod
| Equal | LEQ | GEQ
| Complement | Hash | At
| Other [Text] Text
deriving (Eq, Show, Generic, NFData)
data TokenSym = Bar
| ArrL | ArrR | FatArrR
| Lambda
| EqDef
| Comma
| Semi
| Dot
| DotDot
| DotDotDot
| DotDotLt
| Colon
| BackTick
| ParenL | ParenR
| BracketL | BracketR
| CurlyL | CurlyR
| TriL | TriR
| Lt
| Underscore
deriving (Eq, Show, Generic, NFData)
data TokenErr = UnterminatedComment
| UnterminatedString
| UnterminatedChar
| InvalidString
| InvalidChar
| LexicalError
| MalformedLiteral
| MalformedSelector
| InvalidIndentation TokenT -- expected closing paren
deriving (Eq, Show, Generic, NFData)
data SelectorType = RecordSelectorTok Text | TupleSelectorTok Int
deriving (Eq, Show, Generic, NFData)
data TokenT = Num !Integer !Int !Int -- ^ value, base, number of digits
| Frac !Rational !Int -- ^ value, base.
| ChrLit !Char -- ^ character literal
| Ident ![Text] !Text -- ^ (qualified) identifier
| StrLit !String -- ^ string literal
| Selector !SelectorType -- ^ .hello or .123
| KW !TokenKW -- ^ keyword
| Op !TokenOp -- ^ operator
| Sym !TokenSym -- ^ symbol
| Virt !TokenV -- ^ virtual token (for layout)
| White !TokenW -- ^ white space token
| Err !TokenErr -- ^ error token
| EOF
deriving (Eq, Show, Generic, NFData)
instance PP Token where
ppPrec _ (Token _ s) = text (Text.unpack s)

5
tests/issues/T1179.cry Normal file
View File

@ -0,0 +1,5 @@
module T1179 where
x = 0x2
private
y = x

1
tests/issues/T1179.icry Normal file
View File

@ -0,0 +1 @@
:load T1179.cry

View File

@ -0,0 +1,3 @@
Loading module Cryptol
Loading module Cryptol
Loading module T1179

View File

@ -0,0 +1,4 @@
module Layout02 where
x = 0x1 where y = ( 2
)

View File

@ -0,0 +1 @@
:load Layout02.cry

View File

@ -0,0 +1,4 @@
Loading module Cryptol
Parse error at Layout02.cry:4:11--4:11
invalid indentation, unmatched ( ... )