New layout processor

Better handling of the interaction between virtual layout blocks and the
tokens that delimit them.
This commit is contained in:
Trevor Elliott 2014-09-03 13:25:14 -07:00
parent f3a9ad5fc7
commit 86918fb0f2
2 changed files with 51 additions and 90 deletions

View File

@ -209,11 +209,15 @@ primLexer cfg cs = run inp Normal
singleR p = Range p p (cfgSource cfg)
eofR p = Range p' p' (cfgSource cfg)
where
p' = Position { line = line p + 1, col = 0 }
run i s =
case alexScan i (stateToInt s) of
AlexEOF ->
case s of
Normal -> ([ Located (singleR $ alexPos i) (Token EOF "end of file") ]
Normal -> ([ Located (eofR $ alexPos i) (Token EOF "end of file") ]
, alexPos i
)
InComment p _ _ ->

View File

@ -18,6 +18,8 @@ import Data.Char(toLower)
import Data.Word(Word8)
import Codec.Binary.UTF8.String(encodeChar)
import Debug.Trace
data Config = Config
{ cfgSource :: !FilePath -- ^ File that we are working on
@ -209,112 +211,67 @@ data Block = Virtual Int -- ^ Virtual layout block
-- token.
deriving (Show)
isVirtual :: Block -> Bool
isVirtual Virtual {} = True
isVirtual _ = False
startsLayout :: TokenT -> Bool
startsLayout (KW KW_where) = True
startsLayout (KW KW_private) = True
startsLayout _ = False
-- Add separators computed from layout
layout :: Config -> [Located Token] -> [Located Token]
layout cfg ts0
| implicitScope = virt cfg pos0 VCurlyL : loop True [] ts0
| otherwise = loop False [] ts0
layout cfg ts0 = loop implicitScope [] ts0
where
(pos0,implicitScope) = case ts0 of
t : _ -> (from (srcRange t), cfgModuleScope cfg && tokenType (thing t) /= KW KW_module)
_ -> (start,False)
-- loop (are we first token in a block) (open block stack) tokens
loop :: Bool -> [Block] -> [Located Token] -> [Located Token]
loop startBlock stack (t : ts)
| startsLayout ty = toks ++ loop True stack' ts
| Sym ParenL <- ty = toks ++ loop False (Explicit (Sym ParenR) : stack') ts
| Sym CurlyL <- ty = toks ++ loop False (Explicit (Sym CurlyR) : stack') ts
| Sym BracketL <- ty = toks ++ loop False (Explicit (Sym BracketR) : stack') ts
| EOF <- ty = toks
| otherwise = toks ++ loop False stack' ts
where
ty = tokenType (thing t)
pos = srcRange t
(toks,offStack) = offsides startToks t stack
-- add any block start tokens, and push a level on the stack
(startToks,stack')
| startBlock = ( [ virt cfg (to pos) VCurlyL ], Virtual (col (from pos)) : offStack )
| otherwise = ( [], offStack )
loop _ _ [] = panic "[Lexer] layout" ["Missing EOF token"]
loop startBlock stack (t : ts)
-- If we find a lexical error, we just stop.
| Err _ <- ty = [t]
-- If we find the EOF, we close all open blocks, and then we stop.
| EOF <- ty = extra ++ [ virt cfg (to pos) VCurlyR | _ <- stack ] ++ [t]
-- Left parens start new explicit blocks
| Sym ParenL <- ty = parensClose ++ parensSep ++
t : loop False (Explicit (Sym ParenR) : parensStack) ts
-- Right parens close to the nearest explicit block, failing if they don't
-- properly close it
| Sym ParenR <- ty
, Explicit (Sym ParenR) : ps' <- ps = [ virt cfg (to pos) VCurlyR | _ <- es ]
++ t
: loop False ps' ts
-- If we see the keyword `where`, we start a new virtual block
| KW KW_where <- ty = t : virt cfg (to pos) VCurlyL
: loop True stack ts
-- If we see the keyword `private`, we start a new virtual block
| KW KW_private <- ty
, topLevel stack = punc ++ (t : virt cfg (to pos) VCurlyL
: loop True stack ts)
where ty = tokenType (thing t)
pos = srcRange t
extra | startBlock = [virt cfg (to pos) VCurlyR]
| otherwise = []
punc | startBlock = []
| otherwise = [virt cfg (to pos) VSemi]
-- closing tokens emitted by the parens special case
(parensClose,stack') =
(replicate (length cs) (virt cfg (to pos) VCurlyR), ps')
where
(cs,ps') = span check stack
-- don't include explicit blocks, or separators
check (Virtual p) = col (from pos) < p
check _ = False
parensSep
| Virtual p : _ <- stack'
, p == col (from pos) = [ virt cfg (to pos) VSemi ]
| otherwise = []
parensStack | startBlock = Virtual (col (from pos)) : stack'
| otherwise = stack'
(es,ps) = span isVirtual stack
-- We are the first token in a new block, push our column on the stack.
loop True ps (t : ts) = t : extra ++ loop startBlock ps' ts
offsides :: [Located Token] -> Located Token -> [Block] -> ([Located Token], [Block])
offsides startToks t = go startToks
where
ps' = Virtual c : ps
c = col (from (srcRange t))
go virts stack = case stack of
-- delimit or close a layout block
Virtual c : rest
| closingToken -> go (virt cfg (to pos) VCurlyR : virts) rest
| col (from pos) == c -> done (virt cfg (to pos) VSemi : virts) stack
| col (from pos) < c -> go (virt cfg (to pos) VCurlyR : virts) rest
-- close an explicit block
Explicit close : rest | close == ty -> done virts rest
_ -> done virts stack
ty = tokenType (thing t)
pos = srcRange t
(startBlock,extra)
| KW KW_private == tokenType (thing t) = (True,[virt cfg (to pos) VCurlyL])
| otherwise = (False,[])
-- We are not the first token in a block, check for virtual punctuation.
loop False ps@(Virtual p : ps') (t : ts)
| col pos == p = virt cfg pos VSemi -- same indent: add semi
: t
: loop False ps ts
| col pos < p = virt cfg pos VCurlyR -- less indent: add }
: loop False ps' (t : ts)
where
pos = from (srcRange t)
-- We are part of the currnet thing, just keep going
loop _ stack (t : ts) = t : loop False stack ts
-- Whether the stack contains a single top-level scope.
topLevel [_] = True
topLevel _ = False
done ts s = (reverse (t:ts), s)
closingToken = ty == Sym ParenR
|| ty == Sym Comma
virt :: Config -> Position -> TokenV -> Located Token
virt cfg pos x = Located { srcRange = Range