Extract Common Lexer Utilities

This commit is contained in:
Fabián Heredia Montiel 2020-05-20 15:27:50 -05:00
parent 5265c70c71
commit af85cbefa7
5 changed files with 52 additions and 40 deletions

View File

@ -82,6 +82,7 @@ modules =
Idris.Syntax,
Idris.Version,
Parser.Lexer.Common,
Parser.Lexer.Source,
Parser.Rule.Common,
Parser.Rule.Source,

View File

@ -82,6 +82,7 @@ modules =
Idris.Syntax,
Idris.Version,
Parser.Lexer.Common,
Parser.Lexer.Source,
Parser.Rule.Common,
Parser.Rule.Source,

View File

@ -82,6 +82,7 @@ modules =
Idris.Syntax,
Idris.Version,
Parser.Lexer.Common,
Parser.Lexer.Source,
Parser.Rule.Common,
Parser.Rule.Source,

View File

@ -0,0 +1,47 @@
module Parser.Lexer.Common
import public Text.Lexer
%default total
||| In `comment` we are careful not to parse closing delimiters as
||| valid comments. i.e. you may not write many dashes followed by
||| a closing brace and call it a valid comment.
export
comment : Lexer
comment
= is '-' <+> is '-' -- comment opener
<+> many (is '-') <+> reject (is '}') -- not a closing delimiter
<+> many (isNot '\n') -- till the end of line
-- Identifier Lexer
-- There are multiple variants.
public export
data Flavour = Capitalised | AllowDashes | Normal
export
isIdentStart : Flavour -> Char -> Bool
isIdentStart _ '_' = True
isIdentStart Capitalised x = isUpper x || x > chr 160
isIdentStart _ x = isAlpha x || x > chr 160
export
isIdentTrailing : Flavour -> Char -> Bool
isIdentTrailing AllowDashes '-' = True
isIdentTrailing _ '\'' = True
isIdentTrailing _ '_' = True
isIdentTrailing _ x = isAlphaNum x || x > chr 160
export %inline
isIdent : Flavour -> String -> Bool
isIdent flavour string =
case unpack string of
[] => False
(x::xs) => isIdentStart flavour x && all (isIdentTrailing flavour) xs
export %inline
ident : Flavour -> Lexer
ident flavour =
(pred $ isIdentStart flavour) <+>
(many . pred $ isIdentTrailing flavour)

View File

@ -1,9 +1,10 @@
module Parser.Lexer.Source
import public Parser.Lexer.Common
import Data.List
import Data.Strings
import public Text.Lexer
import Utils.Hex
import Utils.Octal
import Utils.String
@ -52,17 +53,7 @@ Show SourceToken where
dotSep [x] = x
dotSep (x :: xs) = x ++ concat ["." ++ y | y <- xs]
||| In `comment` we are careful not to parse closing delimiters as
||| valid comments. i.e. you may not write many dashes followed by
||| a closing brace and call it a valid comment.
comment : Lexer
comment
= is '-' <+> is '-' -- comment opener
<+> many (is '-') <+> reject (is '}') -- not a closing delimiter
<+> many (isNot '\n') -- till the end of line
mutual
||| The mutually defined functions represent different states in a
||| small automaton.
||| `toEndComment` is the default state and it will munch through
@ -111,35 +102,6 @@ blockComment = is '{' <+> is '-' <+> toEndComment 1
docComment : Lexer
docComment = is '|' <+> is '|' <+> is '|' <+> many (isNot '\n')
-- Identifier Lexer
-- There are multiple variants.
data Flavour = Capitalised | AllowDashes | Normal
isIdentStart : Flavour -> Char -> Bool
isIdentStart _ '_' = True
isIdentStart Capitalised x = isUpper x || x > chr 160
isIdentStart _ x = isAlpha x || x > chr 160
isIdentTrailing : Flavour -> Char -> Bool
isIdentTrailing AllowDashes '-' = True
isIdentTrailing _ '\'' = True
isIdentTrailing _ '_' = True
isIdentTrailing _ x = isAlphaNum x || x > chr 160
%inline
isIdent : Flavour -> String -> Bool
isIdent flavour string =
case unpack string of
[] => False
(x::xs) => isIdentStart flavour x && all (isIdentTrailing flavour) xs
%inline
ident : Flavour -> Lexer
ident flavour =
(pred $ isIdentStart flavour) <+>
(many . pred $ isIdentTrailing flavour)
export
isIdentNormal : String -> Bool
isIdentNormal = isIdent Normal