Merge pull request #328 from noahhaasis/escapeCharacters

Implement string escaping
This commit is contained in:
Paul Chiusano 2019-02-08 23:28:04 -05:00 committed by GitHub
commit b94eefdd98
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 39 additions and 5 deletions

View File

@ -25,6 +25,7 @@ data Err
| MissingFractional String -- ex `1.` rather than `1.04`
| UnknownLexeme
| TextLiteralMissingClosingQuote String
| InvalidEscapeCharacter Char
| LayoutError
deriving (Eq,Ord,Show) -- richer algebra
@ -272,11 +273,11 @@ lexer0 scope rem =
Just _ -> Token (Reserved "->") pos end : goWhitespace l end rem
Nothing -> Token (Err LayoutError) pos pos : recover l pos rem
-- string literals and backticked identifiers
'"' : rem -> span' (/= '"') rem $ \(lit, rem) ->
if rem == [] then
[Token (Err (TextLiteralMissingClosingQuote lit)) pos pos]
else let end = inc . incBy lit . inc $ pos in
Token (Textual lit) pos end : goWhitespace l end (pop rem)
'"' : rem -> case splitStringLit rem of
Right (lit, rem) -> let end = inc . incBy lit . inc $ pos in
Token (Textual lit) pos end : goWhitespace l end (pop rem)
Left (TextLiteralMissingClosingQuote _) -> [Token (Err $ TextLiteralMissingClosingQuote rem) pos pos]
Left err -> [Token (Err err) pos pos]
'`' : rem -> case wordyId rem of
Left e -> Token (Err e) pos pos : recover l pos rem
Right (id, rem) ->
@ -315,6 +316,35 @@ matchKeyword' keywords s = case span (not . isSep) s of
(kw, rem) | Set.member kw keywords -> Just (kw, rem)
_ -> Nothing
-- Split the text that follows an opening quote into the string literal and
-- the remaining input.
--
-- Scanning stops at the first unescaped '"'. That quote is NOT consumed: it
-- is left at the head of the returned remainder, so the input only begins
-- with '"' when the literal is empty.
--
-- A backslash followed by any character is an escape sequence and is
-- translated with parseEscapeChar; an unrecognised escape symbol yields
-- Left (InvalidEscapeCharacter symbol). Running out of input before a
-- closing quote yields Left (TextLiteralMissingClosingQuote "").
splitStringLit :: String -> Either Err (String, String)
splitStringLit input = case input of
  [] -> Left (TextLiteralMissingClosingQuote "")
  '"' : _ -> Right ("", input)
  '\\' : esc : rest ->
    maybe (Left (InvalidEscapeCharacter esc)) (keepGoing rest) (parseEscapeChar esc)
  -- Also reached for a lone trailing backslash, which is kept verbatim.
  ch : rest -> keepGoing rest ch
  where
    -- Lex the rest of the literal, then put `ch` in front of it.
    keepGoing rest ch = appendFst ch <$> splitStringLit rest
-- Put a character at the FRONT of the string in the first component of a
-- pair (despite the name, this prepends); the second component is returned
-- untouched.
appendFst :: Char -> (String, a) -> (String, a)
appendFst ch pair = case pair of
  (str, other) -> (ch : str, other)
-- Map an escape symbol (the character written after a backslash) to the
-- character it denotes. Returns Nothing when the symbol is not one of the
-- supported escapes listed in the table below.
parseEscapeChar :: Char -> Maybe Char
parseEscapeChar symbol = lookup symbol escapes
  where
    -- (symbol after the backslash, character it stands for)
    escapes =
      [ ('0', '\0')
      , ('a', '\a')
      , ('b', '\b')
      , ('f', '\f')
      , ('n', '\n')
      , ('r', '\r')
      , ('t', '\t')
      , ('v', '\v')
      , ('\'', '\'')
      , ('"', '"')
      , ('\\', '\\')
      ]
numericLit :: String -> Either Err (Maybe (String,String))
numericLit s = go s
where

View File

@ -51,6 +51,10 @@ test = scope "lexer" . tests $
, t "if x then else" [Open "if", WordyId "x", Close, Open "then", Reserved "else", Close]
-- Empty `else` clause
, t "if x then 1 else" [Open "if", WordyId "x", Close, Open "then", Numeric "1", Close, Open "else", Close]
-- Test string literals
, t "\"simple string without escape characters\"" [Textual "simple string without escape characters"]
, t "\"test escaped quotes \\\"in quotes\\\"\"" [Textual "test escaped quotes \"in quotes\""]
, t "\"\\n \\t \\b \\a\"" [Textual "\n \t \b \a"]
]
t :: String -> [Lexeme] -> Test ()