Handle multiline comments in Package (ipkg) (#3386)

This commit is contained in:
Boyd Stephen Smith Jr. 2024-09-24 10:26:30 -05:00 committed by GitHub
parent f3dca12a5f
commit 0e83d6c7c6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 81 additions and 54 deletions

View File

@ -49,6 +49,8 @@ This CHANGELOG describes the merged but unreleased changes. Please see [CHANGELO
* Support for macOS PowerPC added.
* Multiline comments `{- text -}` are now supported in ipkg files.
### Language changes
* Autobind and Typebind modifier on operators allow the user to

View File

@ -15,9 +15,11 @@ André Videla
Andy Lok
Anthony Lodi
Arnaud Bailly
Boyd Stephen Smith Jr.
Brian Wignall
Bryn Keller
Christian Rasmussen
Claudio Etterli
Cyrill Brunner
Danylo Lapirov
David Smith
@ -41,11 +43,11 @@ Johann Rudloff
Kamil Shakirov
Kevin Boulain
LuoChen
Marc Petit-Huguenin
MarcelineVQ
Marc Petit-Huguenin
Marshall Bowers
Matthew Mosior
Mathew Polzin
Matthew Mosior
Matthew Wilson
Matus Tejiscak
Michael Messer
@ -58,9 +60,9 @@ Nicolas Biri
Niklas Larsson
Ohad Kammar
Peter Hajdu
rhiannon morris
Ricardo Prado Cunha
Robert Walter
rhiannon morris
Rodrigo Oliveira
Rohit Grover
Rui Barreiro
@ -74,14 +76,13 @@ then0rTh
Theo Butler
Thomas Dziedzic
Thomas E. Hansen
Tim Süberkrüb
Timmy Jose
Tim Süberkrüb
Tom Harley
troiganto
Wen Kokke
Wind Wong
Zoe Stafford
Claudio Etterli
Apologies to anyone we've missed - let us know and we'll correct it (or just
send a PR with the correction).

View File

@ -14,6 +14,56 @@ comment
<+> many (is '-') <+> reject (is '}') -- not a closing delimiter
<+> many (isNot '\n') -- till the end of line
mutual
||| The mutually defined functions represent different states in a
||| small automaton.
||| `toEndComment` is the default state and it will munch through
||| the input until we detect a special character (a dash, an
||| opening brace, or a double quote) and then switch to the
||| appropriate state.
toEndComment : (k : Nat) -> Recognise (k /= 0)
toEndComment Z = empty
toEndComment (S k)
= some (pred (\c => c /= '-' && c /= '{' && c /= '"' && c /= '\''))
<+> (eof <|> toEndComment (S k))
<|> is '{' <+> singleBrace k
<|> is '-' <+> singleDash k
<|> (charLit <|> pred (== '\'')) <+> toEndComment (S k)
<|> stringLit <+> toEndComment (S k)
||| After reading a single brace, we may either finish reading an
||| opening delimiter or ignore it (e.g. it could be an implicit
||| binder).
singleBrace : (k : Nat) -> Lexer
singleBrace k
= is '-' <+> many (is '-') -- opening delimiter
<+> (eof <|> singleDash (S k)) -- `singleDash` handles the {----} special case
-- `eof` handles the file ending with {---
<|> toEndComment (S k) -- not a valid comment
||| After reading a single dash, we may either find another one,
||| meaning we may have started reading a line comment, or find
||| a closing brace meaning we have found a closing delimiter.
singleDash : (k : Nat) -> Lexer
singleDash k
= is '-' <+> doubleDash k -- comment or closing delimiter
<|> is '}' <+> toEndComment k -- closing delimiter
<|> toEndComment (S k) -- not a valid comment
||| After reading a double dash, we are potentially reading a line
||| comment unless the series of uninterrupted dashes is ended with
||| a closing brace in which case it is a closing delimiter.
doubleDash : (k : Nat) -> Lexer
doubleDash k = with Prelude.(::)
many (is '-') <+> choice -- absorb all dashes
[ is '}' <+> toEndComment k -- closing delimiter
, many (isNot '\n') <+> toEndComment (S k) -- line comment
]
export
blockComment : Lexer
blockComment = is '{' <+> is '-' <+> many (is '-') <+> (eof <|> toEndComment 1)
-- Identifier Lexer
-- There are multiple variants.

View File

@ -99,6 +99,7 @@ andop = is '&' <+> is '&'
rawTokens : TokenMap Token
rawTokens =
[ (comment, Comment . drop 2)
, (blockComment, Comment . shrink 2)
, (namespacedIdent, uncurry DotSepIdent . mkNamespacedIdent)
, (identAllowDashes, DotSepIdent Nothing)
, (separator, const Separator)

View File

@ -140,55 +140,6 @@ Pretty Void Token where
pretty (MagicDebugInfo di) = pretty (show di)
pretty (Unrecognised x) = pretty "Unrecognised" <++> pretty x
mutual
||| The mutually defined functions represent different states in a
||| small automaton.
||| `toEndComment` is the default state and it will munch through
||| the input until we detect a special character (a dash, an
||| opening brace, or a double quote) and then switch to the
||| appropriate state.
toEndComment : (k : Nat) -> Recognise (k /= 0)
toEndComment Z = empty
toEndComment (S k)
= some (pred (\c => c /= '-' && c /= '{' && c /= '"' && c /= '\''))
<+> (eof <|> toEndComment (S k))
<|> is '{' <+> singleBrace k
<|> is '-' <+> singleDash k
<|> (charLit <|> pred (== '\'')) <+> toEndComment (S k)
<|> stringLit <+> toEndComment (S k)
||| After reading a single brace, we may either finish reading an
||| opening delimiter or ignore it (e.g. it could be an implicit
||| binder).
singleBrace : (k : Nat) -> Lexer
singleBrace k
= is '-' <+> many (is '-') -- opening delimiter
<+> (eof <|> singleDash (S k)) -- `singleDash` handles the {----} special case
-- `eof` handles the file ending with {---
<|> toEndComment (S k) -- not a valid comment
||| After reading a single dash, we may either find another one,
||| meaning we may have started reading a line comment, or find
||| a closing brace meaning we have found a closing delimiter.
singleDash : (k : Nat) -> Lexer
singleDash k
= is '-' <+> doubleDash k -- comment or closing delimiter
<|> is '}' <+> toEndComment k -- closing delimiter
<|> toEndComment (S k) -- not a valid comment
||| After reading a double dash, we are potentially reading a line
||| comment unless the series of uninterrupted dashes is ended with
||| a closing brace in which case it is a closing delimiter.
doubleDash : (k : Nat) -> Lexer
doubleDash k = with Prelude.(::)
many (is '-') <+> choice -- absorb all dashes
[ is '}' <+> toEndComment k -- closing delimiter
, many (isNot '\n') <+> toEndComment (S k) -- line comment
]
blockComment : Lexer
blockComment = is '{' <+> is '-' <+> many (is '-') <+> (eof <|> toEndComment 1)
docComment : Lexer
docComment = is '|' <+> is '|' <+> is '|' <+> many (isNot '\n')

View File

@ -0,0 +1,4 @@
module Main
main : IO ()
main = putStrLn "CouCou!"

View File

@ -0,0 +1,2 @@
1/1: Building Main (Main.idr)
Now compiling the executable: test

View File

@ -0,0 +1,4 @@
. ../../../testutils.sh
# Previously reported: Uncaught error: Error: Can't recognise token.
idris2 --build test.ipkg

View File

@ -0,0 +1,12 @@
package test
{-
A test of
multiline
comments.
-}
version = 0.1.0
authors = "Boyd Stephen Smith Jr."
main = Main
executable = "test"
{- "." is the top-level package directory -}
sourcedir = "."