mirror of
https://github.com/CatalaLang/catala.git
synced 2024-09-20 00:41:05 +03:00
feat(lexer): improve grave accent lexing outside code blocks
This commit is contained in:
parent
bcb2303b13
commit
866a29a643
@ -532,6 +532,16 @@ let rec lex_code (lexbuf : lexbuf) : token =
|
||||
let lex_law (lexbuf : lexbuf) : token =
|
||||
let prev_lexeme = Utf8.lexeme lexbuf in
|
||||
let prev_pos = lexing_positions lexbuf in
|
||||
let compl_catala =
|
||||
[%sedlex.regexp?
|
||||
( Compl 'c'
|
||||
| 'c', Compl 'a'
|
||||
| "ca", Compl 't'
|
||||
| "cat", Compl 'a'
|
||||
| "cata", Compl 'l'
|
||||
| "catal", Compl 'a'
|
||||
| "catala", Compl (white_space | '\n') )]
|
||||
in
|
||||
match%sedlex lexbuf with
|
||||
| "```catala" ->
|
||||
is_code := true;
|
||||
@ -565,11 +575,9 @@ let lex_law (lexbuf : lexbuf) : token =
|
||||
( Compl ('#' | '`' | '>')
|
||||
(* Following literals allow to match grave accents as long as they don't conflict with the
|
||||
[BEGIN_CODE] token, i.e. either there are no more than three consecutive ones or they must
|
||||
be followed by a white space or a newline character. *)
|
||||
not be followed by 'catala'. *)
|
||||
| Rep ('`', 1 .. 2), Compl '`'
|
||||
| "```", (white_space | '\n')
|
||||
(* @note (EmileRolley): for a more permissive constraint, [white_space] could be replaced by
|
||||
[Compl 'c'] but it lacks consistency in my opinion. *) ) ->
|
||||
| "```", compl_catala ) ->
|
||||
LAW_TEXT (Utf8.lexeme lexbuf)
|
||||
| _ -> raise_lexer_error (Pos.from_lpos prev_pos) prev_lexeme
|
||||
|
||||
|
@ -492,6 +492,16 @@ let rec lex_code (lexbuf : lexbuf) : token =
|
||||
let lex_law (lexbuf : lexbuf) : token =
|
||||
let prev_lexeme = Utf8.lexeme lexbuf in
|
||||
let prev_pos = lexing_positions lexbuf in
|
||||
let compl_catala =
|
||||
[%sedlex.regexp?
|
||||
( Compl 'c'
|
||||
| 'c', Compl 'a'
|
||||
| "ca", Compl 't'
|
||||
| "cat", Compl 'a'
|
||||
| "cata", Compl 'l'
|
||||
| "catal", Compl 'a'
|
||||
| "catala", Compl (white_space | '\n') )]
|
||||
in
|
||||
match%sedlex lexbuf with
|
||||
| "```catala" ->
|
||||
L.is_code := true;
|
||||
@ -525,11 +535,9 @@ let lex_law (lexbuf : lexbuf) : token =
|
||||
( Compl ('#' | '`' | '>')
|
||||
(* Following literals allow to match grave accents as long as they don't conflict with the
|
||||
[BEGIN_CODE] token, i.e. either there are no more than three consecutive ones or they must
|
||||
be followed by a white space or a newline character. *)
|
||||
not be followed by 'catala'. *)
|
||||
| Rep ('`', 1 .. 2), Compl '`'
|
||||
| "```", (white_space | '\n')
|
||||
(* @note (EmileRolley): for a more permissive constraint, [white_space] could be replaced by
|
||||
[Compl 'c'] but it lacks consistency in my opinion. *) ) ->
|
||||
| "```", compl_catala ) ->
|
||||
LAW_TEXT (Utf8.lexeme lexbuf)
|
||||
| _ -> L.raise_lexer_error (Pos.from_lpos prev_pos) prev_lexeme
|
||||
|
||||
|
@ -495,6 +495,16 @@ let rec lex_code (lexbuf : lexbuf) : token =
|
||||
let lex_law (lexbuf : lexbuf) : token =
|
||||
let prev_lexeme = Utf8.lexeme lexbuf in
|
||||
let prev_pos = lexing_positions lexbuf in
|
||||
let compl_catala =
|
||||
[%sedlex.regexp?
|
||||
( Compl 'c'
|
||||
| 'c', Compl 'a'
|
||||
| "ca", Compl 't'
|
||||
| "cat", Compl 'a'
|
||||
| "cata", Compl 'l'
|
||||
| "catal", Compl 'a'
|
||||
| "catala", Compl (white_space | '\n') )]
|
||||
in
|
||||
match%sedlex lexbuf with
|
||||
| "```catala" ->
|
||||
L.is_code := true;
|
||||
@ -532,11 +542,9 @@ let lex_law (lexbuf : lexbuf) : token =
|
||||
( Compl ('#' | '`' | '>')
|
||||
(* Following literals allow to match grave accents as long as they don't conflict with the
|
||||
[BEGIN_CODE] token, i.e. either there are no more than three consecutive ones or they must
|
||||
be followed by a white space or a newline character. *)
|
||||
not be followed by 'catala'. *)
|
||||
| Rep ('`', 1 .. 2), Compl '`'
|
||||
| "```", (white_space | '\n')
|
||||
(* @note (EmileRolley): for a more permissive constraint, [white_space] could be replaced by
|
||||
[Compl 'c'] but it lacks consistency in my opinion. *) ) ->
|
||||
| "```", compl_catala ) ->
|
||||
LAW_TEXT (Utf8.lexeme lexbuf)
|
||||
| _ -> L.raise_lexer_error (Pos.from_lpos prev_pos) prev_lexeme
|
||||
|
||||
|
@ -499,6 +499,16 @@ let rec lex_code (lexbuf : lexbuf) : token =
|
||||
let lex_law (lexbuf : lexbuf) : token =
|
||||
let prev_lexeme = Utf8.lexeme lexbuf in
|
||||
let prev_pos = lexing_positions lexbuf in
|
||||
let compl_catala =
|
||||
[%sedlex.regexp?
|
||||
( Compl 'c'
|
||||
| 'c', Compl 'a'
|
||||
| "ca", Compl 't'
|
||||
| "cat", Compl 'a'
|
||||
| "cata", Compl 'l'
|
||||
| "catal", Compl 'a'
|
||||
| "catala", Compl (white_space | '\n') )]
|
||||
in
|
||||
match%sedlex lexbuf with
|
||||
| "```catala" ->
|
||||
L.is_code := true;
|
||||
@ -533,11 +543,9 @@ let lex_law (lexbuf : lexbuf) : token =
|
||||
( Compl ('#' | '`' | '>')
|
||||
(* Following literals allow to match grave accents as long as they don't conflict with the
|
||||
[BEGIN_CODE] token, i.e. either there are no more than three consecutive ones or they must
|
||||
be followed by a white space or a newline character. *)
|
||||
not be followed by 'catala'. *)
|
||||
| Rep ('`', 1 .. 2), Compl '`'
|
||||
| "```", (white_space | '\n')
|
||||
(* @note (EmileRolley): for a more permissive constraint, [white_space] could be replaced by
|
||||
[Compl 'c'] but it lacks consistency in my opinion. *) ) ->
|
||||
| "```", compl_catala ) ->
|
||||
LAW_TEXT (Utf8.lexeme lexbuf)
|
||||
| _ -> L.raise_lexer_error (Pos.from_lpos prev_pos) prev_lexeme
|
||||
|
||||
|
@ -9,12 +9,10 @@ This allows to:
|
||||
|
||||
* use Fenced Code Blocks:
|
||||
|
||||
```
|
||||
```ocaml
|
||||
let () = print_endline "Hello world!"
|
||||
```
|
||||
|
||||
**Remark**: for Fenced Code Blocks, ``` must be followed by a space or a new line.
|
||||
|
||||
```catala
|
||||
new scope A:
|
||||
param literate_parsing_is_ok content bool
|
||||
@ -23,4 +21,8 @@ scope A:
|
||||
def literate_parsing_is_ok := true
|
||||
```
|
||||
|
||||
Even after `Catala` code block.
|
||||
Even after `Catala` code block:
|
||||
|
||||
```c
|
||||
int main(void) { return 0; }
|
||||
```
|
||||
|
@ -9,12 +9,10 @@ This allows to:
|
||||
|
||||
* use Fenced Code Blocks:
|
||||
|
||||
```
|
||||
```ocaml
|
||||
let () = print_endline "Hello world!"
|
||||
```
|
||||
|
||||
**Remark**: for Fenced Code Blocks, ``` must be followed by a space or a new line.
|
||||
|
||||
```catala
|
||||
declaration scope A:
|
||||
context literate_parsing_is_ok content boolean
|
||||
@ -23,4 +21,8 @@ scope A:
|
||||
definition literate_parsing_is_ok equals true
|
||||
```
|
||||
|
||||
Even after `Catala` code block.
|
||||
Even after `Catala` code block:
|
||||
|
||||
```c
|
||||
int main(void) { return 0; }
|
||||
```
|
||||
|
@ -9,12 +9,10 @@ This allows to:
|
||||
|
||||
* use Fenced Code Blocks:
|
||||
|
||||
```
|
||||
```ocaml
|
||||
let () = print_endline "Hello world!"
|
||||
```
|
||||
|
||||
**Remark**: for Fenced Code Blocks, ``` must be followed by a space or a new line.
|
||||
|
||||
```catala
|
||||
déclaration champ d'application A:
|
||||
contexte literate_parsing_is_ok contenu booléen
|
||||
@ -23,4 +21,8 @@ champ d'application A:
|
||||
définition literate_parsing_is_ok égal à vrai
|
||||
```
|
||||
|
||||
Even after `Catala` code block.
|
||||
Even after `Catala` code block:
|
||||
|
||||
```c
|
||||
int main(void) { return 0; }
|
||||
```
|
||||
|
@ -9,12 +9,10 @@ This allows to:
|
||||
|
||||
* use Fenced Code Blocks:
|
||||
|
||||
```
|
||||
```ocaml
|
||||
let () = print_endline "Hello world!"
|
||||
```
|
||||
|
||||
**Remark**: for Fenced Code Blocks, ``` must be followed by a space or a new line.
|
||||
|
||||
```catala
|
||||
deklaracja zakres A:
|
||||
kontekst literate_parsing_is_ok typu zerojedynkowy
|
||||
@ -23,4 +21,8 @@ zakres A:
|
||||
definicja literate_parsing_is_ok wynosi prawda
|
||||
```
|
||||
|
||||
Even after `Catala` code block.
|
||||
Even after `Catala` code block:
|
||||
|
||||
```c
|
||||
int main(void) { return 0; }
|
||||
```
|
||||
|
Loading…
Reference in New Issue
Block a user