feat(lexer): improve grave accent lexing outside code blocks

This commit is contained in:
EmileRolley 2021-05-24 23:04:43 +02:00
parent bcb2303b13
commit 866a29a643
8 changed files with 72 additions and 32 deletions

View File

@ -532,6 +532,16 @@ let rec lex_code (lexbuf : lexbuf) : token =
let lex_law (lexbuf : lexbuf) : token =
let prev_lexeme = Utf8.lexeme lexbuf in
let prev_pos = lexing_positions lexbuf in
let compl_catala =
[%sedlex.regexp?
( Compl 'c'
| 'c', Compl 'a'
| "ca", Compl 't'
| "cat", Compl 'a'
| "cata", Compl 'l'
| "catal", Compl 'a'
| "catala", Compl (white_space | '\n') )]
in
match%sedlex lexbuf with
| "```catala" ->
is_code := true;
@ -565,11 +575,9 @@ let lex_law (lexbuf : lexbuf) : token =
( Compl ('#' | '`' | '>')
(* The following literals allow matching grave accents as long as they don't conflict with the
[BEGIN_CODE] token, i.e. either there are no more than three consecutive ones or they must
be followed by a white space or a newline character. *)
not be followed by 'catala'. *)
| Rep ('`', 1 .. 2), Compl '`'
| "```", (white_space | '\n')
(* @note (EmileRolley): for a more permissive constraint, [white_space] could be replaced by
[Compl 'c'] but it lacks consistency in my opinion. *) ) ->
| "```", compl_catala ) ->
LAW_TEXT (Utf8.lexeme lexbuf)
| _ -> raise_lexer_error (Pos.from_lpos prev_pos) prev_lexeme

View File

@ -492,6 +492,16 @@ let rec lex_code (lexbuf : lexbuf) : token =
let lex_law (lexbuf : lexbuf) : token =
let prev_lexeme = Utf8.lexeme lexbuf in
let prev_pos = lexing_positions lexbuf in
let compl_catala =
[%sedlex.regexp?
( Compl 'c'
| 'c', Compl 'a'
| "ca", Compl 't'
| "cat", Compl 'a'
| "cata", Compl 'l'
| "catal", Compl 'a'
| "catala", Compl (white_space | '\n') )]
in
match%sedlex lexbuf with
| "```catala" ->
L.is_code := true;
@ -525,11 +535,9 @@ let lex_law (lexbuf : lexbuf) : token =
( Compl ('#' | '`' | '>')
(* The following literals allow matching grave accents as long as they don't conflict with the
[BEGIN_CODE] token, i.e. either there are no more than three consecutive ones or they must
be followed by a white space or a newline character. *)
not be followed by 'catala'. *)
| Rep ('`', 1 .. 2), Compl '`'
| "```", (white_space | '\n')
(* @note (EmileRolley): for a more permissive constraint, [white_space] could be replaced by
[Compl 'c'] but it lacks consistency in my opinion. *) ) ->
| "```", compl_catala ) ->
LAW_TEXT (Utf8.lexeme lexbuf)
| _ -> L.raise_lexer_error (Pos.from_lpos prev_pos) prev_lexeme

View File

@ -495,6 +495,16 @@ let rec lex_code (lexbuf : lexbuf) : token =
let lex_law (lexbuf : lexbuf) : token =
let prev_lexeme = Utf8.lexeme lexbuf in
let prev_pos = lexing_positions lexbuf in
let compl_catala =
[%sedlex.regexp?
( Compl 'c'
| 'c', Compl 'a'
| "ca", Compl 't'
| "cat", Compl 'a'
| "cata", Compl 'l'
| "catal", Compl 'a'
| "catala", Compl (white_space | '\n') )]
in
match%sedlex lexbuf with
| "```catala" ->
L.is_code := true;
@ -532,11 +542,9 @@ let lex_law (lexbuf : lexbuf) : token =
( Compl ('#' | '`' | '>')
(* The following literals allow matching grave accents as long as they don't conflict with the
[BEGIN_CODE] token, i.e. either there are no more than three consecutive ones or they must
be followed by a white space or a newline character. *)
not be followed by 'catala'. *)
| Rep ('`', 1 .. 2), Compl '`'
| "```", (white_space | '\n')
(* @note (EmileRolley): for a more permissive constraint, [white_space] could be replaced by
[Compl 'c'] but it lacks consistency in my opinion. *) ) ->
| "```", compl_catala ) ->
LAW_TEXT (Utf8.lexeme lexbuf)
| _ -> L.raise_lexer_error (Pos.from_lpos prev_pos) prev_lexeme

View File

@ -499,6 +499,16 @@ let rec lex_code (lexbuf : lexbuf) : token =
let lex_law (lexbuf : lexbuf) : token =
let prev_lexeme = Utf8.lexeme lexbuf in
let prev_pos = lexing_positions lexbuf in
let compl_catala =
[%sedlex.regexp?
( Compl 'c'
| 'c', Compl 'a'
| "ca", Compl 't'
| "cat", Compl 'a'
| "cata", Compl 'l'
| "catal", Compl 'a'
| "catala", Compl (white_space | '\n') )]
in
match%sedlex lexbuf with
| "```catala" ->
L.is_code := true;
@ -533,11 +543,9 @@ let lex_law (lexbuf : lexbuf) : token =
( Compl ('#' | '`' | '>')
(* The following literals allow matching grave accents as long as they don't conflict with the
[BEGIN_CODE] token, i.e. either there are no more than three consecutive ones or they must
be followed by a white space or a newline character. *)
not be followed by 'catala'. *)
| Rep ('`', 1 .. 2), Compl '`'
| "```", (white_space | '\n')
(* @note (EmileRolley): for a more permissive constraint, [white_space] could be replaced by
[Compl 'c'] but it lacks consistency in my opinion. *) ) ->
| "```", compl_catala ) ->
LAW_TEXT (Utf8.lexeme lexbuf)
| _ -> L.raise_lexer_error (Pos.from_lpos prev_pos) prev_lexeme

View File

@ -9,12 +9,10 @@ This allows to:
* use Fenced Code Blocks:
```
```ocaml
let () = print_endline "Hello world!"
```
**Remark**: for Fenced Code Blocks, ``` must be followed by a space or a new line.
```catala
new scope A:
param literate_parsing_is_ok content bool
@ -23,4 +21,8 @@ scope A:
def literate_parsing_is_ok := true
```
Even after `Catala` code block.
Even after `Catala` code block:
```c
int main(void) { return 0; }
```

View File

@ -9,12 +9,10 @@ This allows to:
* use Fenced Code Blocks:
```
```ocaml
let () = print_endline "Hello world!"
```
**Remark**: for Fenced Code Blocks, ``` must be followed by a space or a new line.
```catala
declaration scope A:
context literate_parsing_is_ok content boolean
@ -23,4 +21,8 @@ scope A:
definition literate_parsing_is_ok equals true
```
Even after `Catala` code block.
Even after `Catala` code block:
```c
int main(void) { return 0; }
```

View File

@ -9,12 +9,10 @@ This allows to:
* use Fenced Code Blocks:
```
```ocaml
let () = print_endline "Hello world!"
```
**Remark**: for Fenced Code Blocks, ``` must be followed by a space or a new line.
```catala
déclaration champ d'application A:
contexte literate_parsing_is_ok contenu booléen
@ -23,4 +21,8 @@ champ d'application A:
définition literate_parsing_is_ok égal à vrai
```
Even after `Catala` code block.
Even after `Catala` code block:
```c
int main(void) { return 0; }
```

View File

@ -9,12 +9,10 @@ This allows to:
* use Fenced Code Blocks:
```
```ocaml
let () = print_endline "Hello world!"
```
**Remark**: for Fenced Code Blocks, ``` must be followed by a space or a new line.
```catala
deklaracja zakres A:
kontekst literate_parsing_is_ok typu zerojedynkowy
@ -23,4 +21,8 @@ zakres A:
definicja literate_parsing_is_ok wynosi prawda
```
Even after `Catala` code block.
Even after `Catala` code block:
```c
int main(void) { return 0; }
```