catala/compiler/surface/lexer_common.mli

(* This file is part of the Catala compiler, a specification language for tax
   and social benefits computation rules. Copyright (C) 2020 Inria,
   contributors: Denis Merigoux <denis.merigoux@inria.fr>, Emile Rolley
   <emile.rolley@tuta.io>

   Licensed under the Apache License, Version 2.0 (the "License"); you may not
   use this file except in compliance with the License. You may obtain a copy of
   the License at

   http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
   WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
   License for the specific language governing permissions and limitations under
   the License. *)

(** Auxiliary functions used by all lexers. *)

(** Lexing modes the lexer can be in; the current one is stored in
    {!val:context}. NOTE(review): the meaning of each constructor is not
    documented in this interface. Presumably [Law] is legislative text outside
    code blocks, [Code] is the inside of a code block, [Raw] is a verbatim
    block whose contents are left uninterpreted, and [Directive] and
    [Directive_args] cover a directive and its arguments - confirm against the
    lexer implementations. *)
type lexing_context = Law | Raw | Code | Directive | Directive_args

val context : lexing_context ref
(** Reference used by the lexer as its mutable state: it holds the
    {!type:lexing_context} the lexer is currently in (law text, code, etc.). *)

val code_buffer : Buffer.t
(** Buffer that accumulates the string representation of the body of code being
    lexed. The literate programming backends use this string to faithfully
    reproduce the spacing pattern of the original program. *)

val update_acc : Sedlexing.lexbuf -> unit
(** [update_acc lexbuf] updates {!val:code_buffer} with the current lexeme of
    [lexbuf]. *)

val raise_lexer_error : Catala_utils.Pos.t -> string -> 'a
(** Error-generating helper. The result type is fully polymorphic, so a call
    never returns a value normally. NOTE(review): the position is presumably
    the location of the error, and the string is presumably either the
    offending token or a message - confirm against the implementation. *)

val token_list_language_agnostic : (string * Tokens.token) list
(** Association list matching each punctuation string that is part of the
    Catala syntax with its {!Surface.Parser} token. These entries are the same
    for all the input languages (English, French, etc.). *)

val calc_precedence : string -> int
(** Calculates the precedence according to a matched regex of the form: '[#]+'.
    NOTE(review): the exact rule (presumably derived from the number of [#]
    characters) is not visible from this interface - confirm against the
    implementation. *)

val get_law_heading : Sedlexing.lexbuf -> Tokens.token
(** [get_law_heading lexbuf] gets the [LAW_HEADING] token from the current
    [lexbuf]. *)

(** Simplified tokens for dependency extraction. Values of this type are
    returned, paired with the text of the line, by [LocalisedLexer.lex_line].
    The comment next to each constructor shows an example of the input line it
    stands for. *)
type line_token =
  | LINE_TEST of string (* ```catala-test { id = xx } *)
  | LINE_INLINE_TEST (* ```catala-test-inline *)
  | LINE_BLOCK_END (* ``` *)
  | LINE_INCLUDE of string (* > Include foo.catala_en *)
  | LINE_MODULE_DEF of string * bool (* > Module Xxx [external] *)
  (* NOTE(review): the bool presumably records whether the [external] marker
     was present - confirm against the lexer implementations. *)
  | LINE_MODULE_USE of string (* > Using Xxx [as Yyy] *)
  | LINE_ANY (* anything else *)

(** Interface of a lexer whose tokens depend on the input language (English,
    French, etc.). *)
module type LocalisedLexer = sig
  val token_list : (string * Tokens.token) list
  (** Same as {!val:Surface.Lexer_common.token_list_language_agnostic}, but with
      tokens whose string varies with the input language. *)

  val lex_builtin : string -> Ast.builtin_expression option
  (** Simple lexer for builtins. NOTE(review): presumably returns [None] when
      the string is not a recognised builtin - confirm against the
      implementations. *)

  val lex_code : Sedlexing.lexbuf -> Tokens.token
  (** Main lexing function used in a code block. *)

  val lex_law : Sedlexing.lexbuf -> Tokens.token
  (** Main lexing function used outside code blocks. *)

  val lexer : Sedlexing.lexbuf -> Tokens.token
  (** Entry point of the lexer, distributes to {!val:lex_code} or {!val:lex_law}
      depending on the current {!val:Surface.Lexer_common.context}. *)

  val lex_line : Sedlexing.lexbuf -> (string * line_token) option
  (** Low-level lexer intended for dependency extraction. The whole line
      (including ["\n"]) is always returned together with the token. [None] for
      EOF. *)
end
refactor(lexer): factorize the law heading exctraction in a new module : Lexer_common 2021-03-09 22:57:41 +03:00			`(* This file is part of the Catala compiler, a specification language for tax`
docs: update contributions in file comments 2021-05-27 19:56:47 +03:00			`and social benefits computation rules. Copyright (C) 2020 Inria,`
			`contributors: Denis Merigoux <denis.merigoux@inria.fr>, Emile Rolley`
			`<emile.rolley@tuta.io>`
refactor(lexer): factorize the law heading exctraction in a new module : Lexer_common 2021-03-09 22:57:41 +03:00
			`Licensed under the Apache License, Version 2.0 (the "License"); you may not`
			`use this file except in compliance with the License. You may obtain a copy of`
			`the License at`

			`http://www.apache.org/licenses/LICENSE-2.0`

			`Unless required by applicable law or agreed to in writing, software`
			`distributed under the License is distributed on an "AS IS" BASIS, WITHOUT`
			`WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the`
			`License for the specific language governing permissions and limitations under`
			`the License. *)`

refactor(lexer)!: removes the abbreviated syntax lexer (lexer.ml) 2021-05-26 18:39:39 +03:00			`(** Auxiliary functions used by all lexers. *)`

Fix lexing of verbatim blocks Catala doesn't interpret them at all, but it needs to refrain from interpreting its contents as markdown (titles, etc.) 2024-05-16 16:45:16 +03:00			`type lexing_context = Law \| Raw \| Code \| Directive \| Directive_args`
refactor(lexer)!: removes the abbreviated syntax lexer (lexer.ml) 2021-05-26 18:39:39 +03:00
Cleanup the lexer, and refactor for more generic directives 2021-08-17 16:49:48 +03:00			`val context : lexing_context ref`
			`(** Reference, used by the lexer as the mutable state to distinguish whether it`
			`is lexing code or law. *)`

			`val code_buffer : Buffer.t`
			`(** Buffer that accumulates the string representation of the body of code being`
			`lexed. This string representation is used in the literate programming`
			`backends to faithfully capture the spacing pattern of the original program *)`
refactor(lexer)!: removes the abbreviated syntax lexer (lexer.ml) 2021-05-26 18:39:39 +03:00
			`val update_acc : Sedlexing.lexbuf -> unit`
Cleanup the lexer, and refactor for more generic directives 2021-08-17 16:49:48 +03:00			`(** Updates {!val:code_buffer} with the current lexeme *)`
refactor(lexer)!: removes the abbreviated syntax lexer (lexer.ml) 2021-05-26 18:39:39 +03:00
Rename utils to catala_utils 2022-11-21 12:46:17 +03:00			`val raise_lexer_error : Catala_utils.Pos.t -> string -> 'a`
refactor(lexer)!: removes the abbreviated syntax lexer (lexer.ml) 2021-05-26 18:39:39 +03:00			`(** Error-generating helper *)`

			`val token_list_language_agnostic : (string * Tokens.token) list`
fix(build/doc): remove warnings due to .ml* files 2022-01-02 16:53:51 +03:00			`(** Associative list matching each punctuation string part of the Catala syntax`
			`with its {!Surface.Parser} token. Same for all the input languages (English,`
			`French, etc.) *)`
refactor(lexer)!: removes the abbreviated syntax lexer (lexer.ml) 2021-05-26 18:39:39 +03:00
refactor(lexer): factorize the law heading exctraction in a new module : Lexer_common 2021-03-09 22:57:41 +03:00			`val calc_precedence : string -> int`
			`(** Calculates the precedence according a matched regex of the form : '[#]+' *)`

Pass the localised builtins as parameters to the parser 2021-04-30 10:59:09 +03:00			`val get_law_heading : Sedlexing.lexbuf -> Tokens.token`
refactor(lexer): factorize the law heading exctraction in a new module : Lexer_common 2021-03-09 22:57:41 +03:00			`(** Gets the [LAW_HEADING] token from the current [lexbuf] *)`
refactor(lexer)!: removes the abbreviated syntax lexer (lexer.ml) 2021-05-26 18:39:39 +03:00
Add a lightweight lexer for dependency extraction in Clerk The base is there but not fully used yet in Clerk 2023-09-11 17:44:35 +03:00			`(** Simplified tokens for dependency extraction *)`
			`type line_token =`
			\| LINE_TEST of string (* ```catala-test { id = xx } *)
			\| LINE_INLINE_TEST (* ```catala-test-inline *)
			\| LINE_BLOCK_END (* ``` *)
			`\| LINE_INCLUDE of string (* > Include foo.catala_en *)`
Document and first test for externals Also some fixes for Clerk to properly support them 2023-12-01 17:24:54 +03:00			`\| LINE_MODULE_DEF of string * bool (* > Module Xxx [external] *)`
Add a lightweight lexer for dependency extraction in Clerk The base is there but not fully used yet in Clerk 2023-09-11 17:44:35 +03:00			`\| LINE_MODULE_USE of string (* > Using Xxx [as Yyy] *)`
			`\| LINE_ANY (* anything else *)`

refactor(lexer)!: removes the abbreviated syntax lexer (lexer.ml) 2021-05-26 18:39:39 +03:00			`module type LocalisedLexer = sig`
			`val token_list : (string * Tokens.token) list`
Fix doc generation 2022-04-14 12:18:26 +03:00			`(** Same as {!val:Surface.Lexer_common.token_list_language_agnostic}, but with`
			`tokens whose string varies with the input language. *)`
refactor(lexer)!: removes the abbreviated syntax lexer (lexer.ml) 2021-05-26 18:39:39 +03:00
Factorise lexer translations 2021-08-19 19:26:06 +03:00			`val lex_builtin : string -> Ast.builtin_expression option`
			`(** Simple lexer for builtins *)`
refactor(lexer)!: removes the abbreviated syntax lexer (lexer.ml) 2021-05-26 18:39:39 +03:00
			`val lex_code : Sedlexing.lexbuf -> Tokens.token`
			`(** Main lexing function used in a code block *)`

			`val lex_law : Sedlexing.lexbuf -> Tokens.token`
			`(** Main lexing function used outside code blocks *)`

			`val lexer : Sedlexing.lexbuf -> Tokens.token`
Fix doc generation 2022-04-14 12:18:26 +03:00			`(** Entry point of the lexer, distributes to {!val:lex_code} or {!val:lex_law}`
			`depending of the current {!val:Surface.Lexer_common.context}. *)`
Add a lightweight lexer for dependency extraction in Clerk The base is there but not fully used yet in Clerk 2023-09-11 17:44:35 +03:00
			`val lex_line : Sedlexing.lexbuf -> (string * line_token) option`
			`(** Low-level lexer intended for dependency extraction. The whole line`
			`(including ["\n"] is always returned together with the token. [None] for`
			`EOF. *)`
refactor(lexer)!: removes the abbreviated syntax lexer (lexer.ml) 2021-05-26 18:39:39 +03:00			`end`