From 258a0c3a059600e989c0b37a090bc25cd2d1d3b4 Mon Sep 17 00:00:00 2001 From: Rijnard van Tonder Date: Mon, 17 May 2021 21:05:27 -0700 Subject: [PATCH] overhaul match and rewrite templating (#285) --- comby-kernel.opam | 1 - lib/app/comby.ml | 2 - lib/app/comby.mli | 4 - .../configuration/command_configuration.ml | 9 +- .../configuration/command_configuration.mli | 2 - lib/app/configuration/regex.ml | 132 --- lib/app/configuration/regex.mli | 3 - lib/app/pipeline/pipeline.ml | 19 +- lib/kernel/comby_kernel.ml | 13 +- lib/kernel/comby_kernel.mli | 263 +++--- lib/kernel/match/dune | 2 +- lib/kernel/matchers/alpha.ml | 199 +++-- lib/kernel/matchers/ast.ml | 5 + lib/kernel/matchers/dune | 2 +- lib/kernel/matchers/engine.ml | 2 +- lib/kernel/matchers/evaluate.ml | 186 ++--- lib/kernel/matchers/languages.ml | 2 +- lib/kernel/matchers/matchers.ml | 13 +- lib/kernel/matchers/matchers.mli | 12 +- lib/kernel/matchers/metasyntax.ml | 2 +- lib/kernel/matchers/omega.ml | 625 ++++++++------- ...omega_parser.ml => omega_parser_helper.ml} | 109 +-- lib/kernel/matchers/regexp.ml | 4 +- lib/kernel/matchers/regexp.mli | 2 +- lib/kernel/matchers/rewrite.ml | 212 +++-- lib/kernel/matchers/rewrite.mli | 11 + lib/kernel/matchers/rewrite_template.ml | 281 ------- lib/kernel/matchers/rewrite_template.mli | 29 - lib/kernel/matchers/rewriter.ml | 2 - lib/kernel/matchers/rewriter.mli | 2 - lib/kernel/matchers/rule.ml | 279 ++++--- lib/kernel/matchers/script.ml | 15 +- lib/kernel/matchers/specification.ml | 116 ++- lib/kernel/matchers/specification.mli | 2 + lib/kernel/matchers/template.ml | 212 +++++ lib/kernel/matchers/template.mli | 16 + lib/kernel/matchers/types.ml | 137 +++- lib/kernel/parsers/dune | 2 +- lib/kernel/parsers/omega_comments.ml | 2 +- lib/kernel/parsers/omega_string_literals.ml | 2 +- lib/vendored/dune | 0 lib/vendored/vangstrom/LICENSE | 30 + lib/vendored/vangstrom/async/dune | 5 + .../vangstrom/async/vangstrom_async.ml | 85 ++ .../vangstrom/async/vangstrom_async.mli | 48 ++ lib/vendored/vangstrom/dune | 0 lib/vendored/vangstrom/lib/buffering.ml | 88 ++ lib/vendored/vangstrom/lib/buffering.mli | 20 + lib/vendored/vangstrom/lib/dune | 6 + lib/vendored/vangstrom/lib/exported_state.ml | 22 + lib/vendored/vangstrom/lib/input.ml | 111 +++ lib/vendored/vangstrom/lib/input.mli | 88 ++ lib/vendored/vangstrom/lib/more.ml | 3 + lib/vendored/vangstrom/lib/more.mli | 3 + lib/vendored/vangstrom/lib/parser.ml | 173 ++++ lib/vendored/vangstrom/lib/vangstrom.ml | 753 ++++++++++++++++++ lib/vendored/vangstrom/lib/vangstrom.mli | 684 ++++++++++++++++ lib/vendored/vangstrom/lwt/dune | 5 + .../vangstrom/lwt/vangstrom_lwt_unix.ml | 81 ++ .../vangstrom/lwt/vangstrom_lwt_unix.mli | 72 ++ lib/vendored/vangstrom/unix/dune | 4 + lib/vendored/vangstrom/unix/vangstrom_unix.ml | 52 ++ .../vangstrom/unix/vangstrom_unix.mli | 48 ++ src/dune | 20 +- src/main.ml | 26 +- src/server.ml | 2 +- test/alpha/dune | 1 - test/alpha/test_custom_metasyntax.ml | 188 ----- test/alpha/test_special_matcher_cases.ml | 2 +- test/alpha/test_substring_disabled.ml | 3 +- test/common/dune | 5 +- test/common/test_bash.ml | 19 +- test/common/test_c.ml | 67 +- test/common/test_c_separators.ml | 23 +- test/common/test_c_style_comments.ml | 102 +-- test/common/test_cli.ml | 46 +- test/common/test_custom_metasyntax.ml | 276 +++++++ test/common/test_extract_regex.ml | 11 +- test/common/test_go.ml | 27 +- test/common/test_helpers.ml | 78 +- test/common/test_hole_extensions.ml | 15 +- test/common/test_match_offsets.ml | 17 + test/common/test_match_rule.ml | 8 +- test/common/test_nested_matches.ml | 38 +- test/common/test_parse_rewrite_template.ml | 69 -- test/common/test_parse_rule.ml | 85 +- test/common/test_parse_template.ml | 114 +++ test/common/test_pipeline.ml | 2 - test/common/test_rewrite_parts.ml | 40 +- test/common/test_rewrite_rule.ml | 19 - test/common/test_script.ml | 61 +- test/common/test_statistics.ml | 2 +- test/common/test_substitute.ml | 14 +- test/common/test_template_constraints.ml | 17 + test/common/test_user_defined_language.ml | 6 +- test/example/metasyntax/test.json | 8 + 96 files changed, 4576 insertions(+), 2149 deletions(-) delete mode 100644 lib/app/configuration/regex.ml delete mode 100644 lib/app/configuration/regex.mli create mode 100644 lib/kernel/matchers/ast.ml rename lib/kernel/matchers/{omega_parser.ml => omega_parser_helper.ml} (58%) delete mode 100644 lib/kernel/matchers/rewrite_template.ml delete mode 100644 lib/kernel/matchers/rewrite_template.mli delete mode 100644 lib/kernel/matchers/rewriter.ml delete mode 100644 lib/kernel/matchers/rewriter.mli create mode 100644 lib/kernel/matchers/template.ml create mode 100644 lib/kernel/matchers/template.mli create mode 100644 lib/vendored/dune create mode 100644 lib/vendored/vangstrom/LICENSE create mode 100644 lib/vendored/vangstrom/async/dune create mode 100644 lib/vendored/vangstrom/async/vangstrom_async.ml create mode 100644 lib/vendored/vangstrom/async/vangstrom_async.mli create mode 100644 lib/vendored/vangstrom/dune create mode 100644 lib/vendored/vangstrom/lib/buffering.ml create mode 100644 lib/vendored/vangstrom/lib/buffering.mli create mode 100644 lib/vendored/vangstrom/lib/dune create mode 100644 lib/vendored/vangstrom/lib/exported_state.ml create mode 100644 lib/vendored/vangstrom/lib/input.ml create mode 100644 lib/vendored/vangstrom/lib/input.mli create mode 100644 lib/vendored/vangstrom/lib/more.ml create mode 100644 lib/vendored/vangstrom/lib/more.mli create mode 100644 lib/vendored/vangstrom/lib/parser.ml create mode 100644 lib/vendored/vangstrom/lib/vangstrom.ml create mode 100644 lib/vendored/vangstrom/lib/vangstrom.mli create mode 100644 lib/vendored/vangstrom/lwt/dune create mode 100644 lib/vendored/vangstrom/lwt/vangstrom_lwt_unix.ml create mode 100644 lib/vendored/vangstrom/lwt/vangstrom_lwt_unix.mli create mode 100644 lib/vendored/vangstrom/unix/dune create mode 100644 lib/vendored/vangstrom/unix/vangstrom_unix.ml create mode 100644 lib/vendored/vangstrom/unix/vangstrom_unix.mli delete mode 100644 test/alpha/test_custom_metasyntax.ml create mode 100644 test/common/test_custom_metasyntax.ml create mode 100644 test/common/test_match_offsets.ml delete mode 100644 test/common/test_parse_rewrite_template.ml create mode 100644 test/common/test_parse_template.ml create mode 100644 test/common/test_template_constraints.ml create mode 100644 test/example/metasyntax/test.json diff --git a/comby-kernel.opam b/comby-kernel.opam index 5570ca5..f03b176 100644 --- a/comby-kernel.opam +++ b/comby-kernel.opam @@ -20,7 +20,6 @@ build: [ depends: [ "dune" {>= "2.8.0"} "ocaml" {>= "4.08.1"} - "angstrom" {>= "0.15.0"} "core_kernel" "mparser" "mparser-pcre" diff --git a/lib/app/comby.ml b/lib/app/comby.ml index bd23d43..58d7053 100644 --- a/lib/app/comby.ml +++ b/lib/app/comby.ml @@ -4,5 +4,3 @@ module Pipeline = struct include Configuration.Command_input include Pipeline end - -module Regex = Configuration.Regex diff --git a/lib/app/comby.mli b/lib/app/comby.mli index f2a2a1e..99deb42 100644 --- a/lib/app/comby.mli +++ b/lib/app/comby.mli @@ -39,7 +39,3 @@ module Pipeline : sig -> Matchers.specification -> output end - -module Regex : sig - val to_regex : Matchers.specification -> string -end diff --git a/lib/app/configuration/command_configuration.ml b/lib/app/configuration/command_configuration.ml index bab042b..a9c17d2 100644 --- a/lib/app/configuration/command_configuration.ml +++ b/lib/app/configuration/command_configuration.ml @@ -238,7 +238,6 @@ type run_options = { verbose : bool ; match_timeout : int ; dump_statistics : bool - ; substitute_in_place : bool ; disable_substring_matching : bool ; fast_offset_conversion : bool ; match_newline_toplevel : bool @@ -433,7 +432,6 @@ type t = ; output_printer : Printer.t ; interactive_review : interactive_review option ; matcher : (module Matchers.Matcher.S) - ; extension : string option ; metasyntax : Matchers.Metasyntax.t option } @@ -621,7 +619,7 @@ let syntax custom_matcher_path = exit 1 | `Yes -> Yojson.Safe.from_file custom_matcher_path - |> Matchers.Syntax.of_yojson + |> Matchers.Language.Syntax.of_yojson |> function | Ok c -> c | Error error -> @@ -682,7 +680,7 @@ let select_matcher custom_metasyntax custom_matcher override_matcher file_filter let regex_of_specifications specifications = Format.sprintf "(%s)" @@ String.concat ~sep:")|(" - @@ List.map specifications ~f:Regex.to_regex + @@ List.map specifications ~f:Matchers.Specification.to_regex let ripgrep_file_filters specifications args : string list = let regex = regex_of_specifications specifications in @@ -832,11 +830,10 @@ let create else Printer.Rewrite.print replacement_output source_path replacements result source_content in - let (module M) as matcher, extension, metasyntax = + let (module M) as matcher, _, metasyntax = select_matcher custom_metasyntax custom_matcher override_matcher file_filters omega in return { matcher - ; extension ; sources ; specifications ; run_options diff --git a/lib/app/configuration/command_configuration.mli b/lib/app/configuration/command_configuration.mli index 2c49f54..e00a897 100644 --- a/lib/app/configuration/command_configuration.mli +++ b/lib/app/configuration/command_configuration.mli @@ -71,7 +71,6 @@ type run_options = { verbose : bool ; match_timeout : int ; dump_statistics : bool - ; substitute_in_place : bool ; disable_substring_matching : bool ; fast_offset_conversion : bool ; match_newline_toplevel : bool @@ -92,7 +91,6 @@ type t = ; output_printer : Printer.t ; interactive_review : interactive_review option ; matcher : (module Matchers.Matcher.S) - ; extension : string option ; metasyntax : Matchers.Metasyntax.t option } diff --git a/lib/app/configuration/regex.ml b/lib/app/configuration/regex.ml deleted file mode 100644 index 65c5f1d..0000000 --- a/lib/app/configuration/regex.ml +++ /dev/null @@ -1,132 +0,0 @@ -open Core -open Angstrom - -open Comby_kernel - -let (|>>) p f = - p >>= fun x -> return (f x) - -let alphanum = - satisfy (function - | 'a' .. 'z' - | 'A' .. 'Z' - | '0' .. '9' -> true - | _ -> false) - -let blank = - choice - [ char ' ' - ; char '\t' - ] - -let identifier_parser () = - many (alphanum <|> char '_') - |>> String.of_char_list - -let single_hole_parser () = - string ":[[" *> identifier_parser () <* string "]]" |>> fun _ -> None - -let everything_hole_parser () = - string ":[" *> identifier_parser () <* string "]" |>> fun _ -> None - -let expression_hole_parser () = - string ":[" *> identifier_parser () <* string ":e" <* string "]" |>> fun _ -> None - -let non_space_hole_parser () = - string ":[" *> identifier_parser () <* string ".]" |>> fun _ -> None - -let line_hole_parser () = - string ":[" *> identifier_parser () <* string "\\n]" |>> fun _ -> None - -let blank_hole_parser () = - string ":[" - *> many1 blank - *> identifier_parser () - <* string "]" - |>> fun _ -> None - -let any_char_except ~reserved = - List.fold reserved - ~init:(return `OK) - ~f:(fun acc reserved_sequence -> - option `End_of_input - (peek_string (String.length reserved_sequence) - >>= fun s -> - if String.equal s reserved_sequence then - return `Reserved_sequence - else - acc)) - >>= function - | `OK -> any_char - | `End_of_input -> any_char - | `Reserved_sequence -> fail "reserved sequence hit" - -let regex_body () = - fix (fun expr -> - (choice - [ ((char '[' *> (many1 expr) <* char ']') - |>> fun char_class -> Format.sprintf "[%s]" @@ String.concat char_class) - ; (char '\\' *> any_char |>> fun c -> (Format.sprintf "\\%c" c)) - ; ((any_char_except ~reserved:["]"])) |>> Char.to_string - ] - )) - -let regex_hole_parser () = - string ":[" - *> identifier_parser () - *> char '~' - *> (many1 @@ regex_body ()) >>= fun regex -> - string "]" >>= fun _ -> return (Some (String.concat regex)) - -type extracted = - | Regex of string - | Constant of string - -let extract : extracted list Angstrom.t = - let hole = - choice - [ single_hole_parser () - ; everything_hole_parser () - ; expression_hole_parser () - ; non_space_hole_parser () - ; line_hole_parser () - ; blank_hole_parser () - ; regex_hole_parser () - ] - in - many @@ choice - [ (hole >>= fun v -> return (Option.map v ~f:(fun v -> Regex v))) - ; ((many1 @@ any_char_except ~reserved:[":["])) >>= fun c -> - return (Some (Constant (String.of_char_list c))) - ] - >>= fun result -> return (List.filter_opt result) - -let escape s = - let rec aux chars = - match chars with - | [] -> [] - | x :: xs -> - match x with - | '\\' | '.' | '+' | '*' | '?' | '(' | ')' | '|' | '[' | ']' | '{' | '}' | '^' | '$' as c -> - '\\' :: c :: (aux xs) - | c -> c :: (aux xs) - in - aux (String.to_list s) - |> String.of_char_list - -let to_regex Matchers.Specification.{ match_template; _ } = - let state = Buffered.parse extract in - let state = Buffered.feed state (`String match_template) in - let extracted = - match Buffered.feed state `Eof with - | Buffered.Done (_, result) -> result - | _ -> failwith "Could not parse template for ripgrep" - in - (* Escape regex metachars *) - let extracted = List.map extracted ~f:(function | Constant s -> escape s | Regex s -> s) in - (* Replace contiguous spaces with the regex \s+ *) - let match_spaces = Str.regexp "[ \t\r\n]+" in - let extracted = List.map extracted ~f:(fun part -> Str.global_replace match_spaces {|\s+|} part) in - (* ?s is modifier metasyntax where . matches all chars including newlines. See - regular-expressions.info/modifier.html *) - Format.sprintf "(%s)" @@ String.concat extracted ~sep:")(\\n|.)*?(" diff --git a/lib/app/configuration/regex.mli b/lib/app/configuration/regex.mli deleted file mode 100644 index e74746e..0000000 --- a/lib/app/configuration/regex.mli +++ /dev/null @@ -1,3 +0,0 @@ -open Comby_kernel - -val to_regex : Matchers.specification -> string diff --git a/lib/app/pipeline/pipeline.ml b/lib/app/pipeline/pipeline.ml index 19f3485..e5104a5 100644 --- a/lib/app/pipeline/pipeline.ml +++ b/lib/app/pipeline/pipeline.ml @@ -25,10 +25,8 @@ let timed_run (match rewrite_template with | Some template -> Matcher.set_rewrite_template template; | None -> ()); - let rule = Option.value rule ~default:(Rule.create "where true" |> Or_error.ok_exn) in - let options = Rule.options rule in - let matches = Matcher.all ~rule ~nested:options.nested ~configuration ~template ~source () in - List.map matches ~f:(Match.convert_offset ~fast:fast_offset_conversion ~source) + Matcher.all ?rule ~configuration ~template ~source () + |> List.map ~f:(Match.convert_offset ~fast:fast_offset_conversion ~source) type output = | Matches of (Match.t list * int) @@ -134,11 +132,12 @@ let run_on_specifications mode specifications process (input : single_source) = | Matches _, Replacement (l, content, n) | Replacement (l, content, n), Matches _ -> - Format.eprintf "WARNING: input configuration specifies both rewrite \ - and match templates. I am choosing to only process the \ - configurations with both a 'match' and 'rewrite' part. \ - If you only want to see matches, add -match-only to \ - suppress this warning@."; + Format.eprintf + "WARNING: input configuration specifies both rewrite \ + and match templates. I am choosing to only process the \ + configurations with both a 'match' and 'rewrite' part. \ + If you only want to see matches, add -match-only to \ + suppress this warning@."; Replacement (l, content, n) ) in @@ -239,7 +238,6 @@ let run { verbose ; match_timeout = timeout ; dump_statistics - ; substitute_in_place = _ (* FIXME remove *) ; disable_substring_matching ; fast_offset_conversion ; match_newline_toplevel @@ -248,7 +246,6 @@ let run } ; output_printer ; interactive_review - ; extension = _ (* FIXME *) ; metasyntax } = diff --git a/lib/kernel/comby_kernel.ml b/lib/kernel/comby_kernel.ml index b082e58..5e925ba 100644 --- a/lib/kernel/comby_kernel.ml +++ b/lib/kernel/comby_kernel.ml @@ -7,7 +7,6 @@ type replacement = Replacement.result module Matchers = struct module Engine = Matchers.Engine - module Info = Matchers.Info module Language = Matchers.Language module Matcher = Matchers.Matcher @@ -15,8 +14,6 @@ module Matchers = struct module Configuration = Matchers.Configuration type configuration = Configuration.t - module Syntax = Matchers.Syntax - type syntax = Matchers.Syntax.t module Hole = Matchers.Hole module Metasyntax = Matchers.Metasyntax @@ -27,9 +24,12 @@ module Matchers = struct module Languages = Matchers.Languages + module Template = Matchers.Template + + module Ast = Matchers.Ast + module Rule = struct include Matchers.Rule - include Matchers.Rule.Ast include Matchers.Rule.Parser include Matchers.Evaluate end @@ -38,8 +38,5 @@ module Matchers = struct module Specification = Matchers.Specification type specification = Specification.t - module Rewrite = struct - include Matchers.Rewriter.Rewrite - include Matchers.Rewriter.Rewrite_template - end + module Rewrite = Matchers.Rewriter end diff --git a/lib/kernel/comby_kernel.mli b/lib/kernel/comby_kernel.mli index 88eedb3..d1ccd01 100644 --- a/lib/kernel/comby_kernel.mli +++ b/lib/kernel/comby_kernel.mli @@ -327,6 +327,71 @@ module Matchers : sig type metasyntax = Metasyntax.t + (** {3 Template} + + Parse a template based on metasynax *) + + module Template : sig + type kind = + | Value + | Length + | FileName + | FilePath + | Type + + type syntax = + { variable : string + ; pattern : string + ; offset : int + ; kind : kind + } + [@@deriving sexp] + + type atom = + | Hole of syntax + | Constant of string + [@@deriving sexp] + + type t = atom list + [@@deriving sexp] + + module Make : Metasyntax.S -> sig + val parse : string -> t + val variables : string -> syntax list + end + end + + (** {3 AST} + + Defines a rule AST. *) + module Ast : sig + type atom = + | Template of Template.t + | String of string + [@@deriving sexp] + + type antecedent = atom + [@@deriving sexp] + + type kind = + | Value + | Length + | Type + | File + [@@deriving sexp] + + type expression = + | True + | False + | Option of string + | Equal of atom * atom + | Not_equal of atom * atom + | Match of atom * (antecedent * consequent) list + | Rewrite of atom * (antecedent * atom) + and consequent = expression list + [@@deriving sexp] + end + (** {3 Matcher} Defines the functions that a matcher can perform. *) @@ -338,7 +403,6 @@ module Matchers : sig val all : ?configuration:configuration -> ?rule:Rule.t - -> ?nested:bool -> template:string -> source:string -> unit @@ -370,28 +434,6 @@ module Matchers : sig Defines types and operations for match rules. *) and Rule : sig - module Ast : sig - type atom = - | Variable of string - | String of string - [@@deriving sexp] - - type antecedent = atom - [@@deriving sexp] - - type expression = - | True - | False - | Option of string - | Equal of atom * atom - | Not_equal of atom * atom - | Match of atom * (antecedent * consequent) list - | RewriteTemplate of string - | Rewrite of atom * (antecedent * expression) - and consequent = expression list - [@@deriving sexp] - end - type t = Ast.expression list [@@deriving sexp] @@ -417,11 +459,15 @@ module Matchers : sig rules. [metasyntax] uses the custom metasyntax definition. *) val apply : ?substitute_in_place:bool -> - ?fresh:(unit -> string) -> ?metasyntax:Metasyntax.t -> - match_all:(?configuration:Configuration.t -> - template:string -> source:string -> unit -> Match.t list) -> - Rule.Ast.expression list -> + match_all:( + ?configuration:Configuration.t -> + template:string -> + source:string -> + unit -> + Match.t list + ) -> + Ast.expression list -> Match.Environment.t -> result end @@ -444,73 +490,79 @@ module Matchers : sig replacements rather than just matches (see [process_single_source] below). *) val create : ?rewrite_template:string -> ?rule:rule -> match_template:string -> unit -> t + + (** [regex [t] returns a generalized regular expression corresponding to the specification *) + val to_regex : t -> string end type specification = Specification.t - (** {3 Syntax} - Defines the syntax structures for the target language (C, Go, etc.) that - are significant for matching. *) - module Syntax : sig - - (** Defines a set of quoted syntax for strings based on one or more - delimiters and associated escape chracter. - - E.g., this supports single and double quotes with escape character '\' - as: { delimiters = [ {|"|}, {|'|} ]; escape_character = '\\' } *) - type escapable_string_literals = - { delimiters : string list - ; escape_character: char - } - - (** Defines comment syntax as one of Multiline, Nested_multiline with - associated left and right delimiters, or Until_newline that defines a - comment prefix. associated prefix. *) - type comment_kind = - | Multiline of string * string - | Nested_multiline of string * string - | Until_newline of string - - (** Defines syntax as: - - - [user_defined_delimiters] are delimiters treated as code structures - (parentheses, brackets, braces, alphabetic words) - - [escapable_string_literals] are escapable quoted strings - - - [raw_string literals] are raw quoted strings that have no escape - character - - - [comments] are comment structures *) - type t = - { user_defined_delimiters : (string * string) list - ; escapable_string_literals : escapable_string_literals option [@default None] - ; raw_string_literals : (string * string) list - ; comments : comment_kind list - } - - val to_yojson : t -> Yojson.Safe.json - val of_yojson : Yojson.Safe.json -> (t, string) Result.t - - (** The module signature that defines language syntax for a matcher *) - module type S = sig - val user_defined_delimiters : (string * string) list - val escapable_string_literals : escapable_string_literals option - val raw_string_literals : (string * string) list - val comments : comment_kind list - end - end - - type syntax = Syntax.t - - module Info : sig - module type S = sig - val name : string - val extensions : string list - end - end + (** {3 Language} + Language definitions *) module Language : sig + + (** {4 Syntax} + + Defines the syntax structures for the target language (C, Go, etc.) that + are significant for matching. *) + module Syntax : sig + + (** Defines a set of quoted syntax for strings based on one or more + delimiters and associated escape chracter. + + E.g., this supports single and double quotes with escape character '\' + as: { delimiters = [ {|"|}, {|'|} ]; escape_character = '\\' } *) + type escapable_string_literals = + { delimiters : string list + ; escape_character: char + } + + (** Defines comment syntax as one of Multiline, Nested_multiline with + associated left and right delimiters, or Until_newline that defines a + comment prefix. associated prefix. *) + type comment_kind = + | Multiline of string * string + | Nested_multiline of string * string + | Until_newline of string + + (** Defines syntax as: + + - [user_defined_delimiters] are delimiters treated as code structures + (parentheses, brackets, braces, alphabetic words) - + [escapable_string_literals] are escapable quoted strings + + - [raw_string literals] are raw quoted strings that have no escape + character + + - [comments] are comment structures *) + type t = + { user_defined_delimiters : (string * string) list + ; escapable_string_literals : escapable_string_literals option [@default None] + ; raw_string_literals : (string * string) list + ; comments : comment_kind list + } + + val to_yojson : t -> Yojson.Safe.json + val of_yojson : Yojson.Safe.json -> (t, string) Result.t + + (** The module signature that defines language syntax for a matcher *) + module type S = sig + val user_defined_delimiters : (string * string) list + val escapable_string_literals : escapable_string_literals option + val raw_string_literals : (string * string) list + val comments : comment_kind list + end + end + + module Info : sig + module type S = sig + val name : string + val extensions : string list + end + end + module type S = sig module Info : Info.S module Syntax : Syntax.S @@ -642,7 +694,7 @@ module Matchers : sig (** [create metasyntax syntax] creates a matcher for a language defined by [syntax]. If [metasyntax] is specified, the matcher will use a custom metasyntax definition instead of the default. *) - val create : ?metasyntax:Metasyntax.t -> Syntax.t -> (module Matcher.S) + val create : ?metasyntax:Metasyntax.t -> Language.Syntax.t -> (module Matcher.S) end end @@ -681,40 +733,21 @@ module Matchers : sig -> match' list -> replacement option - (** [substitute metasyntax fresh template environment] substitutes [template] - with the variable and value pairs in the [environment]. It returns the - result after substitution, and the list of variables in [environment] that - were substituted for. If [metasyntax] is defined, the rewrite template will - respect custom metasyntax definitions. + (** [substitute metasyntax fresh template environment] substitutes + [template] with the variable and value pairs in the [environment]. It + returns the result after substitution. If [metasyntax] is defined, the + rewrite template will respect custom metasyntax definitions. The syntax :[id()] is substituted with fresh values. If [fresh] is not specified, the default behavior substitutes :[id()] starting with 1, and - subsequent :[id()] values increment the ID. If [fresh] is set, substitutes - the pattern :[id()] with the value of fresh () as the hole is encountered, - left to right. *) + subsequent :[id()] values increment the ID. If [fresh] is set, + substitutes the pattern :[id()] with the value of fresh () as the hole is + encountered, left to right. *) val substitute : ?metasyntax:metasyntax -> ?fresh:(unit -> string) -> string -> Match.environment - -> (string * string list) - - type syntax = - { variable: string - ; pattern: string - } - - type extracted = - | Hole of syntax - | Constant of string - - module Make : Metasyntax.S -> sig - val parse : string -> extracted list option - val variables : string -> syntax list - end - - val get_offsets_for_holes : syntax list -> string -> (string * int) list - - val get_offsets_after_substitution : (string * int) list -> Match.environment -> (string * int) list + -> string end end diff --git a/lib/kernel/match/dune b/lib/kernel/match/dune index 5ecaa5f..03b120a 100644 --- a/lib/kernel/match/dune +++ b/lib/kernel/match/dune @@ -3,4 +3,4 @@ (public_name comby-kernel.match) (instrumentation (backend bisect_ppx)) (preprocess (pps ppx_deriving.eq ppx_sexp_conv ppx_deriving_yojson)) - (libraries comby-kernel.parsers core_kernel yojson ppx_deriving_yojson ppx_deriving_yojson.runtime)) + (libraries core_kernel yojson ppx_deriving_yojson ppx_deriving_yojson.runtime)) diff --git a/lib/kernel/matchers/alpha.ml b/lib/kernel/matchers/alpha.ml index cb2ce9f..d526482 100644 --- a/lib/kernel/matchers/alpha.ml +++ b/lib/kernel/matchers/alpha.ml @@ -41,17 +41,6 @@ let is_not p s = | Some c -> Consumed_ok (c, advance_state s 1, No_error) | None -> Empty_failed (unknown_error s) -let infer_equality_constraints environment = - let vars = Match.Environment.vars environment in - List.fold vars ~init:[] ~f:(fun acc var -> - if String.is_suffix var ~suffix:"_equal" then - match String.split var ~on:'_' with - | _uuid :: target :: _equal -> - let expression = Rule.Ast.Equal (Variable var, Variable target) in - expression::acc - | _ -> acc - else - acc) type 'a literal_parser_callback = contents:string -> left_delimiter:string -> right_delimiter:string -> 'a type 'a nested_delimiter_callback = left_delimiter:string -> right_delimiter:string -> 'a @@ -63,6 +52,20 @@ module Make (Lang : Types.Language.S) (Meta : Metasyntax.S) = struct let wildcard = "_" + let create v = + Types.Ast.Template [Hole { variable = v; pattern = v; offset = 0; kind = Value }] + + let implicit_equals_satisfied environment identifier range matched = + if debug then Format.printf "Looking up %s@." identifier; + match Environment.lookup environment identifier with + | None -> Some (Environment.add ~range environment identifier (String.concat matched)) + | Some _ when String.(identifier = wildcard) -> Some environment + | Some existing_value when String.(existing_value = String.concat matched) -> + let identifier' = Format.sprintf "%s_equal_%s" identifier (!configuration_ref.fresh ()) in + let environment' = Environment.add ~range environment identifier' (String.concat matched) in + Some environment' + | _ -> None + let escapable_string_literal_parser (f : 'a literal_parser_callback) = (match Syntax.escapable_string_literals with | None -> [] @@ -173,7 +176,7 @@ module Make (Lang : Types.Language.S) (Meta : Metasyntax.S) = struct | _ -> assert false let is_alphanum delim = Pcre.(pmatch ~rex:(regexp "^[[:alnum:]]+$") delim) - let whitespace : (Types.id, Match.t) parser = many1 space |>> String.of_char_list + let whitespace : (string, Match.t) parser = many1 space |>> String.of_char_list let not_alphanum = many1 (is_not alphanum) |>> String.of_char_list let reserved_alphanum_delimiter_must_satisfy = Syntax.user_defined_delimiters @@ -263,14 +266,15 @@ module Make (Lang : Types.Language.S) (Meta : Metasyntax.S) = struct | Hole (sort, Delimited (left, right)) -> (sort, (p left >> hole_body () << p right))::acc | Hole (sort, Reserved_identifiers l) -> - (sort, choice (List.map ~f:(fun s -> string s |>> fun s -> s) l))::acc + (sort, choice (List.map ~f:string l))::acc | Regex (left, separator, right) -> (Regex, (p (Some left) >> regex_body separator right () << p (Some right)))::acc) let reserved_holes = List.map hole_parsers ~f:(fun (_, parser) -> parser >>= fun _ -> return "") - let reserved_delimiters () = + let reserved_parsers () = + (* Alphanum blocks *) let required_from_suffix = not_alphanum in let required_until_suffix = not_alphanum in let handle_alphanum_delimiters_reserved_trigger from until = @@ -297,36 +301,34 @@ module Make (Lang : Types.Language.S) (Meta : Metasyntax.S) = struct in [from_parser; until_parser] in - let reserved_delimiters = + (* Isomorphic to Omega *) + let user_defined_reserved_delimiters = List.concat_map Syntax.user_defined_delimiters ~f:(fun (from, until) -> if is_alphanum from && is_alphanum until then handle_alphanum_delimiters_reserved_trigger from until else [ string from; string until]) in - let reserved_escapable_strings = + let user_defined_reserved_escapable_strings = match Syntax.escapable_string_literals with | Some { delimiters; _ } -> - List.concat_map delimiters ~f:(fun delimiter -> [delimiter]) - |> List.map ~f:string + List.concat_map delimiters ~f:(fun delimiter -> [string delimiter]) | None -> [] in - let reserved_raw_strings = - List.concat_map Syntax.raw_string_literals ~f:(fun (from, until) -> [from; until]) - |> List.map ~f:string + let user_defined_reserved_raw_strings = + List.concat_map Syntax.raw_string_literals ~f:(fun (from, until) -> [string from; string until]) in - let reserved_comments = + let user_defined_reserved_comments = List.concat_map Syntax.comments ~f:(function - | Multiline (left, right) -> [left; right] - | Nested_multiline (left, right) -> [left; right] - | Until_newline start -> [start]) - |> List.map ~f:string + | Multiline (left, right) -> [string left; string right] + | Nested_multiline (left, right) -> [string left; string right] + | Until_newline start -> [string start]) in - [ reserved_holes - ; reserved_delimiters - ; reserved_escapable_strings - ; reserved_raw_strings - ; reserved_comments + [ user_defined_reserved_delimiters + ; reserved_holes + ; user_defined_reserved_escapable_strings + ; user_defined_reserved_raw_strings + ; user_defined_reserved_comments (* only needed once it's significant for matching and not treated like spaces*) ] |> List.concat |> List.map ~f:skip @@ -335,10 +337,6 @@ module Make (Lang : Types.Language.S) (Meta : Metasyntax.S) = struct |> List.map ~f:attempt |> choice - let reserved _s = - reserved_delimiters () - <|> skip (space |>> Char.to_string) - let until_of_from from = Syntax.user_defined_delimiters |> List.find_map ~f:(fun (from', until) -> if String.equal from from' then Some until else None) @@ -356,42 +354,37 @@ module Make (Lang : Types.Language.S) (Meta : Metasyntax.S) = struct else post_index, post_line, post_column in - update_user_state - (fun ({ Match.environment; _ } as result) -> - if debug then begin - Format.printf "Updating user state:@."; - Format.printf "%s |-> %s@." identifier (String.concat matched); - Format.printf "ID %s: %d:%d:%d - %d:%d:%d@." - identifier - pre_index pre_line pre_column - post_index post_line post_column; - end; - let pre_location : Location.t = - Location. - { offset = pre_index - ; line = pre_line - ; column = pre_column - } - in - let post_location : Location.t = - Location. - { offset = post_index - ; line = post_line - ; column = post_column - } - in - let range = { match_start = pre_location; match_end = post_location } in - let environment = - if Environment.exists environment identifier && String.(identifier <> wildcard) then - let fresh_hole_id = - Format.sprintf "%s_%s_equal" (!configuration_ref.fresh ()) identifier - in - Environment.add ~range environment fresh_hole_id (String.concat matched) - else - Environment.add ~range environment identifier (String.concat matched) - in - { result with environment }) - >>= fun () -> f matched + get_user_state >>= fun { environment; _ } -> + let pre_location : Location.t = + Location. + { offset = pre_index + ; line = pre_line + ; column = pre_column + } + in + let post_location : Location.t = + Location. + { offset = post_index + ; line = post_line + ; column = post_column + } + in + let range = { match_start = pre_location; match_end = post_location } in + match implicit_equals_satisfied environment identifier range matched with + | None -> fail "don't record, unsat" + | Some environment -> + update_user_state + (fun result -> + if debug then begin + Format.printf "Updating user state:@."; + Format.printf "%s |-> %s@." identifier (String.concat matched); + Format.printf "ID %s: %d:%d:%d - %d:%d:%d@." + identifier + pre_index pre_line pre_column + post_index post_line post_column; + end; + { result with environment }) + >>= fun () -> f matched let alphanum_delimiter_must_satisfy = many1 (is_not (skip (choice reserved_alphanum_delimiter_must_satisfy) <|> skip alphanum)) @@ -410,12 +403,11 @@ module Make (Lang : Types.Language.S) (Meta : Metasyntax.S) = struct Syntax.user_defined_delimiters in let reserved = - List.concat_map delimiters ~f:(fun (from, until) -> - [string from; string until] - ) + List.concat_map delimiters ~f:(fun (from, until) -> [string from; string until]) |> List.map ~f:attempt |> choice in + let other = is_not reserved |>> String.of_char in (* A parser that understands the hole matching cut off points happen at delimiters. *) let rec nested_grammar s = @@ -426,7 +418,7 @@ module Make (Lang : Types.Language.S) (Meta : Metasyntax.S) = struct <|> (attempt @@ delims_over_holes) (* Only consume if not reserved. If it is reserved, we want to trigger the 'many' in (many nested_grammar) to continue. *) - <|> (is_not (reserved <|> (space |>> Char.to_string)) |>> String.of_char)) + <|> other) s and delims_over_holes s = let between_nested_delims p = @@ -621,7 +613,7 @@ module Make (Lang : Types.Language.S) (Meta : Metasyntax.S) = struct if debug then Format.printf "Prefix parser unsat@."; fail "unsat" - let turn_holes_into_matchers_for_this_level ?left_delimiter ?right_delimiter p_list = + let turn_holes_into_matchers_for_this_level ?left_delimiter ?right_delimiter (p_list : (Types.production, Match.t) parser list) : (Types.production, Match.t) parser list = List.fold (List.rev p_list) ~init:[] ~f:(fun acc p -> match parse_string p "_signal_hole" (Match.create ()) with | Failed _ -> p::acc @@ -633,8 +625,8 @@ module Make (Lang : Types.Language.S) (Meta : Metasyntax.S) = struct | Hole _ -> None | Regex (_, separator, _) -> Some separator) in - let identifier, pattern = String.lsplit2_exn identifier ~on:separator in - let identifier = if String.(identifier = "") then "_" else identifier in + let identifier, pattern = String.lsplit2_exn identifier ~on:separator in (* FIXME parse *) + let identifier = if String.(identifier = "") then wildcard else identifier in if debug then Format.printf "Regex: Id: %s Pat: %s@." identifier pattern; let compiled_regexp = R.make_regexp pattern in let regexp_parser = R.regexp compiled_regexp in @@ -674,7 +666,7 @@ module Make (Lang : Types.Language.S) (Meta : Metasyntax.S) = struct | Non_space -> let allowed = - [skip space; reserved_delimiters ()] + [skip space; reserved_parsers ()] |> choice |> is_not |>> Char.to_string @@ -702,7 +694,7 @@ module Make (Lang : Types.Language.S) (Meta : Metasyntax.S) = struct | Expression -> let non_space = - [skip space; reserved_delimiters ()] + [skip space; reserved_parsers ()] |> choice |> is_not |>> Char.to_string @@ -751,13 +743,12 @@ module Make (Lang : Types.Language.S) (Meta : Metasyntax.S) = struct | Success _ -> failwith "Hole expected") let hole_parser ?at_depth sort dimension = - let open Types in - let open Hole in + let open Types.Hole in let hole_parser = let open Polymorphic_compare in List.fold ~init:[] hole_parsers ~f:(fun acc (sort', parser) -> if sort' = sort then parser::acc else acc) in - let skip_signal hole = skip (string "_signal_hole") |>> fun () -> Hole hole in + let skip_signal hole = skip (string "_signal_hole") |>> fun () -> Types.Hole hole in let at_depth = if !configuration_ref.match_newline_toplevel then None @@ -812,7 +803,7 @@ module Make (Lang : Types.Language.S) (Meta : Metasyntax.S) = struct and common _s = let holes at_depth = hole_parsers - |> List.map ~f:(fun (kind, _) -> attempt (hole_parser kind Code ~at_depth)) + |> List.map ~f:(fun (sort, _) -> attempt (hole_parser sort Code ~at_depth)) in choice [ (choice (holes !depth) >>= fun result -> if debug then Format.printf "Depth hole %d@." !depth; return result) @@ -826,9 +817,14 @@ module Make (Lang : Types.Language.S) (Meta : Metasyntax.S) = struct (* Optional: parse identifiers and disallow substring matching *) ; if !configuration_ref.disable_substring_matching then many1 (alphanum <|> char '_') |>> generate_word else zero (* Everything else. *) - ; (many1 (is_not (reserved _s)) >>= fun cl -> + ; (many1 @@ + is_not @@ + choice + [ reserved_parsers () + ; skip space + ] |>> fun cl -> if debug then Format.printf "%s" @@ String.of_char_list cl; - return @@ String.of_char_list cl) + String.of_char_list cl) |>> generate_string_token_parser ] @@ -931,6 +927,7 @@ module Make (Lang : Types.Language.S) (Meta : Metasyntax.S) = struct (** shift: start the scan in the source at an offset *) let first' shift p source : Match.t Or_error.t = + if debug then Format.printf "First for shift %d@." shift; let set_start_pos p = fun s -> p (advance_state s shift) in let p = set_start_pos p in match parse_string (pair p get_user_state) source (Match.create ()) with @@ -960,7 +957,8 @@ module Make (Lang : Types.Language.S) (Meta : Metasyntax.S) = struct in first' shift p source - let all ?configuration ?rule ?(nested = false) ~template ~source:original_source () : Match.t list = + let all ?configuration ?(rule = [Types.Ast.True]) ~template ~source:original_source () : Match.t list = + let Rule.{ nested } = Rule.options rule in let rec aux_all ?configuration ?(nested = false) ~template ~source:original_source () = let open Or_error in depth := (-1); @@ -980,6 +978,7 @@ module Make (Lang : Types.Language.S) (Meta : Metasyntax.S) = struct let rec aux acc shift = match first' shift p original_source with | Ok ({range = { match_start; match_end; _ }; environment; _} as result) -> + if debug then Format.printf "Ok first'"; let shift = match_end.offset in let shift, matched = if match_start.offset = match_end.offset then @@ -990,18 +989,15 @@ module Make (Lang : Types.Language.S) (Meta : Metasyntax.S) = struct if debug then Format.printf "Extracted matched: %s@." matched; let result = { result with matched } in let result = - match rule with - | None -> Some result - | Some rule -> - let rule = rule @ infer_equality_constraints environment in - (* FIXME metasyntax should propagate *) - let sat, env = Program.apply ~metasyntax:Metasyntax.default_metasyntax ~substitute_in_place:true rule environment in - if debug && Option.is_some env then Format.printf "Got back: %b %S" sat (Match.Environment.to_string @@ Option.value_exn env); - let new_env = if sat then env else None in - match new_env with - | None -> None - | Some env -> - Some { result with environment = env } + if debug then Format.printf "Rule: %s@." (Sexp.to_string @@ Rule.sexp_of_t rule); + (* FIXME metasyntax should propagate *) + let sat, env = Program.apply ~metasyntax:Metasyntax.default_metasyntax ~substitute_in_place:true rule environment in + if debug && Option.is_some env then Format.printf "Got back: %b %S" sat (Match.Environment.to_string @@ Option.value_exn env); + let new_env = if sat then env else None in + match new_env with + | None -> None + | Some env -> + Some { result with environment = env } in if shift >= String.length original_source then (match result with @@ -1089,7 +1085,6 @@ module Make (Lang : Types.Language.S) (Meta : Metasyntax.S) = struct and Program : sig val apply : ?substitute_in_place:bool - -> ?fresh:(unit -> string) -> ?metasyntax:Types.Metasyntax.t -> Rule.t -> Match.environment @@ -1098,15 +1093,13 @@ module Make (Lang : Types.Language.S) (Meta : Metasyntax.S) = struct let apply ?(substitute_in_place = true) - ?(fresh = Evaluate.counter) ?metasyntax rule env = Evaluate.apply ~substitute_in_place - ~fresh ?metasyntax - ~match_all:(Matcher.all ~rule:[Rule.Ast.True] ~nested:false) + ~match_all:(Matcher.all ~rule:[Types.Ast.True]) rule env end diff --git a/lib/kernel/matchers/ast.ml b/lib/kernel/matchers/ast.ml new file mode 100644 index 0000000..b4546cc --- /dev/null +++ b/lib/kernel/matchers/ast.ml @@ -0,0 +1,5 @@ +open Types.Ast + +let (=) left right = Equal (left, right) + +let (<>) left right = Not_equal (left, right) diff --git a/lib/kernel/matchers/dune b/lib/kernel/matchers/dune index 316d8e3..068e826 100644 --- a/lib/kernel/matchers/dune +++ b/lib/kernel/matchers/dune @@ -3,4 +3,4 @@ (public_name comby-kernel.matchers) (instrumentation (backend bisect_ppx)) (preprocess (pps ppx_here ppx_sexp_conv ppx_sexp_message ppx_deriving_yojson)) - (libraries comby-kernel.replacement comby-kernel.parsers comby-kernel.match angstrom core_kernel mparser mparser-pcre re yojson ppx_deriving_yojson)) + (libraries comby-kernel.replacement comby-kernel.parsers comby-kernel.match comby.vangstrom core_kernel mparser mparser-pcre re yojson ppx_deriving_yojson)) diff --git a/lib/kernel/matchers/engine.ml b/lib/kernel/matchers/engine.ml index d091036..ee235c2 100644 --- a/lib/kernel/matchers/engine.ml +++ b/lib/kernel/matchers/engine.ml @@ -7,7 +7,7 @@ module Make (Make : Types.Language.S -> Types.Metasyntax.S -> Types.Matcher.S) : let create ?(metasyntax = Metasyntax.default_metasyntax) - Types.Syntax. + Types.Language.Syntax. { user_defined_delimiters ; escapable_string_literals ; raw_string_literals diff --git a/lib/kernel/matchers/evaluate.ml b/lib/kernel/matchers/evaluate.ml index d2cc9af..0ac40eb 100644 --- a/lib/kernel/matchers/evaluate.ml +++ b/lib/kernel/matchers/evaluate.ml @@ -1,8 +1,7 @@ open Core_kernel open Match -open Rule -open Ast +open Types.Ast let debug = match Sys.getenv "DEBUG_COMBY" with @@ -29,131 +28,102 @@ let merge_match_environments matches environment' = List.map matches ~f:(fun { environment; _ } -> Environment.merge environment environment') -type rewrite_context = - { variable : string } +(* FIXME. Propagate this. *) +module Template = Template.Make(Metasyntax.Default) -let counter = - let uuid_for_id_counter = ref 0 in - fun () -> - uuid_for_id_counter := !uuid_for_id_counter + 1; - Format.sprintf "gu3ssme_%012d" !uuid_for_id_counter +let substitute env = function + | Template t -> Rewrite.substitute (Template.to_string t) env + | String s -> s -let equal_in_environment var value env = - match Environment.lookup env var with - | None -> false, Some env - | Some var_value -> String.equal var_value value, Some env - -let rec apply +let apply ?(substitute_in_place = true) - ?(fresh = counter) ?metasyntax ~(match_all:(?configuration:Configuration.t -> template:string -> source:string -> unit -> Match.t list)) predicates env = - let open Option in (* accepts only one expression *) - let rec rule_match ?(rewrite_context : rewrite_context option) env = + let rec eval env = function + (* true *) | True -> true, Some env + (* false *) | False -> false, Some env + (* option *) | Option _ -> true, Some env - | Equal (Variable var, String value) - | Equal (String value, Variable var) -> - equal_in_environment var value env + (* ==, != *) + | Equal (Template t, String value) + | Equal (String value, Template t) -> + let other = Rewrite.substitute (Template.to_string t) env in + let result = String.equal value other in + result, Some env | Equal (String left, String right) -> - String.equal left right, Some env - | Equal (Variable left, Variable right) -> - let result = - Environment.lookup env left >>= fun left -> - Environment.lookup env right >>= fun right -> - return (String.equal left right) - in - Option.value result ~default:false, Some env + let result = String.equal left right in + result, Some env + | Equal (Template left, Template right) -> + let left = Rewrite.substitute (Template.to_string left) env in + let right = Rewrite.substitute (Template.to_string right) env in + let result = String.equal left right in + result, Some env | Not_equal (left, right) -> - let sat, env = rule_match env (Equal (left, right)) in + let sat, env = eval env (Equal (left, right)) in not sat, env - | Match (Variable variable, cases) -> - if debug then Format.printf "ENV: %s@." (Environment.to_string env); - let result = - Environment.lookup env variable >>= fun source -> - let evaluate template case_expression = - let configuration = match_configuration_of_syntax template in - if debug then Format.printf "Running for template %s source %s@." template source; - match_all ~configuration ~template ~source () |> function - | [] -> - None - | matches -> - (* merge environments. overwrite behavior is undefined *) - if debug then Format.printf "Matches: %a@." Match.pp (None, matches); - let fold_matches (sat, out) { environment; _ } = - let fold_cases (sat, out) predicate = - if sat then - let env' = Environment.merge env environment in - rule_match ?rewrite_context env' predicate - else - (sat, out) - in - List.fold case_expression ~init:(sat, out) ~f:fold_cases + + (* match ... { ... } *) + | Match (source, cases) -> + let source = substitute env source in + let evaluate template case_expression = + let template = substitute env template in + let configuration = match_configuration_of_syntax template in + if debug then Format.printf "Running for template %s source %s@." template source; + match_all ~configuration ~template ~source () |> function + | [] -> + None + | matches -> + (* merge environments. overwrite behavior is undefined *) + if debug then Format.printf "Matches: %a@." Match.pp (None, matches); + let fold_matches (sat, out) { environment; _ } = + let fold_cases (sat, out) predicate = + if sat then + let env' = Environment.merge env environment in + eval env' predicate + else + (sat, out) in - List.fold matches ~init:(true, None) ~f:fold_matches - |> Option.some - in - List.find_map cases ~f:(fun (template, case_expression) -> - match template with - | String template - | Variable template -> - evaluate template case_expression) + List.fold case_expression ~init:(sat, out) ~f:fold_cases + in + List.fold matches ~init:(true, None) ~f:fold_matches + |> Option.some in - Option.value_map result ~f:ident ~default:(false, Some env) - | Match (String template, cases) -> - let source, _ = Rewriter.Rewrite_template.substitute ?metasyntax template env in - let fresh_var = fresh () in - let env = Environment.add env fresh_var source in - rule_match env (Match (Variable fresh_var, cases)) - | RewriteTemplate rewrite_template -> - begin - match rewrite_context with - | None -> false, None - | Some { variable; _ } -> - (* FIXME(RVT) assumes only contextual rewrite for now. *) - let env = - Rewrite_template.substitute rewrite_template env - |> fst - |> fun replacement' -> - Environment.update env variable replacement' - |> Option.some - in - true, env - end - | Rewrite (Variable variable, (match_template, rewrite_expression)) -> - begin match rewrite_expression with - | RewriteTemplate rewrite_template -> - let template = - match match_template with - | Variable _ -> failwith "Invalid syntax in rewrite LHS" - | String template -> template - in - let result = - Environment.lookup env variable >>= fun source -> - let configuration = Configuration.create ~match_kind:Fuzzy () in - let matches = match_all ~configuration ~template ~source () in - let source = if substitute_in_place then Some source else None in - let result = Rewrite.all ?metasyntax ?source ~rewrite_template matches in - match result with - | Some { rewritten_source; _ } -> - (* substitute for variables that are in the outside scope *) - let rewritten_source, _ = Rewrite_template.substitute ?metasyntax rewritten_source env in - let env = Environment.update env variable rewritten_source in - return (true, Some env) - | None -> - return (true, Some env) - in - Option.value_map result ~f:ident ~default:(false, Some env) - | _ -> failwith "Not implemented yet" - end + List.find_map cases ~f:(fun (template, case_expression) -> evaluate template case_expression) + |> Option.value_map ~f:ident ~default:(false, Some env) + + (* rewrite ... { ... } *) + | Rewrite (Template t, (match_template, rewrite_template)) -> + let rewrite_template = substitute env rewrite_template in + let template = substitute env match_template in + let source = Rewrite.substitute (Template.to_string t) env in + let configuration = Configuration.create ~match_kind:Fuzzy () in + let matches = match_all ~configuration ~template ~source () in + let source = if substitute_in_place then Some source else None in + let result = Rewrite.all ?metasyntax ?source ~rewrite_template matches in + if Option.is_empty result then + true, Some env (* rewrites are always sat. *) + else + let Replacement.{ rewritten_source; _ } = Option.value_exn result in + (* substitute for variables that are in the outside scope *) + let rewritten_source = Rewrite.substitute ?metasyntax rewritten_source env in + let variable = + match t with + | [ Types.Template.Hole { variable; _ } ] -> variable + | _ -> failwith "Cannot substitute for this template" + in + let env = Environment.update env variable rewritten_source in + true, Some env + | Rewrite _ -> failwith "TODO/Invalid: Have not decided whether rewrite \":[x]\" is useful." in + List.fold predicates ~init:(true, None) ~f:(fun (sat, out) predicate -> if sat then let env = @@ -161,6 +131,6 @@ let rec apply ~f:(fun out -> Environment.merge out env) ~default:env in - rule_match env predicate + eval env predicate else (sat, out)) diff --git a/lib/kernel/matchers/languages.ml b/lib/kernel/matchers/languages.ml index 0b12581..b10ea43 100644 --- a/lib/kernel/matchers/languages.ml +++ b/lib/kernel/matchers/languages.ml @@ -1,6 +1,6 @@ open Core_kernel -open Types.Syntax +open Types.Language.Syntax let ordinary_string = Some { delimiters = [{|"|}]; escape_character = '\\' } diff --git a/lib/kernel/matchers/matchers.ml b/lib/kernel/matchers/matchers.ml index 4823465..8939676 100644 --- a/lib/kernel/matchers/matchers.ml +++ b/lib/kernel/matchers/matchers.ml @@ -2,6 +2,10 @@ module Configuration = Configuration module Languages = Languages module Metasyntax = Metasyntax module Rule = Rule +module Ast = struct + include Types.Ast + include Ast +end module Evaluate = Evaluate module Alpha = Engine.Make(Alpha.Make) @@ -9,12 +13,15 @@ module Omega = Engine.Make(Omega.Make) module Engine = Types.Engine module Matcher = Types.Matcher -module Info = Types.Info -module Syntax = Types.Syntax module Hole = Types.Hole module Language = Types.Language module Script = Script module Specification = Specification -module Rewriter = Rewriter +module Template = struct + include Types.Template + include Template +end + +module Rewriter = Rewrite diff --git a/lib/kernel/matchers/matchers.mli b/lib/kernel/matchers/matchers.mli index dd651f0..2670db3 100644 --- a/lib/kernel/matchers/matchers.mli +++ b/lib/kernel/matchers/matchers.mli @@ -2,6 +2,9 @@ module Configuration = Configuration module Languages = Languages module Metasyntax = Metasyntax module Rule = Rule + +(* Only need to expose Types.Ast. module type of to export sexp. *) +module Ast : module type of Types.Ast module Evaluate = Evaluate module Alpha : Types.Engine.S @@ -9,12 +12,15 @@ module Omega : Types.Engine.S module Engine = Types.Engine module Matcher = Types.Matcher -module Info = Types.Info -module Syntax = Types.Syntax module Hole = Types.Hole module Language = Types.Language module Script : module type of Script module Specification : module type of Specification -module Rewriter = Rewriter +module Template : sig + include module type of Types.Template + include module type of Template +end + +module Rewriter = Rewrite diff --git a/lib/kernel/matchers/metasyntax.ml b/lib/kernel/matchers/metasyntax.ml index 136367a..314aba3 100644 --- a/lib/kernel/matchers/metasyntax.ml +++ b/lib/kernel/matchers/metasyntax.ml @@ -1,6 +1,5 @@ include Types.Metasyntax -(* Format.printf "%s@." @@ Matchers.Metasyntax.(json Matchers.Metasyntax.default_metasyntax);; *) let default_syntax = [ Hole (Everything, Delimited (Some ":[", Some "]")) ; Hole (Expression, Delimited (Some ":[", Some ":e]")) @@ -33,5 +32,6 @@ let default = create default_metasyntax module Default = (val default) +(* In utop: Format.printf "%s@." @@ Matchers.Metasyntax.(json Matchers.Metasyntax.default_metasyntax);; *) let json metasyntax = Yojson.Safe.pretty_to_string @@ to_yojson metasyntax diff --git a/lib/kernel/matchers/omega.ml b/lib/kernel/matchers/omega.ml index f4fff7d..bc41c81 100644 --- a/lib/kernel/matchers/omega.ml +++ b/lib/kernel/matchers/omega.ml @@ -1,8 +1,8 @@ open Core_kernel -open Angstrom +open Vangstrom -open Omega_parser +open Omega_parser_helper type omega_match_production = { offset : int @@ -19,20 +19,17 @@ type production = | Match of omega_match_production let configuration_ref = ref (Configuration.create ()) + +let implicit_equals_match_satisfied : bool ref = ref true let current_environment_ref : Match.Environment.t ref = ref (Match.Environment.create ()) let matches_ref : Match.t list ref = ref [] let source_ref : string ref = ref "" +let push_implicit_equals_match_satisfied : bool ref = ref true let push_environment_ref : Match.Environment.t ref = ref (Match.Environment.create ()) let push_matches_ref : Match.t list ref = ref [] let push_source_ref : string ref = ref "" -let (|>>) p f = - p >>= fun x -> return (f x) - -let ignore p = - p *> return () - let debug = match Sys.getenv "DEBUG_COMBY" with | exception Not_found -> false @@ -47,44 +44,12 @@ let actual = Buffer.create 10 let rewrite_template = ref "" -let substitute template env = - let substitution_formats = - [ ":[ ", "]" - ; ":[", ".]" - ; ":[", "\\n]" - ; ":[[", "]]" - ; ":[", "]" - ] - in - Match.Environment.vars env - |> List.fold ~init:(template, []) ~f:(fun (acc, vars) variable -> - match Match.Environment.lookup env variable with - | Some value -> - List.find_map substitution_formats ~f:(fun (left,right) -> - let pattern = left^variable^right in - if Option.is_some (String.substr_index template ~pattern) then - Some (String.substr_replace_all acc ~pattern ~with_:value, variable::vars) - else - None) - |> Option.value ~default:(acc,vars) - | None -> acc, vars) - -let infer_equality_constraints environment = - let vars = Match.Environment.vars environment in - List.fold vars ~init:[] ~f:(fun acc var -> - if String.is_suffix var ~suffix:"_equal" then - match String.split var ~on:'_' with - | _uuid :: target :: _equal -> - let expression = Rule.Ast.Equal (Variable var, Variable target) in - expression::acc - | _ -> acc - else - acc) - -module Make (Language : Types.Language.S) (Unimplemented : Metasyntax.S) = struct +module Make (Language : Types.Language.S) (Meta : Metasyntax.S) = struct module rec Matcher : Types.Matcher.S = struct include Language.Info + module Template = Template.Make(Meta) + let wildcard = "_" (* This is the init we will pass in with a functor later *) @@ -103,6 +68,30 @@ module Make (Language : Types.Language.S) (Unimplemented : Metasyntax.S) = struc if debug then Format.printf "Match@."; acc + let create v = + Types.Ast.Template [Hole { variable = v; pattern = v; offset = 0; kind = Value }] + + let implicit_equals_satisfied environment identifier range matched = + let open Match in + if debug then Format.printf "Looking up %s@." identifier; + match Environment.lookup environment identifier with + | None -> Some (Environment.add ~range environment identifier matched) + | Some _ when String.(identifier = wildcard) -> Some environment + | Some existing_value -> + if debug then Format.printf "Existing: identifier: %s Value: %s@." identifier existing_value; + let Range.{ match_start = { offset; _ }; _ } = Option.value_exn (Environment.lookup_range environment identifier) in + if offset = range.match_start.offset then + (* case when already present from rest parser *) + Some environment + else if String.(existing_value = matched) then + (* equals existing. record a witness *) + let identifier' = Format.sprintf "%s_equal_%s" identifier (!configuration_ref.fresh ()) in + let environment' = Environment.add ~range environment identifier' matched in + Some environment' + else + (* exists and not equal *) + None + let r acc production : (production * 'a) t = let open Match in let open Location in @@ -113,35 +102,42 @@ module Make (Language : Types.Language.S) (Unimplemented : Metasyntax.S) = struc if debug then Format.printf "Saw String: %S@." s; return (Unit, acc) | Match { offset = pos_begin; identifier; text = content } -> - if debug then Format.printf "Match: %S @@ %d for %s@." content pos_begin identifier; - (* line/col values are placeholders and not accurate until processed in pipeline.ml *) - let before = { offset = pos_begin; line = 1; column = pos_begin + 1 } in - let pos_after_offset = pos_begin + String.length content in - let after = { offset = pos_after_offset; line = 1; column = pos_after_offset + 1 } in - let range = { match_start = before; match_end = after } in - let add identifier = Environment.add ~range !current_environment_ref identifier content in - let environment = - match Environment.exists !current_environment_ref identifier && String.(identifier <> wildcard) with - | true -> - let fresh_hole_id = - Format.sprintf "%s_%s_equal" (!configuration_ref.fresh ()) identifier - in - add fresh_hole_id - | false -> add identifier - in - current_environment_ref := environment; - return (Unit, acc) + (* Inefficiency: a Match production happens even for hole parsers in 'rest'. It's difficult to + tease out the right time to record, or manipulate the parsers here. Instead, we check whether + the environment already contains this hole (to avoid adding a _equal_ entry) by looking + at whether the variable at that offset is already recorded, which uniquely identifies whether + we've already seen it *) + begin + if debug then Format.printf "Match: %S @@ %d for %s@." content pos_begin identifier; + (* line/col values are placeholders and not accurate until processed in pipeline.ml *) + let before = { offset = pos_begin; line = 1; column = pos_begin + 1 } in + let pos_after_offset = pos_begin + String.length content in + let after = { offset = pos_after_offset; line = 1; column = pos_after_offset + 1 } in + let range = { match_start = before; match_end = after } in + if debug then Format.printf "record@."; + match implicit_equals_satisfied !current_environment_ref identifier range content with + | None -> implicit_equals_match_satisfied := false; return (Unit, acc) (* don't record, unsat *) + | Some environment -> + let environment = Environment.add ~range environment identifier content in + current_environment_ref := environment; + return (Unit, acc) + end | _ -> return (Unit, acc) + (* previous r cannot affect control flow match_context to ignore adding a match if a equivalence was refuted *) let record_match_context pos_before pos_after rule = let open Match.Location in if debug then Format.printf "match context start pos: %d@." pos_before; if debug then Format.printf "match context end pos %d@." pos_after; let extract_matched_text source { offset = match_start; _ } { offset = match_end; _ } = if debug then Format.printf "Attempt slice start %d end %d on %S@." match_start match_end source; - String.slice source match_start match_end + if match_start = 0 && match_end = 0 then + (* Special case: slice will return the whole string if match_start is + 0 and match_end is 0. It needs to be empty string *) + "" + else + String.slice source match_start match_end in - (* line/col values are placeholders and not accurate until processed in pipeline.ml *) let match_context = let match_start = { offset = pos_before; line = 1; column = pos_before + 1 } in let match_end = { offset = pos_after; line = 1; column = pos_after + 1 } in @@ -158,7 +154,7 @@ module Make (Language : Types.Language.S) (Unimplemented : Metasyntax.S) = struc | None -> if rewrite then begin - let result, _ = substitute !rewrite_template !current_environment_ref in + let result, _ = Template.substitute (Template.parse !rewrite_template) !current_environment_ref in (* Don't just append, but replace the match context including constant strings. I.e., somewhere where we are appending the parth that matched, it shouldn't, and instead just ignore. *) @@ -166,11 +162,18 @@ module Make (Language : Types.Language.S) (Unimplemented : Metasyntax.S) = struc end; matches_ref := match_context :: !matches_ref | Some rule -> - let rule = rule @ infer_equality_constraints !current_environment_ref in - (* FIXME Metasyntax should be propagated here. FIXME fresh should be propagated here.*) push_environment_ref := !current_environment_ref; - let sat, env = Program.apply ~metasyntax:Metasyntax.default_metasyntax ~substitute_in_place:true rule !current_environment_ref in + push_implicit_equals_match_satisfied := !implicit_equals_match_satisfied; + (* FIXME Metasyntax should be propagated here. *) + let sat, env = + Program.apply + ~metasyntax:Metasyntax.default_metasyntax + ~substitute_in_place:true + rule + !current_environment_ref + in current_environment_ref := !push_environment_ref; + implicit_equals_match_satisfied := !push_implicit_equals_match_satisfied; let new_env = if sat then env else None in match new_env with | None -> @@ -180,13 +183,12 @@ module Make (Language : Types.Language.S) (Unimplemented : Metasyntax.S) = struc if debug then Format.printf "Some new env@."; current_environment_ref := env; begin - let result, _ = substitute !rewrite_template !current_environment_ref in + let result, _ = Template.substitute (Template.parse !rewrite_template) !current_environment_ref in (* Don't just append, but replace the match context including constant strings. I.e., somewhere where we are appending the parth that matched, it shouldn't, and instead just ignore. *) Buffer.add_string actual result; end; - (* let match_context = { match_context with environment = !current_environment_ref } in *) (* Needed? *) matches_ref := match_context :: !matches_ref let multiline left right = @@ -245,125 +247,84 @@ module Make (Language : Types.Language.S) (Unimplemented : Metasyntax.S) = struc | Some until -> until | None -> assert false - module Deprecate = struct - let reserved_delimiters = - List.concat_map Language.Syntax.user_defined_delimiters ~f:(fun (from, until) -> [from; until]) - |> List.append [":["; "]"] - |> List.append [":[["; "]]"] - - let reserved = - reserved_delimiters @ [" "; "\n"; "\t"; "\r"] - |> List.sort ~compare:(fun v2 v1 -> - String.length v1 - String.length v2) - end + let reserved_holes = + List.map Template.Matching.hole_parsers ~f:(fun (_, parser) -> parser *> return "") let reserved_parsers = - let user_defined_delimiters = List.concat_map Language.Syntax.user_defined_delimiters ~f:(fun (from, until) -> [from; until]) in + let user_defined_delimiters = + List.concat_map Language.Syntax.user_defined_delimiters ~f:(fun (from, until) -> + [string from; string until]) in let user_defined_escapable_strings = match Language.Syntax.escapable_string_literals with | Some { delimiters; _ } -> - List.concat_map delimiters ~f:(fun delimiter -> [delimiter]) + List.concat_map delimiters ~f:(fun delimiter -> [string delimiter]) | None -> [] in let user_defined_raw_strings = - List.concat_map Language.Syntax.raw_string_literals ~f:(fun (from, until) -> [from; until]) + List.concat_map Language.Syntax.raw_string_literals ~f:(fun (from, until) -> + [string from; string until]) in - let hole_syntax = [ ":["; "]"; ":[["; ":]]" ] in - let spaces = [ " "; "\n"; "\t"; "\r" ] in - let reserved = - user_defined_delimiters - @ user_defined_escapable_strings - @ user_defined_raw_strings - @ hole_syntax - @ spaces + let user_defined_reserved_comments = + List.concat_map Language.Syntax.comments ~f:(function + | Multiline (left, right) -> [string left; string right] + | Nested_multiline (left, right) -> [string left; string right] + | Until_newline start -> [string start]) in - choice @@ List.map reserved ~f:string + let spaces1 = [ Omega_parser_helper.spaces1 ] in + [ user_defined_delimiters + ; reserved_holes + ; user_defined_escapable_strings + ; user_defined_raw_strings + ; user_defined_reserved_comments + ; spaces1 + ] + |> List.concat + |> choice let generate_single_hole_parser () = - (alphanum <|> char '_') |>> String.of_char + (alphanum <|> char '_') >>| String.of_char - let generate_everything_hole_parser - ?priority_left_delimiter:left_delimiter - ?priority_right_delimiter:right_delimiter - () = + let delimiters left right = + match left, right with + | Some left_delimiter, Some right_delimiter -> [ (left_delimiter, right_delimiter) ] + | _ -> Language.Syntax.user_defined_delimiters + + let between_nested_delims p delimiters = let between_nested_delims p from = let until = until_of_from from in between (string from) (string until) p - >>= fun result -> return (String.concat @@ [from] @ result @ [until]) - in - let between_nested_delims p = - let parsers = - match left_delimiter, right_delimiter with - | Some left_delimiter, Some right_delimiter -> [ (left_delimiter, right_delimiter) ] - | _ -> Language.Syntax.user_defined_delimiters - in - parsers - |> List.map ~f:fst - |> List.map ~f:(between_nested_delims p) - |> choice - in - let reserved = - let parsers = - match left_delimiter, right_delimiter with - | Some left_delimiter, Some right_delimiter -> [ (left_delimiter, right_delimiter) ] - | _ -> Language.Syntax.user_defined_delimiters - in - List.concat_map parsers ~f:(fun (from, until) -> [from; until]) + >>| fun result -> String.concat @@ [from] @ result @ [until] in + delimiters + |> List.map ~f:fst + |> List.map ~f:(between_nested_delims p) + |> choice + + let generate_everything_hole_parser + ?priority_left_delimiter:left + ?priority_right_delimiter:right + () = + let delimiters = delimiters left right in + let reserved = List.concat_map delimiters ~f:(fun (from, until) -> [from; until]) in + let other = not_followed_by (choice @@ List.map reserved ~f:string) *> any_char >>| String.of_char in fix (fun grammar -> - let delimsx = between_nested_delims (many grammar) in - let other = Omega_parser.Deprecate.any_char_except ~reserved |>> String.of_char in + let delims_over_holes = between_nested_delims (many grammar) delimiters in choice [ comment_parser ; raw_string_literal_parser (fun ~contents ~left_delimiter:_ ~right_delimiter:_ -> contents) ; escapable_string_literal_parser (fun ~contents ~left_delimiter:_ ~right_delimiter:_ -> contents) ; spaces1 - ; delimsx + ; delims_over_holes ; other ]) let generate_delimited_hole_parser - ?priority_left_delimiter:left_delimiter - ?priority_right_delimiter:right_delimiter + ?priority_left_delimiter:left + ?priority_right_delimiter:right () = - let between_nested_delims p from = - let until = until_of_from from in - between (string from) (string until) p - >>= fun result -> return (String.concat @@ [from] @ result @ [until]) - in - let between_nested_delims p = - let parsers = - match left_delimiter, right_delimiter with - | Some left_delimiter, Some right_delimiter -> [ (left_delimiter, right_delimiter) ] - | _ -> Language.Syntax.user_defined_delimiters - in - parsers - |> List.map ~f:fst - |> List.map ~f:(between_nested_delims p) - |> choice - in - let reserved = - let parsers = - match left_delimiter, right_delimiter with - | Some left_delimiter, Some right_delimiter -> [ (left_delimiter, right_delimiter) ] - | _ -> Language.Syntax.user_defined_delimiters - in - List.concat_map parsers ~f:(fun (from, until) -> [from; until]) - in - let inner = - fix (fun grammar -> - let delimsx = between_nested_delims (many grammar) in - let other = Omega_parser.Deprecate.any_char_except ~reserved |>> String.of_char in - choice - [ comment_parser - ; raw_string_literal_parser (fun ~contents ~left_delimiter:_ ~right_delimiter:_ -> contents) - ; escapable_string_literal_parser (fun ~contents ~left_delimiter:_ ~right_delimiter:_ -> contents) - ; spaces1 - ; delimsx - ; other - ]) - in - between_nested_delims (many inner) + between_nested_delims + (many @@ generate_everything_hole_parser ?priority_left_delimiter:left ?priority_right_delimiter:right ()) + (delimiters left right) (* this thing is wrapped by a many. also rename it to 'string hole match syntax per char' *) let escapable_literal_grammar ~right_delimiter = @@ -373,13 +334,178 @@ module Make (Language : Types.Language.S) (Unimplemented : Metasyntax.S) = struc choice [ (string (Format.sprintf "%c%s" escape_character right_delimiter)) ; (string (Format.sprintf "%c%c" escape_character escape_character)) - ; (Omega_parser.Deprecate.any_char_except ~reserved:[right_delimiter] |>> String.of_char) + ; (not_followed_by (string right_delimiter) *> any_char >>| String.of_char) ] let raw_literal_grammar ~right_delimiter = - (Omega_parser.Deprecate.any_char_except ~reserved:[right_delimiter] |>> String.of_char) + (not_followed_by (string right_delimiter) *> any_char >>| String.of_char) - let sequence_chain ?left_delimiter ?right_delimiter (p_list : (production * 'a) t list) = + let seq p_list = + List.fold p_list ~init:(return (Unit, "")) ~f:( *>) + + let convert ?left_delimiter ?right_delimiter (p_list : (production * 'a) t list) : + (production * 'a) t list = + let add_match user_state identifier p = + pos >>= fun offset -> + p >>= fun value -> + let m = + { offset + ; identifier + ; text = value + } + in + if debug then Format.printf "add_match@."; + r user_state (Match m) + in + List.fold (List.rev p_list) ~init:[] ~f:(fun acc p -> + match parse_string ~consume:All p "_signal_hole" with + | Error s -> + if debug then Format.printf "Composing p with terminating parser, error %s@." s; + p::acc + | Ok (Hole { sort; identifier; dimension; _ }, user_state) -> + begin + match sort with + | Regex -> + let separator = List.find_map_exn Meta.syntax ~f:(function + | Hole _ -> None + | Regex (_, separator, _) -> Some separator) + in + let identifier, pattern = String.lsplit2_exn identifier ~on:separator in (* FIXME parse *) + let identifier = if String.(identifier = "") then wildcard else identifier in + if debug then Format.printf "Regex: Id: %s Pat: %s@." identifier pattern; + let pattern, prefix = + if String.is_prefix pattern ~prefix:"^" then + (* FIXME: match beginning of input too *) + String.drop_prefix pattern 1, + Some ( + (char '\n' *> return "") + <|> + (pos >>= fun p -> if p = 0 then return "" else fail "") + ) + else + pattern, None + in + let pattern, suffix = + if String.is_suffix pattern ~suffix:"$" then + String.drop_suffix pattern 1, Some (char '\n' *> return "" <|> end_of_input *> return "") + else + pattern, None + in + let compiled_regexp = Regexp.PCRE.make_regexp pattern in + let regexp_parser = Regexp.PCRE.regexp compiled_regexp in + let regexp_parser = + match prefix, suffix with + | Some prefix, None -> prefix *> regexp_parser + | None, Some suffix -> regexp_parser <* suffix + | Some prefix, Some suffix -> prefix *> regexp_parser <* suffix + | None, None -> regexp_parser + in + (* the eof matters here for that one tricky test case *) + let base_parser = + [ regexp_parser + ; end_of_input >>= fun () -> return "" + ] + in + let hole_semantics = choice base_parser in + (add_match user_state identifier hole_semantics)::acc + + | Alphanum -> + let allowed = choice [alphanum; char '_'] >>| String.of_char in + let hole_semantics = many1 allowed >>| String.concat in + (add_match user_state identifier hole_semantics)::acc + + | Non_space -> + let non_space = + ([ Omega_parser_helper.skip space1 + ; Omega_parser_helper.skip reserved_parsers + ] + |> choice + |> not_followed_by) + *> any_char + >>| Char.to_string + in + let rest = + match acc with + | [] -> end_of_input *> return (Unit, "") + | _ -> + return () >>= fun () -> + seq acc >>= fun r -> + return r + in + let hole_semantics = many1 (not_followed_by rest *> non_space) >>| String.concat in + (add_match user_state identifier hole_semantics)::acc + + | Line -> + let allowed = + many (not_followed_by (string "\n" <|> string "\r\n") *> any_char ) + >>| fun x -> [(String.of_char_list x)^"\n"] + in + let hole_semantics = allowed <* char '\n' >>| String.concat in + (add_match user_state identifier hole_semantics)::acc + + | Blank -> + let hole_semantics = many1 blank >>| String.of_char_list in + (add_match user_state identifier hole_semantics)::acc + + | Expression -> + let non_space = + ([ Omega_parser_helper.skip space1 + ; Omega_parser_helper.skip reserved_parsers + ] + |> choice + |> not_followed_by) + *> any_char + >>| Char.to_string + in + let delimited = + generate_delimited_hole_parser + ?priority_left_delimiter:left_delimiter + ?priority_right_delimiter:right_delimiter + () + in + let matcher = non_space <|> delimited in + let rest = + match acc with + | [] -> end_of_input *> return (Unit, "") + | _ -> + return () >>= fun () -> + seq acc >>= fun r -> + return r + in + let hole_semantics = many1 (not_followed_by rest *> matcher) >>| String.concat in + (add_match user_state identifier hole_semantics)::acc + + | Everything -> + let matcher = + match dimension with + | Code -> + generate_everything_hole_parser + ?priority_left_delimiter:left_delimiter + ?priority_right_delimiter:right_delimiter + () + | Escapable_string_literal -> + let right_delimiter = Option.value_exn right_delimiter in + escapable_literal_grammar ~right_delimiter + | Raw_string_literal -> + let right_delimiter = Option.value_exn right_delimiter in + raw_literal_grammar ~right_delimiter + | Comment -> failwith "Unimplemented" + in + let rest = + match acc with + | [] -> end_of_input *> return (Unit, "") + | _ -> seq acc + in + let hole_semantics = many (not_followed_by rest *> matcher) >>| String.concat in + (add_match user_state identifier hole_semantics)::acc + end + | _ -> failwith "unreachable: _signal_hole parsed but not handled by Hole variant") + + let sequence_chain' ?left_delimiter ?right_delimiter p_list = + convert ?left_delimiter ?right_delimiter p_list + |> seq + + let sequence_chain_unused ?left_delimiter ?right_delimiter (p_list : (production * 'a) t list) = if debug then Format.printf "Sequence chain p_list size: %d@." @@ List.length p_list; let i = ref 0 in List.fold_right p_list ~init:(return (Unit, acc)) ~f:(fun p acc -> @@ -471,12 +597,12 @@ module Make (Language : Types.Language.S) (Unimplemented : Metasyntax.S) = struc end_of_input) else (if debug then Format.printf "hole until: append suffix@."; - skip_unit acc) + Omega_parser_helper.skip acc) in ( pos >>= fun pos -> if get_pos () = (-1) then set_pos pos; - let stop_at = choice [ rest; skip_unit reserved_parsers ] in + let stop_at = choice [ rest; Omega_parser_helper.skip reserved_parsers ] in many1_till_stop any_char stop_at (* Beware of this use. *) ) >>= fun value -> @@ -499,8 +625,8 @@ module Make (Language : Types.Language.S) (Unimplemented : Metasyntax.S) = struc | Line -> pos >>= fun offset -> let allowed = - many (Omega_parser.Deprecate.any_char_except ~reserved:["\n"]) - |>> fun x -> [(String.of_char_list x)^"\n"] + many (not_followed_by (char '\n') *> any_char) + >>| fun x -> [(String.of_char_list x)^"\n"] in allowed <* char '\n' >>= fun value -> acc >>= fun _ -> @@ -518,21 +644,21 @@ module Make (Language : Types.Language.S) (Unimplemented : Metasyntax.S) = struc let _non_space : string t = let rest = if !i = 0 then end_of_input - else skip_unit acc + else Omega_parser_helper.skip acc in ( pos >>= fun pos -> if get_pos () = (-1) then set_pos pos; - let stop_at = choice [ rest; skip_unit reserved_parsers ] in + let stop_at = choice [ rest; Omega_parser_helper.skip reserved_parsers ] in many1_till_stop any_char stop_at (* Beware of this use. *) - ) |>> String.of_char_list + ) >>| String.of_char_list in let non_space = - many1 (Omega_parser.Deprecate.any_char_except ~reserved:([" "]@Deprecate.reserved_delimiters)) |>> String.of_char_list + many1 (not_followed_by (Omega_parser_helper.skip (char ' ') <|> Omega_parser_helper.skip reserved_parsers) *> any_char) >>| String.of_char_list in let delimited = (* IDK why this rest works without end_of_input but it's needed for non_space. *) - let rest = skip_unit acc in + let rest = Omega_parser_helper.skip acc in (many1_till (pos >>= fun pos -> if debug then Format.printf "Pos is %d@." pos; @@ -607,7 +733,7 @@ module Make (Language : Types.Language.S) (Unimplemented : Metasyntax.S) = struc end_of_input) else (if debug then Format.printf "hole everything until: append suffix@."; - skip_unit acc) + Omega_parser_helper.skip acc) in let first_pos = ref (-1) in let set_pos v = first_pos := v in @@ -686,90 +812,41 @@ module Make (Language : Types.Language.S) (Unimplemented : Metasyntax.S) = struc string str >>= fun result -> r acc (Template_string (String.concat s1 ^ result)) - let single_hole_parser () = - string ":[[" *> identifier_parser () <* string "]]" - - let everything_hole_parser () = - string ":[" *> identifier_parser () <* string "]" - - let expression_hole_parser () = - string ":[" *> identifier_parser () <* string ":e" <* string "]" - - let non_space_hole_parser () = - string ":[" *> identifier_parser () <* string ".]" - - let line_hole_parser () = - string ":[" *> identifier_parser () <* string "\\n]" - - let blank_hole_parser () = - string ":[" - *> many1 blank - *> identifier_parser () - <* string "]" - - let regex_expression () = - fix (fun expr -> - choice - [ lift (fun x -> Format.sprintf "[%s]" @@ String.concat x) (char '[' *> many1 expr <* char ']') - ; lift (fun c -> Format.sprintf {|\%c|} c) (char '\\' *> any_char) - ; lift Char.to_string (not_char ']') - ]) - - let regex_body () = - lift2 - (fun v e -> Format.sprintf "%s~%s" v (String.concat e)) - (identifier_parser ()) - (char '~' *> many1 (regex_expression ())) - - let regex_hole_parser () = - string ":[" *> regex_body () <* string "]" - let hole_parser sort dimension : (production * 'a) t t = - let hole_parser = - match sort with - | Types.Hole.Alphanum -> single_hole_parser () - | Everything -> everything_hole_parser () - | Blank -> blank_hole_parser () - | Line -> line_hole_parser () - | Non_space -> non_space_hole_parser () - | Expression -> expression_hole_parser () - | Regex -> regex_hole_parser () + let hole_parser = (* This must be fold, can't be find *) + let open Polymorphic_compare in + List.fold ~init:[] Template.Matching.hole_parsers ~f:(fun acc (sort', parser) -> + if sort' = sort then parser::acc else acc) in - let skip_signal hole = skip_unit (string "_signal_hole") |>> fun () -> (Hole hole, acc) in - hole_parser |>> fun identifier -> skip_signal { sort; identifier; dimension; at_depth = None } + let skip_signal hole = Omega_parser_helper.skip (string "_signal_hole") >>| fun () -> (Hole hole, acc) in + match hole_parser with + | [] -> fail "none" (* not defined *) + | l -> + choice l >>| function identifier -> + skip_signal { sort; identifier; dimension; at_depth = None } - let reserved_holes () = - [ single_hole_parser () - ; everything_hole_parser () - ; non_space_hole_parser () - ; line_hole_parser () - ; blank_hole_parser () - ; expression_hole_parser () - ] - - let generate_hole_for_literal sort ~contents ~left_delimiter ~right_delimiter () = + let generate_hole_for_literal dimension ~contents ~left_delimiter ~right_delimiter () = let literal_holes = - Types.Hole.sorts () - |> List.map ~f:(fun kind -> hole_parser kind sort) (* Note: Uses attempt in alpha *) - |> choice - in - let _reserved_holes = - reserved_holes () - |> List.map ~f:skip_unit - |> choice + choice @@ List.map Template.Matching.hole_parsers ~f:(fun (kind, _) -> hole_parser kind dimension) in + let reserved_holes = List.map reserved_holes ~f:Omega_parser_helper.skip in + let other = Omega_parser_helper.( + up_to @@ + choice + [ (spaces1 *> return ()) + ; (choice reserved_holes *> return ()) + ] + >>| String.of_char_list) in let parser = many @@ choice [ literal_holes - ; (spaces1 |>> generate_pure_spaces_parser) - ; ((many1 (Omega_parser.Deprecate.any_char_except ~reserved:[":["; " "; "\n"; "\t"; "\r"]) - |>> String.of_char_list) - |>> generate_string_token_parser) + ; (spaces1 >>| generate_pure_spaces_parser) + ; (other >>| generate_string_token_parser) ] in match parse_string ~consume:All parser contents with - | Ok parsers -> sequence_chain ~left_delimiter ~right_delimiter parsers + | Ok parsers -> sequence_chain' ~left_delimiter ~right_delimiter parsers | Error _ -> failwith "If this failure happens it is a bug: Converting a \ quoted string in the template to a parser list should \ @@ -782,12 +859,12 @@ module Make (Language : Types.Language.S) (Unimplemented : Metasyntax.S) = struc (many1 (comment_parser <|> spaces1)) in let other = - (many1 (Omega_parser.Deprecate.any_char_except ~reserved:Deprecate.reserved) |>> String.of_char_list) - |>> generate_string_token_parser + (many1 (not_followed_by reserved_parsers *> any_char) >>| String.of_char_list) + >>| generate_string_token_parser in let code_holes = - Types.Hole.sorts () - |> List.map ~f:(fun kind -> hole_parser kind Code) + Template.Matching.hole_parsers + |> List.map ~f:(fun (sort, _) -> hole_parser sort Code) |> choice in fix (fun (generator : (production * 'a) t list t) -> @@ -800,7 +877,7 @@ module Make (Language : Types.Language.S) (Unimplemented : Metasyntax.S) = struc >>= fun (g: (production * 'a) t list) -> if debug then Format.printf "G size: %d; delim %s@." (List.length g) left_delimiter; return @@ - sequence_chain @@ + sequence_chain' @@ [string left_delimiter >>= fun result -> r acc (Template_string result)] @ g @ [ string right_delimiter >>= fun result -> r acc (Template_string result)]) @@ -817,7 +894,7 @@ module Make (Language : Types.Language.S) (Unimplemented : Metasyntax.S) = struc if debug then Format.printf "Produced %d parsers in main generator@." @@ List.length x; return x ) - |>> fun p_list -> + >>| fun p_list -> match p_list with | [] -> (* The template is the empty string and source is nonempty. We need to @@ -826,7 +903,7 @@ module Make (Language : Types.Language.S) (Unimplemented : Metasyntax.S) = struc r acc Unit | p_list -> p_list - |> sequence_chain + |> sequence_chain' |> fun matcher -> match !configuration_ref.match_kind with | Exact -> @@ -835,7 +912,8 @@ module Make (Language : Types.Language.S) (Unimplemented : Metasyntax.S) = struc matcher >>= fun _access_last_production_here -> pos >>= fun end_pos -> end_of_input >>= fun _ -> - record_match_context start_pos end_pos rule; + if !implicit_equals_match_satisfied then record_match_context start_pos end_pos rule; + implicit_equals_match_satisfied := true; (* reset *) current_environment_ref := Match.Environment.create (); r acc Unit | Fuzzy -> @@ -844,7 +922,7 @@ module Make (Language : Types.Language.S) (Unimplemented : Metasyntax.S) = struc [ comment_parser ; (raw_string_literal_parser (fun ~contents ~left_delimiter:_ ~right_delimiter:_ -> contents)) ; (escapable_string_literal_parser (fun ~contents ~left_delimiter:_ ~right_delimiter:_ -> contents)) - ; any_char |>> Char.to_string + ; any_char >>| Char.to_string ] in let match_one = @@ -864,42 +942,31 @@ module Make (Language : Types.Language.S) (Unimplemented : Metasyntax.S) = struc else return ()) >>= fun () -> if debug then Format.printf "Calculated end_pos %d@." end_pos; - record_match_context start_pos end_pos rule; + if !implicit_equals_match_satisfied then record_match_context start_pos end_pos rule; + implicit_equals_match_satisfied := true; (* reset *) current_environment_ref := Match.Environment.create (); return (Unit, "") in (* many1 may be appropriate *) let prefix = (prefix >>= fun s -> r acc (String s)) in let first_match_attempt = choice [match_one; prefix] in (* consumes a character in prefix if no match *) - let matches = many_till first_match_attempt end_of_input in + let matches = many first_match_attempt *> end_of_input in matches >>= fun _result -> r acc Unit let to_template template rule = - let state = Buffered.parse (general_parser_generator rule) in - let state = Buffered.feed state (`String template) in - Buffered.feed state `Eof - |> function - | Buffered.Done ({ len; _ }, p) -> - if len <> 0 then failwith @@ Format.sprintf "Input left over in template where not expected: %d" len; - Ok p + match parse_string ~consume:All (general_parser_generator rule) template with + | Ok p -> Ok p | _ -> Or_error.error_string "Template could not be parsed." let run_the_parser_for_first p source : Match.t Or_error.t = push_source_ref := !source_ref; source_ref := source; - let state = Buffered.parse p in - let state = Buffered.feed state (`String source) in - let state = Buffered.feed state `Eof in - match state with - | Buffered.Done ({ len; off; _ }, (_, _result_string)) -> + match parse_string ~consume:All p source with + | Ok _ -> source_ref := !push_source_ref; if rewrite then Format.eprintf "Result string:@.---@.%s---@." @@ Buffer.contents actual; - if len <> 0 then - (if debug then Format.eprintf "Input left over in parse where not expected: off(%d) len(%d)" off len; - Or_error.error_string "Does not match template") - else - Ok (Match.create ()) (* Fake for now *) + Ok (Match.create ()) (* Fake match result--currently using refs *) | _ -> source_ref := !push_source_ref; Or_error.error_string "No matches" @@ -941,13 +1008,14 @@ module Make (Language : Types.Language.S) (Unimplemented : Metasyntax.S) = struc in Match.create ~range () - let all ?configuration ?rule ?(nested = false) ~template ~source:original_source () : Match.t list = + let all ?configuration ?(rule = [Types.Ast.True]) ~template ~source:original_source () : Match.t list = push_matches_ref := !matches_ref; configuration_ref := Option.value configuration ~default:!configuration_ref; + let Rule.{ nested } = Rule.options rule in let rec aux_all ?configuration ?(nested = false) ~template ~source () = matches_ref := []; if String.is_empty template && String.is_empty source then [trivial] - else match first_is_broken template source rule with + else match first_is_broken template source (Some rule) with (* FIXME always Some rule *) | Ok _ | Error _ -> let matches = List.rev !matches_ref in @@ -1024,27 +1092,24 @@ module Make (Language : Types.Language.S) (Unimplemented : Metasyntax.S) = struc | [] -> Or_error.error_string "No result" | (hd::_) -> Ok hd (* FIXME be efficient *) end + and Program : sig val apply : ?substitute_in_place:bool - -> ?fresh:(unit -> string) -> ?metasyntax:Metasyntax.t -> Rule.t -> Match.environment -> Evaluate.result end = struct - let apply ?(substitute_in_place = true) - ?(fresh = Evaluate.counter) ?metasyntax rule env = Evaluate.apply ~substitute_in_place - ~fresh ?metasyntax - ~match_all:(Matcher.all ~rule:[Rule.Ast.True] ~nested:false) (* FIXME propagated nested *) + ~match_all:(Matcher.all ~rule:[Types.Ast.True]) rule env end diff --git a/lib/kernel/matchers/omega_parser.ml b/lib/kernel/matchers/omega_parser_helper.ml similarity index 58% rename from lib/kernel/matchers/omega_parser.ml rename to lib/kernel/matchers/omega_parser_helper.ml index f7802e0..df5102b 100644 --- a/lib/kernel/matchers/omega_parser.ml +++ b/lib/kernel/matchers/omega_parser_helper.ml @@ -1,35 +1,28 @@ open Core_kernel -open Angstrom +open Vangstrom -let (|>>) p f = - p >>= fun x -> return (f x) +let skip p = p *> return () + +let up_to p = + many1 (not_followed_by p *> any_char) let between left right p = left *> p <* right -let zero = +let zero : 'a Vangstrom.t = fail "" let cons x xs = x :: xs -let debug = true +let many_till p t = + fix (fun m -> (t *> return []) <|> (lift2 cons p m)) -let dont_use_any_char_except_parser p = - if debug then Format.printf "Entered@."; - let stop = ref false in - let set_stop v = stop := v in - let get_stop () = !stop in - let c = - choice - [ (p >>= fun reserved -> pos >>= fun po -> (if debug then Format.printf "1. stop @@ %s @@ %d@." reserved po; return (set_stop true)) >>= fun _ -> fail "stop") - ; (return () >>= fun _ -> Format.printf "X@."; if get_stop () then (if debug then Format.printf "2. stop@."; fail "stop") else any_char) - ] - in - c >>= fun c' -> if debug then Format.printf "Parsed: %c@." c'; if debug then Format.printf "Exit@."; return c' +let many1_till p t = + lift2 cons p (many_till p t) -let dont_use_is_not p = - dont_use_any_char_except_parser p +let ignore p = + p *> return () let many_till_stop p t = let stop = ref false in @@ -55,45 +48,6 @@ let many1_till_stop p t = lift2 cons one (many_till_stop p t) -(* use many1_till_stop instead of "many1 (any_allowed_except_parser allowed until" *) -(* -let any_allowed_except_parser allowed p = - let rewind = ref false in - let set_rewind v = rewind := v in - let get_rewind () = !rewind in - choice - [ (p >>= fun _ -> (return (set_rewind true)) >>= fun _ -> fail "bad") - ; (return () >>= fun _ -> if get_rewind () then fail "rewind" else allowed) - (* TODO this needs some kind of EOF condition to work for both template and match parsing *) - ] -*) - -let alphanum = - satisfy (function - | 'a' .. 'z' - | 'A' .. 'Z' - | '0' .. '9' -> true - | _ -> false) - -let is_whitespace = function - | ' ' | '\t' | '\r' | '\n' -> true - | _ -> false - -let blank = - choice - [ char ' ' - ; char '\t' - ] - -let many_till p t = - fix (fun m -> (t *> return []) <|> (lift2 cons p m)) - -let many1_till p t = - lift2 cons p (many_till p t) - -let skip_unit p = - p |>> ignore - module Deprecate = struct (* XXX can shortcircuit *) (* what if you hit a reserved @@ -118,23 +72,36 @@ module Deprecate = struct | `Reserved_sequence -> fail "reserved sequence hit" end -(** must have at least one, otherwise spins on the empty string. for some reason - many1 spaces is not equivalent (spins on empty space?). *) -let spaces1 = - satisfy is_whitespace >>= fun c -> - (* XXX use skip_while once everything works. - we don't need the string *) - take_while is_whitespace >>= fun s -> - return (Format.sprintf "%c%s" c s) + +let alphanum = + satisfy (function + | 'a' .. 'z' + | 'A' .. 'Z' + | '0' .. '9' -> true + | _ -> false) + +let is_whitespace = function + | ' ' | '\t' | '\r' | '\n' -> true + | _ -> false + +let blank = + choice + [ char ' ' + ; char '\t' + ] + +let space1 = + satisfy is_whitespace let spaces = take_while is_whitespace >>= fun s -> return s +let spaces1 = + satisfy is_whitespace >>= fun c -> + take_while is_whitespace >>= fun s -> + return (Format.sprintf "%c%s" c s) + let identifier_parser () = many (alphanum <|> char '_') - |>> String.of_char_list - -let many1_till p t = - let cons x xs = x::xs in - lift2 cons p (many_till p t) + >>| String.of_char_list diff --git a/lib/kernel/matchers/regexp.ml b/lib/kernel/matchers/regexp.ml index 69e9834..ef0bd59 100644 --- a/lib/kernel/matchers/regexp.ml +++ b/lib/kernel/matchers/regexp.ml @@ -1,4 +1,4 @@ -open Angstrom +open Vangstrom let debug = match Sys.getenv "DEBUG_COMBY" with @@ -38,7 +38,7 @@ module Make (Regexp: Regexp_engine_intf) = struct (* FIXME: size. about advance => want to use internal unsafe_apply_opt actually. cf. string_ in angstrom.ml. instead, trying "do peek, then advance/commit." *) - let regexp rex : string Angstrom.t = + let regexp rex = (* Why do Unsafe if I can just do peek_string? => So I don't allocate on copy of buffer. *) (* But it looks like we can't avoid allocation in converting bigstringaf to bytes *) Unsafe.peek 1 (fun buffer ~off ~len:_ -> Bigstringaf.length buffer - off) >>= fun n -> diff --git a/lib/kernel/matchers/regexp.mli b/lib/kernel/matchers/regexp.mli index e73aaf2..49a2c03 100644 --- a/lib/kernel/matchers/regexp.mli +++ b/lib/kernel/matchers/regexp.mli @@ -23,7 +23,7 @@ module Make (Regexp : Regexp_engine_intf): sig val make_regexp : string -> Regexp.t - val regexp : Regexp.t -> string Angstrom.t + val regexp : Regexp.t -> string Vangstrom.t end module PCRE : sig diff --git a/lib/kernel/matchers/rewrite.ml b/lib/kernel/matchers/rewrite.ml index 699df2a..f4a7d58 100644 --- a/lib/kernel/matchers/rewrite.ml +++ b/lib/kernel/matchers/rewrite.ml @@ -1,3 +1,4 @@ +open Vangstrom open Core_kernel open Match @@ -8,105 +9,76 @@ let debug = | exception Not_found -> false | _ -> true -(* override default metasyntax for identifiers to accomodate fresh variable generation and UUID - identifiers that contain -, etc. *) -let match_context_syntax = - let identifier = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-" in - Metasyntax.{ default_metasyntax with identifier } +let counter = + let uuid_for_id_counter = ref 0 in + fun () -> + uuid_for_id_counter := !uuid_for_id_counter + 1; + Format.sprintf "%d" !uuid_for_id_counter -let match_context_metasyntax = - Metasyntax.(create match_context_syntax) +let replacement_sentinel metasyntax = + let open Types.Metasyntax in + List.find_map metasyntax.syntax ~f:(function + | Hole (Everything, Delimited (left, right)) -> + let left = Option.value left ~default:"" in + let right = Option.value right ~default:"" in + Some (left, right) + | Regex (left, _, right) -> + Some (left, right) + | _ -> None) + |> function + | Some v -> v + | None -> failwith "A custom metasyntax must define syntax for an Everything hole or Regex to customize rewriting" -module Match_context_metasyntax = (val match_context_metasyntax) -module Match_context_template = Rewrite_template.Make(Match_context_metasyntax) - -let substitute_match_contexts ?fresh (matches: Match.t list) source replacements = - if debug then Format.printf "Matches: %d | Replacements: %d@." (List.length matches) (List.length replacements); - let rewrite_template, environment = - List.fold2_exn - matches replacements - ~init:(source, Environment.create ()) - ~f:(fun (rewrite_template, accumulator_environment) - ({ environment = _match_environment; _ } as match_) - { replacement_content; _ } -> - (* create a hole in the rewrite template based on this match context *) - let sub_fresh = Option.map fresh ~f:(fun f -> fun () -> ("sub_" ^ f ())) in (* ensure custom fresh function is unique for substition. *) - let hole_id, rewrite_template = Rewrite_template.of_match_context ?fresh:sub_fresh match_ ~source:rewrite_template in - if debug then Format.printf "Created rewrite template with hole var %s: %s @." hole_id rewrite_template; - (* add this match context replacement to the environment *) - let accumulator_environment = Environment.add accumulator_environment hole_id replacement_content in - (* update match context replacements offset *) - rewrite_template, accumulator_environment) +(** Parse the first :[id(label)] label encountered in the template. *) +let parse_first_label ?(metasyntax = Metasyntax.default_metasyntax) template = + let label = take_while (function | '0' .. '9' | 'a' .. 'z' | 'A' .. 'Z' | '_' -> true | _ -> false) in + let left, right = replacement_sentinel metasyntax in + let parser = + many @@ + choice + [ lift3 (fun _ label _ -> Some label) (string (left^"id(")) label (string (")"^right)) + ; any_char >>| fun _ -> None + ] in - if debug then Format.printf "Env:@.%s" (Environment.to_string environment); - if debug then Format.printf "Rewrite in:@.%s@." rewrite_template; - let variables = Match_context_template.variables rewrite_template in - let rewritten_source = Rewrite_template.substitute ~metasyntax:match_context_syntax ?fresh rewrite_template environment |> fst in - if debug then Format.printf "Rewritten source:@.%s@." rewritten_source; - let offsets = Rewrite_template.get_offsets_for_holes variables rewrite_template in - if debug then - Format.printf "Replacements: %d | Offsets 1: %d@." (List.length replacements) (List.length offsets); - let offsets = Rewrite_template.get_offsets_after_substitution offsets environment in - if debug then - Format.printf "Replacements: %d | Offsets 2: %d@." (List.length replacements) (List.length offsets); - let in_place_substitutions = - List.map2_exn replacements offsets ~f:(fun replacement (_uid, offset) -> - let match_start = { Location.default with offset } in - let offset = offset + String.length replacement.replacement_content in - let match_end = { Location.default with offset } in - let range = Range.{ match_start; match_end } in - { replacement with range }) - in - { rewritten_source - ; in_place_substitutions - } + parse_string ~consume:All parser template + |> function + | Ok label -> List.find_map label ~f:ident + | Error _ -> None + +let substitute_fresh + ?(metasyntax = Metasyntax.default_metasyntax) + ?(fresh = counter) + template = + let label_table = String.Table.create () in + let template_ref = ref template in + let current_label_ref = ref (parse_first_label ~metasyntax !template_ref) in + while Option.is_some !current_label_ref do + let label = Option.value_exn !current_label_ref in + let id = + match String.Table.find label_table label with + | Some id -> id + | None -> + let id = fresh () in + if String.(label <> "") then String.Table.add_exn label_table ~key:label ~data:id; + id + in + let left, right = replacement_sentinel metasyntax in + let pattern = left ^ "id(" ^ label ^ ")" ^ right in + template_ref := String.substr_replace_first !template_ref ~pattern ~with_:id; + current_label_ref := parse_first_label ~metasyntax !template_ref; + done; + !template_ref -(** - store range information for this match_context replacement: - (a) its offset in the original source - (b) its replacement context (to calculate the range) - (c) an environment of values that are updated to reflect their relative offset in the rewrite template -*) let substitute_in_rewrite_template ?fresh ?(metasyntax = Metasyntax.default_metasyntax) - rewrite_template - ({ environment; _ } : Match.t) = + template + environment = let (module M) = Metasyntax.create metasyntax in - let module Template_parser = Rewrite_template.Make(M) in - let variables = Template_parser.variables rewrite_template in - - let replacement_content, vars_substituted_for = - Rewrite_template.substitute - ~metasyntax - ?fresh - rewrite_template - environment - in - let offsets = Rewrite_template.get_offsets_for_holes variables rewrite_template in - let offsets = Rewrite_template.get_offsets_after_substitution offsets environment in - let environment = - List.fold offsets ~init:(Environment.create ()) ~f:(fun acc (var, relative_offset) -> - if List.mem vars_substituted_for var ~equal:String.equal then - let value = Option.value_exn (Environment.lookup environment var) in - (* FIXME(RVT): Location does not update row/column here *) - let start_location = - Location.{ default with offset = relative_offset } - in - let end_location = - let offset = relative_offset + String.length value in - Location.{ default with offset } - in - let range = - Range. - { match_start = start_location - ; match_end = end_location - } - in - Environment.add ~range acc var value - else - acc) - in + let module Template_parser = Template.Make(M) in + let template = substitute_fresh ~metasyntax ?fresh template in + let terms = Template_parser.parse template in + let replacement_content, environment = Template_parser.substitute terms environment in { replacement_content ; environment ; range = @@ -115,21 +87,43 @@ let substitute_in_rewrite_template } } -let all ?source ?metasyntax ?fresh ~rewrite_template matches : result option = - if List.is_empty matches then None else - match source with - (* in-place substitution *) - | Some source -> - let matches : Match.t list = List.rev matches in - matches - |> List.map ~f:(substitute_in_rewrite_template ?metasyntax ?fresh rewrite_template) - |> substitute_match_contexts ?fresh matches source - |> Option.some - (* no in place substitution, emit result separated by newlines *) - | None -> - matches - |> List.map ~f:(substitute_in_rewrite_template ?metasyntax ?fresh rewrite_template) - |> List.map ~f:(fun { replacement_content; _ } -> replacement_content) - |> String.concat ~sep:"\n" - |> (fun rewritten_source -> { rewritten_source; in_place_substitutions = [] }) - |> Option.some +let substitute ?(metasyntax = Metasyntax.default_metasyntax) ?fresh template env = + let { replacement_content; _ } = substitute_in_rewrite_template ?fresh ~metasyntax template env + in replacement_content + +let substitute_matches (matches: Match.t list) source replacements = + if debug then Format.printf "Matches: %d | Replacements: %d@." (List.length matches) (List.length replacements); + let rewritten_source, in_place_substitutions, _ = + (* shift adjusts the difference of the matched part and the replacement part to the matched offsets *) + List.fold2_exn matches replacements ~init:(source, [], 0) ~f:(fun (rolling_result, replacements, shift) { range; _ } ({ replacement_content; _ } as r) -> + let start_index = range.match_start.offset + shift in + let end_index = range.match_end.offset + shift in + let before = if start_index = 0 then "" else String.slice rolling_result 0 start_index in + let after = String.slice rolling_result end_index (String.length rolling_result) in + let match_length = end_index - start_index in + let difference = String.length replacement_content - match_length in + let range = Range.{ match_start = Location.{ default with offset = start_index }; match_end = Location.{ default with offset = end_index + difference } } in + let replacements = { r with range }::replacements in + String.concat [before; replacement_content; after], replacements, shift + difference) + in + { rewritten_source + ; in_place_substitutions + } + +let all ?source ?metasyntax ?fresh ~rewrite_template rev_matches : result option = + Option.some_if (not (List.is_empty rev_matches)) @@ + match source with + (* in-place substitution *) + | Some source -> + rev_matches + |> List.map ~f:(fun Match.{ environment; _ } -> substitute_in_rewrite_template ?metasyntax ?fresh rewrite_template environment) + |> substitute_matches rev_matches source + (* no in place substitution, emit result separated by newlines *) + | None -> + let buf = Buffer.create 20 in + List.iter rev_matches ~f:(fun m -> + substitute_in_rewrite_template ?metasyntax ?fresh rewrite_template m.environment + |> fun { replacement_content; _ } -> + Buffer.add_string buf replacement_content; + Buffer.add_char buf '\n'); + { rewritten_source = Buffer.contents buf; in_place_substitutions = [] } diff --git a/lib/kernel/matchers/rewrite.mli b/lib/kernel/matchers/rewrite.mli index 3d8d1d1..6e21b9c 100644 --- a/lib/kernel/matchers/rewrite.mli +++ b/lib/kernel/matchers/rewrite.mli @@ -1,3 +1,14 @@ +open Match + +(** if [fresh] is set, then substitute the pattern :[id()] starting at 1, and + incrementing subsequent IDs. If [fresh] is unset, then by default substitute + the pattern :[id()] starting at 1, and increment for each occurence of + :[id()], left to right. *) +val substitute_fresh : ?metasyntax:Metasyntax.t -> ?fresh:(unit -> string) -> string -> string + +(** substitute returns the result of substituting env in template *) +val substitute : ?metasyntax:Metasyntax.t -> ?fresh:(unit -> string) -> string -> Environment.t -> string + (** if [source] is given, substitute in-place. If not, emit result separated by newlines *) val all diff --git a/lib/kernel/matchers/rewrite_template.ml b/lib/kernel/matchers/rewrite_template.ml deleted file mode 100644 index 9bb092d..0000000 --- a/lib/kernel/matchers/rewrite_template.ml +++ /dev/null @@ -1,281 +0,0 @@ -open Angstrom -open Core_kernel - -open Match - -let debug = - match Sys.getenv "DEBUG_COMBY" with - | exception Not_found -> false - | _ -> true - -type syntax = - { variable: string - ; pattern: string - } -[@@deriving sexp_of] - -type extracted = - | Hole of syntax - | Constant of string -[@@deriving sexp_of] - -module Make (Metasyntax : Types.Metasyntax.S) = struct - - let alphanum = - satisfy (function - | 'a' .. 'z' - | 'A' .. 'Z' - | '0' .. '9' -> true - | _ -> false) - - let blank = - choice - [ char ' ' - ; char '\t' - ] - - let ignore p = - p *> return () - - let p = function - | Some delim -> ignore @@ (string delim) - | None -> return () - - let any_char_except ~reserved = - List.fold reserved - ~init:(return `OK) - ~f:(fun acc reserved_sequence -> - option `End_of_input - (peek_string (String.length reserved_sequence) - >>= fun s -> - if String.equal s reserved_sequence then - return `Reserved_sequence - else - acc)) - >>= function - | `OK -> any_char - | `End_of_input -> any_char - | `Reserved_sequence -> fail "reserved sequence hit" - - let identifier () = - choice @@ List.map ~f:char (String.to_list Metasyntax.identifier) - - let identifier () = - both - (option false (char '?' >>| fun _ -> true)) - (many1 (identifier ()) >>| String.of_char_list) - - let regex_expression suffix = - fix (fun expr -> - choice - [ lift (fun x -> Format.sprintf "[%s]" @@ String.concat x) (char '[' *> many1 expr <* char ']') - ; lift (fun c -> Format.sprintf {|\%c|} c) (char '\\' *> any_char) - ; lift String.of_char (any_char_except ~reserved:[suffix]) - ]) - - let regex_body separator suffix = - lift2 - (fun v e -> v, e) - (identifier ()) - (char separator *> many1 (regex_expression suffix)) - - let hole_parsers = - (* Fold left to respect order of definitions in custom metasyntax for - matching, where we attempt to parse in order. Note this is significant if - a syntax like $X~regex should be tried before shortcircuiting on $X, in - which case it should be defined _after_ the $X syntax (most general - should be first). *) - List.fold ~init:[] Metasyntax.syntax ~f:(fun acc v -> - let v = - match v with - | Hole (_, Delimited (left, right)) -> - p left *> identifier () <* p right >>| - fun (o, v) -> - Format.sprintf "%s%s%s%s" (Option.value left ~default:"") (if o then "?" else "") v (Option.value right ~default:""), - v - | Hole (_, Reserved_identifiers l) -> - choice (List.map l ~f:string) >>| fun v -> v, v - | Regex (left, separator, right) -> - p (Some left) *> regex_body separator right <* p (Some right) >>| - fun ((_, v), expr) -> - (Format.sprintf "%s%s%c%s%s" left v separator (String.concat expr) right), - v - in - v::acc) - - let hole_prefixes = - List.map Metasyntax.syntax ~f:(function - | Hole (_, Delimited (Some left, _)) - | Regex (left, _, _) -> Some [left] - | Hole (_, Reserved_identifiers l) -> Some l - | _ -> None) - |> List.filter_opt - |> List.concat - - (** Not smart enough: only looks for hole prefix to stop scanning constant, - because there isn't a good 'not' parser *) - let parse_template : extracted list Angstrom.t = - let hole = choice hole_parsers in - many @@ choice - [ (hole >>| fun (pattern, variable) -> Hole { pattern; variable } ) - ; (((many1 @@ any_char_except ~reserved:hole_prefixes)) >>| fun c -> Constant (String.of_char_list c)) - ; any_char >>| fun c -> Constant (Char.to_string c) (* accept anything as constant not accepted by attempting holes above *) - ] - - let parse template = - match parse_string ~consume:All parse_template template with - | Ok result -> Some result - | Error e -> failwith ("No rewrite template parse: "^e) - - let variables template = - parse template - |> function - | Some result -> - List.filter_map result ~f:(function - | Hole { pattern; variable } -> Some { pattern; variable } - | _ -> None) - | None -> - [] -end - -let counter = - let uuid_for_id_counter = ref 0 in - fun () -> - uuid_for_id_counter := !uuid_for_id_counter + 1; - Format.sprintf "%d" !uuid_for_id_counter - -let sub_counter = - let uuid_for_sub_counter = ref 0 in - fun () -> - uuid_for_sub_counter := !uuid_for_sub_counter + 1; - Format.sprintf "sub_%d" !uuid_for_sub_counter - -let replacement_sentinel metasyntax = - let open Types.Metasyntax in - List.find_map metasyntax.syntax ~f:(function - | Hole (Everything, Delimited (left, right)) -> - let left = Option.value left ~default:"" in - let right = Option.value right ~default:"" in - Some (left, right) - | Regex (left, _, right) -> - Some (left, right) - | _ -> None) - |> function - | Some v -> v - | None -> failwith "A custom metasyntax must define syntax for an Everything hole or Regex to customize rewriting" - -(** Parse the first :[id(label)] label encountered in the template. *) -let parse_first_label ?(metasyntax = Metasyntax.default_metasyntax) template = - let label = take_while (function | '0' .. '9' | 'a' .. 'z' | 'A' .. 'Z' | '_' -> true | _ -> false) in - let left, right = replacement_sentinel metasyntax in - let parser = - many @@ - choice - [ (string (left^"id(") *> label <* string (")"^right) >>= fun label -> return (Some label)) - ; any_char >>= fun _ -> return None - ] - in - parse_string ~consume:All parser template - |> function - | Ok label -> List.find_map label ~f:ident - | Error _ -> None - -let substitute_fresh - ?(metasyntax = Metasyntax.default_metasyntax) - ?(fresh = counter) - template = - let label_table = String.Table.create () in - let template_ref = ref template in - let current_label_ref = ref (parse_first_label ~metasyntax !template_ref) in - while Option.is_some !current_label_ref do - let label = Option.value_exn !current_label_ref in - let id = - match String.Table.find label_table label with - | Some id -> id - | None -> - let id = fresh () in - if String.(label <> "") then - String.Table.add_exn label_table ~key:label ~data:id; - id - in - let left, right = replacement_sentinel metasyntax in - let pattern = left ^ "id(" ^ label ^ ")" ^ right in - template_ref := String.substr_replace_first !template_ref ~pattern ~with_:id; - current_label_ref := parse_first_label ~metasyntax !template_ref; - done; - !template_ref - -let substitute ?(metasyntax = Metasyntax.default_metasyntax) ?fresh template env = - let (module M) = Metasyntax.create metasyntax in - let module Template_parser = Make(M) in - let vars = Template_parser.variables template in - let template = substitute_fresh ~metasyntax ?fresh template in - if debug then Format.printf "Template after substituting fresh: %s@." template; - - List.fold vars ~init:(template, []) ~f:(fun (acc, vars) { variable; pattern } -> - match Environment.lookup env variable with - | Some value -> - if Option.is_some (String.substr_index template ~pattern) then - String.substr_replace_all acc ~pattern ~with_:value, variable::vars - else - acc, vars - | None -> - acc, vars) - -(** Uses metasyntax to substitute fresh variables in the match_context that - will be replaced. It returns (id * rewrite_template) where id is the part - that will be substituted with match_context, and rewrite_template is the - source that's been templatized. *) -let of_match_context - ?(fresh = sub_counter) - { range = - { match_start = { offset = start_index; _ } - ; match_end = { offset = end_index; _ } } - ; _ - } - ~source = - if debug then Format.printf "Start idx: %d@.End idx: %d@." start_index end_index; - let before_part = - if start_index = 0 then - "" - else - String.slice source 0 start_index - in - let after_part = String.slice source end_index (String.length source) in - let hole_id = fresh () in - let left, right = replacement_sentinel Metasyntax.default_metasyntax in - let rewrite_template = String.concat [before_part; left; hole_id; right; after_part] in - hole_id, rewrite_template - -(** return the offset for holes (specified by variables) in a given match template *) -let get_offsets_for_holes - variables - rewrite_template = - let sorted_variables = - List.fold variables ~init:[] ~f:(fun acc { variable; pattern } -> - match String.substr_index rewrite_template ~pattern with - | Some index -> ((variable, pattern), index)::acc - | None -> acc) - |> List.sort ~compare:(fun (_, i1) (_, i2) -> i1 - i2) - |> List.map ~f:fst - in - List.fold sorted_variables ~init:(rewrite_template, []) ~f:(fun (rewrite_template, acc) (variable, pattern) -> - match String.substr_index rewrite_template ~pattern with - | Some index -> - let rewrite_template = - String.substr_replace_all rewrite_template ~pattern ~with_:"" in - rewrite_template, (variable, index)::acc - | None -> rewrite_template, acc) - |> snd - -(** pretend we substituted vars in offsets with environment. return what the offsets are after *) -let get_offsets_after_substitution offsets environment = - if debug then Format.printf "Environment: %s@." @@ Match.Environment.to_string environment; - List.fold_right offsets ~init:([],0 ) ~f:(fun (var, offset) (acc, shift) -> - match Environment.lookup environment var with - | None -> acc, shift - | Some s -> - let offset' = offset + shift in - let shift = shift + String.length s in - ((var, offset')::acc), shift) - |> fst diff --git a/lib/kernel/matchers/rewrite_template.mli b/lib/kernel/matchers/rewrite_template.mli deleted file mode 100644 index 69891a6..0000000 --- a/lib/kernel/matchers/rewrite_template.mli +++ /dev/null @@ -1,29 +0,0 @@ -open Match - -type syntax = { variable: string; pattern: string } -[@@deriving sexp_of] - -type extracted = - | Hole of syntax - | Constant of string -[@@deriving sexp_of] - -module Make : Metasyntax.S -> sig - val parse : string -> extracted list option - val variables : string -> syntax list - end - -(** if [fresh] is set, then substitute the pattern :[id()] starting at 1, and - incrementing subsequent IDs. If [fresh] is unset, then by default substitute - the pattern :[id()] starting at 1, and increment for each occurence of - :[id()], left to right. *) -val substitute_fresh : ?metasyntax:Metasyntax.t -> ?fresh:(unit -> string) -> string -> string - -(** substitute returns the result and variables substituted for *) -val substitute : ?metasyntax:Metasyntax.t -> ?fresh:(unit -> string) -> string -> Environment.t -> (string * string list) - -val of_match_context : ?fresh:(unit -> string) -> Match.t -> source:string -> (string * string) - -val get_offsets_for_holes : syntax list -> string -> (string * int) list - -val get_offsets_after_substitution : (string * int) list -> Environment.t -> (string * int) list diff --git a/lib/kernel/matchers/rewriter.ml b/lib/kernel/matchers/rewriter.ml deleted file mode 100644 index 8d72a9c..0000000 --- a/lib/kernel/matchers/rewriter.ml +++ /dev/null @@ -1,2 +0,0 @@ -module Rewrite = Rewrite -module Rewrite_template = Rewrite_template diff --git a/lib/kernel/matchers/rewriter.mli b/lib/kernel/matchers/rewriter.mli deleted file mode 100644 index 8d72a9c..0000000 --- a/lib/kernel/matchers/rewriter.mli +++ /dev/null @@ -1,2 +0,0 @@ -module Rewrite = Rewrite -module Rewrite_template = Rewrite_template diff --git a/lib/kernel/matchers/rule.ml b/lib/kernel/matchers/rule.ml index aa72773..8d6b63e 100644 --- a/lib/kernel/matchers/rule.ml +++ b/lib/kernel/matchers/rule.ml @@ -1,129 +1,11 @@ open Core_kernel -open Angstrom +open Vangstrom -module Ast = struct - type atom = - | Variable of string - | String of string - [@@deriving sexp] +open Types.Ast - type antecedent = atom - [@@deriving sexp] - - type expression = - | True - | False - | Option of string - | Equal of atom * atom - | Not_equal of atom * atom - | Match of atom * (antecedent * consequent) list - | RewriteTemplate of string - | Rewrite of atom * (antecedent * expression) - and consequent = expression list - [@@deriving sexp] - - let (=) left right = Equal (left, right) - - let (<>) left right = Not_equal (left, right) -end +module Template = Template.Make(Metasyntax.Default) module Parser = struct - open Ast - - let alphanum = - satisfy (function - | 'a' .. 'z' - | 'A' .. 'Z' - | '0' .. '9' -> true - | _ -> false) - - let variable_parser = - (string Syntax.variable_left_delimiter - *> (many (alphanum <|> char '_') >>| String.of_char_list) - <* string Syntax.variable_right_delimiter) - - (** Interpret escape sequences inside quotes *) - let char_token_s = - (char '\\' *> any_char >>| - function - | 'r' -> Char.to_string '\r' - | 'n' -> Char.to_string '\n' - | 't' -> Char.to_string '\t' - | '\\' -> Char.to_string '\\' - | c -> Format.sprintf {|\%c|} c) - <|> (any_char >>| String.of_char) - - (** With escape sequences *) - let quote s = - (string s *> (many_till char_token_s (string s))) - >>| String.concat - - let raw s = - (string s *> (many_till any_char (string s))) - >>| String.of_char_list - - let quoted_parser = - choice [ quote {|"|}; quote {|'|}; raw {|`|} ] - - let operator_parser = - choice - [ string Syntax.equal - ; string Syntax.not_equal - ] - - let any_char_except ~reserved = - List.fold reserved - ~init:(return `OK) - ~f:(fun acc reserved_sequence -> - option `End_of_input - (peek_string (String.length reserved_sequence) - >>= fun s -> - if String.equal s reserved_sequence then - return `Reserved_sequence - else - acc)) - >>= function - | `OK -> any_char - | `End_of_input -> any_char - | `Reserved_sequence -> fail "reserved sequence hit" - - let value_parser ~reserved () = - match reserved with - | [] -> fail "no value allowed to scan here" - | reserved -> many (any_char_except ~reserved) - - let map_special s = - if String.is_prefix s ~prefix:"~" then - Variable (Format.sprintf ":[%s]" s) - else if String.equal s "_" then - Variable ":[_]" - else - String s - - let antecedent_parser ?(reserved = []) () = - choice - [ (quoted_parser >>| fun value -> String value) - ; (value_parser ~reserved () >>| fun value -> map_special (String.of_char_list value)) - ] - - let atom_parser () = - choice - [ (variable_parser >>| fun variable -> Variable variable) - ; (quoted_parser >>| fun value -> String value) - ; (value_parser ~reserved:[] () >>| fun value -> String (String.of_char_list value)) - ] - - let rewrite_template_parser = - quoted_parser >>| fun value -> RewriteTemplate value - - let ignore p = - p *> return () - - let make_equality_expression left operator right = - if String.equal operator Syntax.equal then - Equal (left, right) - else - Not_equal (left, right) let is_whitespace = function | ' ' | '\t' | '\r' | '\n' -> true @@ -134,49 +16,158 @@ module Parser = struct let spaces1 = satisfy is_whitespace *> - take_while is_whitespace *> - return () + take_while is_whitespace + + let alphanum = + satisfy (function + | 'a' .. 'z' + | 'A' .. 'Z' + | '0' .. '9' -> true + | _ -> false) + + let to_atom s = + match Template.parse s with + | [] -> String "" + | [ Constant c ] -> String c + | t -> Template t + + let variable_parser = + lift3 (fun _ v _ -> String.of_char_list v) + (string Syntax.variable_left_delimiter) + (many (alphanum <|> char '_')) + (string Syntax.variable_right_delimiter) + + (** Interpret escape sequences inside quotes *) + let char_token_s = + (char '\\' *> any_char >>| + function + | 'r' -> Char.to_string '\r' + | 'n' -> Char.to_string '\n' + | 't' -> Char.to_string '\t' + | '\\' -> Char.to_string '\\' + | c -> Format.sprintf {|\%c|} c) + <|> (lift String.of_char any_char) + + (** With escape sequences *) + let quote s = + lift2 (fun _ v -> String.concat v) + (string s) + (many_till char_token_s (string s)) + + let raw s = + lift2 (fun _ v -> String.of_char_list v) + (string s) + (many_till any_char (string s)) + + let quoted_parser = + choice ~failure_msg:"could not parse quoted value" + [ quote {|"|}; quote {|'|}; raw {|`|} ] + + let map_special s = + if String.is_prefix s ~prefix:"~" then + Template (Template.parse (Format.sprintf ":[%s]" s)) + else if String.equal s "_" then + Template (Template.parse ":[_]") + else + to_atom s + + let up_to p = + many1 (not_followed_by p *> any_char) + + let atom_up_to_spaces () = + choice + [ (lift to_atom quoted_parser) + ; lift (fun v -> to_atom (String.of_char_list v)) (up_to spaces1) + ] + + let atom_up_to_terminal () = + choice + [ (lift to_atom quoted_parser) + ; (lift + (fun v -> to_atom (String.of_char_list v)) + (up_to + (choice + [ spaces1 *> return () + ; char ',' *> return () + ; char '}' *> return () + ]))) + ] + + let antecedent_parser () = + choice ~failure_msg:"could not parse LHS of ->" + [ (lift to_atom quoted_parser) + ; (lift (fun v -> map_special (String.of_char_list v)) (up_to (spaces *> string Syntax.arrow))) + ] + + let value_to_open_brace () = + choice + [ (lift to_atom quoted_parser) + ; (lift (fun v -> to_atom (String.of_char_list v)) (up_to (spaces *> char '{'))) + ] + + let value_to_comma () = + choice + [ (lift to_atom quoted_parser) + ; (lift (fun v -> to_atom (String.of_char_list v)) (up_to (spaces *> char ','))) + ] + + let rewrite_consequent_parser () = + choice + [ (lift to_atom quoted_parser) + ; (lift (fun v -> to_atom (String.of_char_list v)) (up_to (spaces *> char '}'))) + ] + + let operator_parser = + choice + [ string Syntax.equal + ; string Syntax.not_equal + ] + + let make_equality_expression left operator right = + if String.equal operator Syntax.equal then + Equal (left, right) + else + Not_equal (left, right) let optional_trailing c = option () (skip (Char.equal c)) - let option_parser = spaces *> string Syntax.option_nested <* spaces >>| fun _ -> Option "nested" + let option_parser = + lift (fun _ -> Option "nested") (spaces *> (string Syntax.option_nested) <* spaces) let true' = lift (fun _ -> True) (spaces *> string Syntax.true' <* spaces) let false' = lift (fun _ -> False) (spaces *> string Syntax.false' <* spaces) (** [==, !=] *) - let operator_parser = + let compare_parser = lift3 make_equality_expression - (spaces *> atom_parser ()) + (spaces *> atom_up_to_spaces ()) (spaces *> operator_parser) - (spaces *> atom_parser ()) + (spaces *> atom_up_to_terminal ()) <* spaces let make_rewrite_expression atom match_template rewrite_template = Rewrite (atom, (match_template, rewrite_template)) - let make_match_expression atom cases = - Match (atom, cases) - (** rewrite { -> } *) let rewrite_pattern_parser = lift3 make_rewrite_expression - (string Syntax.start_rewrite_pattern *> spaces *> atom_parser () <* spaces <* char '{' <* spaces) - (antecedent_parser ~reserved:[" ->"] () <* spaces <* string Syntax.arrow <* spaces) - (spaces *> rewrite_template_parser <* spaces <* char '}' <* spaces) + (string Syntax.start_rewrite_pattern + *> spaces*> value_to_open_brace () <* spaces <* char '{' <* spaces) + (antecedent_parser () <* spaces <* string Syntax.arrow <* spaces) + (rewrite_consequent_parser () <* spaces <* char '}') (** -> atom [, ], [,] *) let match_arrow_parser expression_parser = both - (antecedent_parser ~reserved:[" ->"] () <* spaces <* string Syntax.arrow <* spaces) - (spaces *> sep_by (char ',') expression_parser <* spaces <* optional_trailing ',' <* spaces) + (antecedent_parser () <* spaces <* string Syntax.arrow <* spaces) + (sep_by (char ',') expression_parser <* spaces <* optional_trailing ',' <* spaces) (** [|] *) let first_case_parser expression_parser = - spaces *> option () (ignore @@ string Syntax.pipe_operator *> spaces) *> + spaces *> option () (Omega_parser_helper.ignore @@ string Syntax.pipe_operator *> spaces) *> match_arrow_parser expression_parser (** | *) @@ -192,18 +183,18 @@ module Parser = struct (** match { } *) let match_pattern_parser expression_parser = - string Syntax.start_match_pattern *> spaces *> - lift2 - make_match_expression - (atom_parser () <* spaces <* char '{' <* spaces) + lift3 + (fun _ atom cases -> Match (atom, cases)) + (string Syntax.start_match_pattern *> spaces) + (value_to_open_brace () <* spaces <* char '{' <* spaces) (case_block expression_parser <* char '}' <* spaces) let expression_parser = fix (fun expression_parser -> - choice + choice ~failure_msg:"could not parse expression" [ match_pattern_parser expression_parser ; rewrite_pattern_parser - ; operator_parser + ; compare_parser ; true' ; false' ; option_parser @@ -222,7 +213,7 @@ module Parser = struct | Error error -> Or_error.error_string error end -type t = Ast.expression list +type t = Types.Rule.t [@@deriving sexp] type options = @@ -231,5 +222,5 @@ type options = let options rule = List.fold rule ~init:{ nested = false } ~f:(fun acc -> function - | Ast.Option name when String.(name = Syntax.option_nested) -> { nested = true } + | Types.Ast.Option name when String.(name = Syntax.option_nested) -> { nested = true } | _ -> acc) diff --git a/lib/kernel/matchers/script.ml b/lib/kernel/matchers/script.ml index 9299a21..3d25a1d 100644 --- a/lib/kernel/matchers/script.ml +++ b/lib/kernel/matchers/script.ml @@ -1,9 +1,9 @@ open Core_kernel -open Angstrom +open Vangstrom open Rule open Parser -open Ast +open Types.Ast type spec = Specification.t [@@deriving sexp] @@ -36,11 +36,18 @@ let chainl1 e op = let parens p = char '(' *> (p <|> return []) <* char ')' + +let template_parser until = + choice + [ (lift to_atom quoted_parser) + ; (lift (fun v -> to_atom (String.of_char_list v)) (Omega_parser_helper.many1_till any_char until)) + ] + let spec = let match_rewrite_parser = both - (spaces *> atom_parser ()) - (option None (spaces *> string Syntax.arrow *> spaces *> atom_parser () >>| fun x -> Some x)) + (spaces *> template_parser (spaces *> string "->")) + (option None (spaces *> string Syntax.arrow *> spaces *> template_parser (spaces1 *> string "where" *> spaces1) >>| fun v -> Some v)) (* FIXME use of reserved *) in match_rewrite_parser >>= fun (match_template_atom, rewrite_template_atom) -> (option None (spaces1 *> parse >>| fun x -> Some x)) >>= fun rule -> diff --git a/lib/kernel/matchers/specification.ml b/lib/kernel/matchers/specification.ml index 8788502..a0a83c6 100644 --- a/lib/kernel/matchers/specification.ml +++ b/lib/kernel/matchers/specification.ml @@ -1,4 +1,5 @@ open Core_kernel +open Vangstrom type t = { match_template : string @@ -7,6 +8,119 @@ type t = } [@@deriving sexp] - let create ?rewrite_template ?rule ~match_template () = { match_template; rule; rewrite_template } + + +let identifier_parser () = + many (Omega_parser_helper.alphanum <|> char '_') + >>| String.of_char_list + +let single_hole_parser () = + string ":[[" *> identifier_parser () <* string "]]" >>| fun _ -> Some {|(\w+)|} + +let everything_hole_parser () = + string ":[" *> identifier_parser () <* string "]" >>| fun _ -> Some {|(\n|.)*?|} + +let expression_hole_parser () = + string ":[" *> identifier_parser () <* string ":e" <* string "]" >>| fun _ -> Some {|(\n|.)*?|} + +let non_space_hole_parser () = + string ":[" *> identifier_parser () <* string ".]" >>| fun _ -> Some {|([^ \t\s\r\n])+|} + +let line_hole_parser () = + string ":[" *> identifier_parser () <* string "\\n]" >>| fun _ -> Some {|(\n|.)*?|} + +let blank_hole_parser () = + string ":[" + *> many1 Omega_parser_helper.blank + *> identifier_parser () + <* string "]" + >>| fun _ -> Some {|(\ |\t|\s|\r|\n)+|} + +let any_char_except ~reserved = + List.fold reserved + ~init:(return `OK) + ~f:(fun acc reserved_sequence -> + option `End_of_input + (peek_string (String.length reserved_sequence) + >>= fun s -> + if String.equal s reserved_sequence then + return `Reserved_sequence + else + acc)) + >>= function + | `OK -> any_char + | `End_of_input -> any_char + | `Reserved_sequence -> fail "reserved sequence hit" + +let regex_body () = + fix (fun expr -> + (choice + [ ((char '[' *> (many1 expr) <* char ']') + >>| fun char_class -> Format.sprintf "[%s]" @@ String.concat char_class) + ; (char '\\' *> any_char >>| fun c -> (Format.sprintf "\\%c" c)) + ; ((any_char_except ~reserved:["]"])) >>| Char.to_string + ] + )) + +let regex_hole_parser () = + string ":[" + *> identifier_parser () + *> char '~' + *> (many1 @@ regex_body ()) >>= fun regex -> + string "]" >>= fun _ -> return (Some (String.concat regex)) + +type extracted = + | Regex of string + | Contiguous_whitespace of string + | Non_space of string + +let up_to p = + many1 (not_followed_by p *> any_char) + +let extract : extracted list Vangstrom.t = + let hole = + choice + [ single_hole_parser () + ; everything_hole_parser () + ; expression_hole_parser () + ; non_space_hole_parser () + ; line_hole_parser () + ; blank_hole_parser () + ; regex_hole_parser () + ] + in + many @@ choice + [ (hole >>| fun v -> Option.map v ~f:(fun v -> Regex v)) + ; (Omega_parser_helper.spaces1 >>| fun s -> Some (Contiguous_whitespace s)) + ; (lift + (fun v -> Some (Non_space (String.of_char_list v))) + (up_to (choice [hole *> return (); Omega_parser_helper.spaces1 *> return ()]))) + ] + >>| fun result -> List.filter_opt result + +let escape s = + let rec aux chars = + match chars with + | [] -> [] + | x :: xs -> + match x with + | '\\' | '.' | '+' | '*' | '?' | '(' | ')' | '|' | '[' | ']' | '{' | '}' | '^' | '$' as c -> + '\\' :: c :: (aux xs) + | c -> c :: (aux xs) + in + aux (String.to_list s) + |> String.of_char_list + +let to_regex { match_template; _ } = + let extracted = parse_string ~consume:All extract match_template |> Result.ok_or_failwith in + (* Escape regex metachars and replace contiguous spaces with the regex \s+. Holes become a general regex. *) + let b = Buffer.create 10 in + Buffer.add_string b "("; + List.iter extracted ~f:(function + | Regex s -> Buffer.add_string b s + | Non_space s -> Buffer.add_string b (escape s) + | Contiguous_whitespace _ -> Buffer.add_string b {|\s+|}); + Buffer.add_string b ")"; + Buffer.contents b diff --git a/lib/kernel/matchers/specification.mli b/lib/kernel/matchers/specification.mli index c9dd8bc..ed64bb0 100644 --- a/lib/kernel/matchers/specification.mli +++ b/lib/kernel/matchers/specification.mli @@ -6,3 +6,5 @@ type t = [@@deriving sexp] val create : ?rewrite_template:string -> ?rule:Rule.t -> match_template:string -> unit -> t + +val to_regex : t -> string diff --git a/lib/kernel/matchers/template.ml b/lib/kernel/matchers/template.ml new file mode 100644 index 0000000..50f7323 --- /dev/null +++ b/lib/kernel/matchers/template.ml @@ -0,0 +1,212 @@ +open Vangstrom +open Core_kernel + +open Match +open Types.Template + +module Make (Metasyntax : Types.Metasyntax.S) = struct + + let up_to p = + many1 (not_followed_by p *> any_char) + + let optional d = Option.value d ~default:"" + + let character () = + choice @@ List.map ~f:char (String.to_list Metasyntax.identifier) + + let identifier () = + many1 @@ character () >>| String.of_char_list + + let regex_expression suffix = + lift String.concat + (many1 @@ + fix (fun expr -> + choice + [ lift (fun x -> Format.sprintf "[%s]" @@ String.concat x) (char '[' *> many1 expr <* char ']') + ; lift (fun c -> Format.sprintf {|\%c|} c) (char '\\' *> any_char) + ; lift String.of_char_list (up_to (string suffix)) + ]) + ) + + let regex_body separator suffix = + both + (option "" (identifier ())) + (char separator *> regex_expression suffix) + + (** Parsers for Matching. Different from rewrite templates which can have :[x].attribute *) + module Matching = struct + + (** Folds left to respect order of definitions in custom metasyntax for + matching, where we attempt to parse in order. Note this is significant if a + syntax like $X~regex should be tried before shortcircuiting on $X, in which + case it should be defined _after_ the $X syntax (most general should be + first). *) + let hole_parsers = + (* hole parsers for match templates only *) + List.fold ~init:[] Metasyntax.syntax ~f:(fun acc v -> + let result = match v with + | Hole (sort, Delimited (left, right)) -> + sort, + lift3 + (fun _left v _right -> v) + (string (optional left)) + (identifier ()) + (string (optional right)) + + | Hole (sort, Reserved_identifiers l) -> + sort, + choice (List.map l ~f:string) + + | Regex (left, separator, right) -> + Regex, + (* matcher wants and splits it later. Fix + this later to give v and pattern only *) + lift3 + (fun _left (v, expr) _right -> Format.sprintf "%s%c%s" v separator expr) + (string left) + (regex_body separator right) + (string right) + in + result::acc) + end + + let attribute_to_kind = function + | "value" -> Value + | "length" -> Length + | "type" -> Type + | "file.name" -> FileName + | "file.path" -> FilePath + | _ -> failwith "invalid attribute" + + let attribute_access () = + char '.' *> choice [ string "length"; string "type" ] <* not_followed_by (Omega_parser_helper.alphanum) + + (** Folds left to respect order of definitions in custom metasyntax for + matching, where we attempt to parse in order. Note this is significant if a + syntax like $X~regex should be tried before shortcircuiting on $X, in which + case it should be defined _after_ the $X syntax (most general should be + first). *) + let rewrite_hole_parsers = + List.fold ~init:[] Metasyntax.syntax ~f:(fun acc v -> + let result = + match v with + | Hole (_, Delimited (left, right)) -> + lift4 + (fun left v right kind -> + let dot_attribute = if String.(kind = "value") then "" else "."^kind in + Format.sprintf "%s%s%s%s" left v right dot_attribute, v, kind) + (string (optional left)) + (identifier ()) + (string (optional right)) + (option "value" (attribute_access ())) + | Hole (_, Reserved_identifiers l) -> + lift2 + (fun v kind -> + let dot_attribute = if String.(kind = "value") then "" else "."^kind in + Format.sprintf "%s%s" v dot_attribute, v, kind) + (choice (List.map l ~f:string)) + (option "value" (attribute_access ())) + | Regex (left, separator, right) -> + lift4 + (fun left (v, expr) right kind -> + let dot_attribute = if String.(kind = "value") then "" else "."^kind in + Format.sprintf "%s%s%c%s%s%s" + left v separator expr right dot_attribute, v, kind) + (string left) + (regex_body separator right) + (string right) + (option "value" (attribute_access ())) + in + result::acc) + + let parse_template = + let hole = choice rewrite_hole_parsers in + many @@ choice + [ (pos >>= fun offset -> hole >>| fun (pattern, variable, kind) -> + Hole { pattern; variable; offset; kind = attribute_to_kind kind }) + ; ((up_to (choice rewrite_hole_parsers)) >>| fun c -> Constant (String.of_char_list c)) + ] + + let parse template = + match parse_string ~consume:All parse_template template with + | Ok result -> result + | Error e -> failwith ("No rewrite template parse: "^e) + + let variables template = + parse template + |> List.filter_map ~f:(function + | Hole { pattern; variable; offset; kind } -> + Some { pattern; variable; offset; kind } + | _ -> None) + + let to_string template = + let buf = Buffer.create 10 in + List.iter template ~f:(function + | Constant c -> Buffer.add_string buf c + | Hole { pattern; _ } -> Buffer.add_string buf pattern); + Buffer.contents buf + + let substitute_kind { variable; kind; _ } env = + let open Option in + let length_to_string n = Format.sprintf "%d" (String.length n) in + match kind with + | Value -> Environment.lookup env variable + | Length -> Environment.lookup env variable >>| length_to_string + | FileName -> failwith "unimplemented" + | FilePath -> failwith "unimplemented" + | Type -> failwith "unimplemented" + + let substitute template environment = + let replacement_content, environment', _ = + List.fold template ~init:([], Environment.create (), 0) ~f:(fun (result, env, pos) -> function + | Constant c -> c::result, env, pos + String.length c + | Hole ({ variable; pattern; _ } as h) -> + match substitute_kind h environment with + | None -> pattern::result, env, pos + String.length variable + | Some value -> + let advance = pos + String.length value in + let range = + Range. + { match_start = Location.{ default with offset = pos } + ; match_end = Location.{ default with offset = advance } + } + in + (* FIXME: here we should probably use pattern, or hole. We don't + want to substitute var x for length value if it's used as :[x] + vs :[x].length in the same rewrite template. This will only + affect the replacement values, which won't clobber the actual + result. *) + let env = Environment.add ~range env variable value in + value::result, env, advance) + in + String.concat (List.rev replacement_content), environment' + + (** Currently dead code. Alternative to substitute that searches for hole + patterns and uses substr_replace_all. Don't know if it's faster, need to + test. Also appears to have a minor offset issue. *) + let substitute' template environment = + let vars = + List.filter_map template ~f:(function + | Hole { pattern; variable; offset; kind } -> Some { pattern; variable; offset; kind } + | _ -> None) + in + let template_string = to_string template in + let replacement_content, environment = + List.fold vars ~init:(template_string, Environment.create ()) ~f:(fun (template, env) { variable; pattern; _ } -> + match Environment.lookup environment variable with + | None -> template, env + | Some value -> + match String.substr_index template_string ~pattern with + | None -> template, env + | Some offset -> + let range = + Range. + { match_start = Location.{ default with offset } + ; match_end = Location.{ default with offset = offset + String.length value } + } + in + let env = Environment.add ~range env variable value in + String.substr_replace_all template ~pattern ~with_:value, env) + in + replacement_content, environment +end diff --git a/lib/kernel/matchers/template.mli b/lib/kernel/matchers/template.mli new file mode 100644 index 0000000..6756ad4 --- /dev/null +++ b/lib/kernel/matchers/template.mli @@ -0,0 +1,16 @@ +open Types.Template + +module Make : Metasyntax.S -> sig + + module Matching : sig + val hole_parsers : (Types.Hole.sort * string Vangstrom.t) list + end + + val parse : string -> t + + val variables : string -> syntax list + + val to_string : t -> string + + val substitute : t -> Match.Environment.t -> (string * Match.Environment.t) + end diff --git a/lib/kernel/matchers/types.ml b/lib/kernel/matchers/types.ml index 1888393..3146273 100644 --- a/lib/kernel/matchers/types.ml +++ b/lib/kernel/matchers/types.ml @@ -1,42 +1,42 @@ open Core_kernel -module Syntax = struct - type escapable_string_literals = - { delimiters : string list - ; escape_character: char - } - [@@deriving yojson] - - type comment_kind = - | Multiline of string * string - | Nested_multiline of string * string - | Until_newline of string - [@@deriving yojson] - - type t = - { user_defined_delimiters : (string * string) list - ; escapable_string_literals : escapable_string_literals option [@default None] - ; raw_string_literals : (string * string) list - ; comments : comment_kind list - } - [@@deriving yojson] - - module type S = sig - val user_defined_delimiters : (string * string) list - val escapable_string_literals : escapable_string_literals option - val raw_string_literals : (string * string) list - val comments : comment_kind list - end -end - -module Info = struct - module type S = sig - val name : string - val extensions : string list - end -end - module Language = struct + module Syntax = struct + type escapable_string_literals = + { delimiters : string list + ; escape_character: char + } + [@@deriving yojson] + + type comment_kind = + | Multiline of string * string + | Nested_multiline of string * string + | Until_newline of string + [@@deriving yojson] + + type t = + { user_defined_delimiters : (string * string) list + ; escapable_string_literals : escapable_string_literals option [@default None] + ; raw_string_literals : (string * string) list + ; comments : comment_kind list + } + [@@deriving yojson] + + module type S = sig + val user_defined_delimiters : (string * string) list + val escapable_string_literals : escapable_string_literals option + val raw_string_literals : (string * string) list + val comments : comment_kind list + end + end + + module Info = struct + module type S = sig + val name : string + val extensions : string list + end + end + module type S = sig module Info : Info.S module Syntax : Syntax.S @@ -49,7 +49,6 @@ type dimension = | Raw_string_literal | Comment -type id = string type including = char list type until = char option @@ -114,12 +113,70 @@ type production = | String of string | Hole of hole +module Template = struct + type kind = + | Value + | Length + | FileName + | FilePath + | Type + [@@deriving sexp] + + type syntax = + { variable: string (* E.g., x *) + ; pattern: string (* E.g., the entire :[x] part *) + ; offset : int + ; kind : kind (* The kind of hole, to inform substitution *) + } + [@@deriving sexp] + + type atom = + | Hole of syntax + | Constant of string + [@@deriving sexp] + + type t = atom list + [@@deriving sexp] +end + +module Ast = struct + type atom = + | Template of Template.t + | String of string + [@@deriving sexp] + + type antecedent = atom + [@@deriving sexp] + + type kind = (* FIXME holes needs to have associated substitution kind *) + | Value + | Length + | Type + | File + [@@deriving sexp] + + type expression = + | True + | False + | Option of string + | Equal of atom * atom + | Not_equal of atom * atom + | Match of atom * (antecedent * consequent) list + | Rewrite of atom * (antecedent * atom) + and consequent = expression list + [@@deriving sexp] +end + +module Rule = struct + type t = Ast.expression list + [@@deriving sexp] +end + module Matcher = struct module type S = sig val all : ?configuration:Configuration.t -> ?rule:Rule.t - -> ?nested: bool -> template:string -> source:string -> unit @@ -132,7 +189,7 @@ module Matcher = struct -> string -> Match.t Or_error.t - include Info.S + include Language.Info.S val set_rewrite_template : string -> unit end @@ -194,6 +251,6 @@ module Engine = struct val all : (module Matcher.S) list val select_with_extension : ?metasyntax:Metasyntax.t -> string -> (module Matcher.S) option - val create : ?metasyntax:Metasyntax.t -> Syntax.t -> (module Matcher.S) + val create : ?metasyntax:Metasyntax.t -> Language.Syntax.t -> (module Matcher.S) end end diff --git a/lib/kernel/parsers/dune b/lib/kernel/parsers/dune index 2b422e1..2db29a6 100644 --- a/lib/kernel/parsers/dune +++ b/lib/kernel/parsers/dune @@ -3,4 +3,4 @@ (public_name comby-kernel.parsers) (instrumentation (backend bisect_ppx)) (preprocess (pps ppx_sexp_conv)) - (libraries core_kernel angstrom mparser)) + (libraries core_kernel comby.vangstrom mparser)) diff --git a/lib/kernel/parsers/omega_comments.ml b/lib/kernel/parsers/omega_comments.ml index 08bd61e..0d148a0 100644 --- a/lib/kernel/parsers/omega_comments.ml +++ b/lib/kernel/parsers/omega_comments.ml @@ -1,6 +1,6 @@ open Core_kernel -open Angstrom +open Vangstrom let (|>>) p f = p >>= fun x -> return (f x) diff --git a/lib/kernel/parsers/omega_string_literals.ml b/lib/kernel/parsers/omega_string_literals.ml index 51bee55..7521c83 100644 --- a/lib/kernel/parsers/omega_string_literals.ml +++ b/lib/kernel/parsers/omega_string_literals.ml @@ -1,6 +1,6 @@ open Core_kernel -open Angstrom +open Vangstrom let (|>>) p f = p >>= fun x -> return (f x) diff --git a/lib/vendored/dune b/lib/vendored/dune new file mode 100644 index 0000000..e69de29 diff --git a/lib/vendored/vangstrom/LICENSE b/lib/vendored/vangstrom/LICENSE new file mode 100644 index 0000000..680d912 --- /dev/null +++ b/lib/vendored/vangstrom/LICENSE @@ -0,0 +1,30 @@ +Copyright (c) 2016, Inhabited Type LLC + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +3. Neither the name of the author nor the names of his contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE CONTRIBUTORS ``AS IS'' AND ANY EXPRESS +OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. diff --git a/lib/vendored/vangstrom/async/dune b/lib/vendored/vangstrom/async/dune new file mode 100644 index 0000000..b2dd8e0 --- /dev/null +++ b/lib/vendored/vangstrom/async/dune @@ -0,0 +1,5 @@ +(library + (name vangstrom_async) + (public_name comby.vangstrom-async) + (flags :standard -safe-string) + (libraries vangstrom async)) diff --git a/lib/vendored/vangstrom/async/vangstrom_async.ml b/lib/vendored/vangstrom/async/vangstrom_async.ml new file mode 100644 index 0000000..0303770 --- /dev/null +++ b/lib/vendored/vangstrom/async/vangstrom_async.ml @@ -0,0 +1,85 @@ +(*---------------------------------------------------------------------------- + Copyright (c) 2016 Inhabited Type LLC. + + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. Neither the name of the author nor the names of his contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE CONTRIBUTORS ``AS IS'' AND ANY EXPRESS + OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + ----------------------------------------------------------------------------*) + +open Vangstrom.Unbuffered +open Core +open Async + +let empty_bigstring = Bigstring.create 0 + +let rec finalize state result = + (* It is very important to understand the assumptions that go into the second + * case. If execution reaches that case, then that means that the parser has + * commited all the way up to the last byte that was read by the reader, and + * the reader's internal buffer is empty. If the parser hadn't committed up + * to the last byte, then the reader buffer would not be empty and execution + * would hit the first case rather than the second. + * + * In other words, the second case looks wrong but it's not. *) + match state, result with + | Partial p, `Eof_with_unconsumed_data s -> + let bigstring = Bigstring.of_string s in + finalize (p.continue bigstring ~off:0 ~len:(String.length s) Complete) `Eof + | Partial p, `Eof -> + finalize (p.continue empty_bigstring ~off:0 ~len:0 Complete) `Eof + | Partial _, `Stopped () -> assert false + | (Done _ | Fail _) , _ -> state_to_result state + +let response = function + | Partial p -> `Consumed(p.committed, `Need_unknown) + | Done(c, _) -> `Stop_consumed((), c) + | Fail _ -> `Stop () + +let default_pushback () = Deferred.unit + +let parse ?(pushback=default_pushback) p reader = + let state = ref (parse p) in + let handle_chunk buf ~pos ~len = + begin match !state with + | Partial p -> + state := p.continue buf ~off:pos ~len Incomplete; + | Done _ | Fail _ -> () + end; + pushback () >>| fun () -> response !state + in + Reader.read_one_chunk_at_a_time reader ~handle_chunk >>| fun result -> + finalize !state result + +let async_many e k = + Vangstrom.(skip_many (e <* commit >>| k) "async_many") + +let parse_many p write reader = + let wait = ref (default_pushback ()) in + let k x = wait := write x in + let pushback () = !wait in + parse ~pushback (async_many p k) reader diff --git a/lib/vendored/vangstrom/async/vangstrom_async.mli b/lib/vendored/vangstrom/async/vangstrom_async.mli new file mode 100644 index 0000000..8c65538 --- /dev/null +++ b/lib/vendored/vangstrom/async/vangstrom_async.mli @@ -0,0 +1,48 @@ +(*---------------------------------------------------------------------------- + Copyright (c) 2016 Inhabited Type LLC. + + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. Neither the name of the author nor the names of his contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE CONTRIBUTORS ``AS IS'' AND ANY EXPRESS + OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + ----------------------------------------------------------------------------*) + +open Vangstrom +open Async + + +val parse : + ?pushback:(unit -> unit Deferred.t) + -> 'a t + -> Reader.t + -> ('a, string) result Deferred.t + +val parse_many : + 'a t + -> ('a -> unit Deferred.t) + -> Reader.t + -> (unit, string) result Deferred.t diff --git a/lib/vendored/vangstrom/dune b/lib/vendored/vangstrom/dune new file mode 100644 index 0000000..e69de29 diff --git a/lib/vendored/vangstrom/lib/buffering.ml b/lib/vendored/vangstrom/lib/buffering.ml new file mode 100644 index 0000000..8d71cd4 --- /dev/null +++ b/lib/vendored/vangstrom/lib/buffering.ml @@ -0,0 +1,88 @@ +type t = + { mutable buf : Bigstringaf.t + ; mutable off : int + ; mutable len : int } + +let of_bigstring ~off ~len buf = + assert (off >= 0); + assert (Bigstringaf.length buf >= len - off); + { buf; off; len } + +let create len = + of_bigstring ~off:0 ~len:0 (Bigstringaf.create len) + +let writable_space t = + Bigstringaf.length t.buf - t.len + +let trailing_space t = + Bigstringaf.length t.buf - (t.off + t.len) + +let compress t = + Bigstringaf.unsafe_blit t.buf ~src_off:t.off t.buf ~dst_off:0 ~len:t.len; + t.off <- 0 + +let grow t to_copy = + let old_len = Bigstringaf.length t.buf in + let new_len = ref old_len in + let space = writable_space t in + while space + !new_len - old_len < to_copy do + new_len := (3 * !new_len) / 2 + done; + let new_buf = Bigstringaf.create !new_len in + Bigstringaf.unsafe_blit t.buf ~src_off:t.off new_buf ~dst_off:0 ~len:t.len; + t.buf <- new_buf; + t.off <- 0 + +let ensure t to_copy = + if trailing_space t < to_copy then + if writable_space t >= to_copy + then compress t + else grow t to_copy + +let write_pos t = + t.off + t.len + +let feed_string t ~off ~len str = + assert (off >= 0); + assert (String.length str >= len - off); + ensure t len; + Bigstringaf.unsafe_blit_from_string str ~src_off:off t.buf ~dst_off:(write_pos t) ~len; + t.len <- t.len + len + +let feed_bigstring t ~off ~len b = + assert (off >= 0); + assert (Bigstringaf.length b >= len - off); + ensure t len; + Bigstringaf.unsafe_blit b ~src_off:off t.buf ~dst_off:(write_pos t) ~len; + t.len <- t.len + len + +let feed_input t = function + | `String s -> feed_string t ~off:0 ~len:(String .length s) s + | `Bigstring b -> feed_bigstring t ~off:0 ~len:(Bigstringaf.length b) b + +let shift t n = + assert (t.len >= n); + t.off <- t.off + n; + t.len <- t.len - n + +let for_reading { buf; off; len } = + Bigstringaf.sub ~off ~len buf + +module Unconsumed = struct + type t = + { buf : Bigstringaf.t + ; off : int + ; len : int } +end + +let unconsumed ?(shift=0) { buf; off; len } = + assert (len >= shift); + { Unconsumed.buf; off = off + shift; len = len - shift } + +let of_unconsumed { Unconsumed.buf; off; len } = + { buf; off; len } + +type unconsumed = Unconsumed.t = + { buf : Bigstringaf.t + ; off : int + ; len : int } diff --git a/lib/vendored/vangstrom/lib/buffering.mli b/lib/vendored/vangstrom/lib/buffering.mli new file mode 100644 index 0000000..f458471 --- /dev/null +++ b/lib/vendored/vangstrom/lib/buffering.mli @@ -0,0 +1,20 @@ +type t + +val create : int -> t +val of_bigstring : off:int -> len:int -> Bigstringaf.t -> t + +val feed_string : t -> off:int -> len:int -> string -> unit +val feed_bigstring : t -> off:int -> len:int -> Bigstringaf.t -> unit +val feed_input : t -> [ `String of string | `Bigstring of Bigstringaf.t ] -> unit + +val shift : t -> int -> unit + +val for_reading : t -> Bigstringaf.t + +type unconsumed = + { buf : Bigstringaf.t + ; off : int + ; len : int } + +val unconsumed : ?shift:int -> t -> unconsumed +val of_unconsumed : unconsumed -> t diff --git a/lib/vendored/vangstrom/lib/dune b/lib/vendored/vangstrom/lib/dune new file mode 100644 index 0000000..d0dbb03 --- /dev/null +++ b/lib/vendored/vangstrom/lib/dune @@ -0,0 +1,6 @@ +(library + (name vangstrom) + (public_name comby.vangstrom) + (libraries bigstringaf) + (flags :standard -safe-string) + (preprocess future_syntax)) diff --git a/lib/vendored/vangstrom/lib/exported_state.ml b/lib/vendored/vangstrom/lib/exported_state.ml new file mode 100644 index 0000000..5bb5c70 --- /dev/null +++ b/lib/vendored/vangstrom/lib/exported_state.ml @@ -0,0 +1,22 @@ +type 'a state = + | Partial of 'a partial + | Done of int * 'a + | Fail of int * string list * string + +and 'a partial = + { committed : int + ; continue : Bigstringaf.t -> off:int -> len:int -> More.t -> 'a state } + + +let state_to_option x = match x with + | Done(_, v) -> Some v + | Fail _ -> None + | Partial _ -> None + +let fail_to_string marks err = + String.concat " > " marks ^ ": " ^ err + +let state_to_result x = match x with + | Done(_, v) -> Ok v + | Partial _ -> Error "incomplete input" + | Fail(_, marks, err) -> Error (fail_to_string marks err) diff --git a/lib/vendored/vangstrom/lib/input.ml b/lib/vendored/vangstrom/lib/input.ml new file mode 100644 index 0000000..0355d36 --- /dev/null +++ b/lib/vendored/vangstrom/lib/input.ml @@ -0,0 +1,111 @@ +(*---------------------------------------------------------------------------- + Copyright (c) 2017 Inhabited Type LLC. + + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. Neither the name of the author nor the names of his contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE CONTRIBUTORS ``AS IS'' AND ANY EXPRESS + OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + ----------------------------------------------------------------------------*) + +type t = + { mutable parser_committed_bytes : int + ; client_committed_bytes : int + ; off : int + ; len : int + ; buffer : Bigstringaf.t + } + +let create buffer ~off ~len ~committed_bytes = + { parser_committed_bytes = committed_bytes + ; client_committed_bytes = committed_bytes + ; off + ; len + ; buffer } + +let length t = t.client_committed_bytes + t.len +let client_committed_bytes t = t.client_committed_bytes +let parser_committed_bytes t = t.parser_committed_bytes + +let committed_bytes_discrepancy t = t.parser_committed_bytes - t.client_committed_bytes +let bytes_for_client_to_commit t = committed_bytes_discrepancy t + +let parser_uncommitted_bytes t = t.len - bytes_for_client_to_commit t + +let invariant t = + assert (parser_committed_bytes t + parser_uncommitted_bytes t = length t); + assert (parser_committed_bytes t - client_committed_bytes t = bytes_for_client_to_commit t); +;; + +let offset_in_buffer t pos = + t.off + pos - t.client_committed_bytes + +let apply t pos len ~f = + let off = offset_in_buffer t pos in + f t.buffer ~off ~len + +let unsafe_get_char t pos = + let off = offset_in_buffer t pos in + Bigstringaf.unsafe_get t.buffer off + +let unsafe_get_int16_le t pos = + let off = offset_in_buffer t pos in + Bigstringaf.unsafe_get_int16_le t.buffer off + +let unsafe_get_int32_le t pos = + let off = offset_in_buffer t pos in + Bigstringaf.unsafe_get_int32_le t.buffer off + +let unsafe_get_int64_le t pos = + let off = offset_in_buffer t pos in + Bigstringaf.unsafe_get_int64_le t.buffer off + +let unsafe_get_int16_be t pos = + let off = offset_in_buffer t pos in + Bigstringaf.unsafe_get_int16_be t.buffer off + +let unsafe_get_int32_be t pos = + let off = offset_in_buffer t pos in + Bigstringaf.unsafe_get_int32_be t.buffer off + +let unsafe_get_int64_be t pos = + let off = offset_in_buffer t pos in + Bigstringaf.unsafe_get_int64_be t.buffer off + +let count_while t pos ~f = + let buffer = t.buffer in + let off = offset_in_buffer t pos in + let i = ref off in + let limit = t.off + t.len in + while !i < limit && f (Bigstringaf.unsafe_get buffer !i) do + incr i + done; + !i - off +;; + +let commit t pos = + t.parser_committed_bytes <- pos +;; diff --git a/lib/vendored/vangstrom/lib/input.mli b/lib/vendored/vangstrom/lib/input.mli new file mode 100644 index 0000000..cc9cc4a --- /dev/null +++ b/lib/vendored/vangstrom/lib/input.mli @@ -0,0 +1,88 @@ +(*---------------------------------------------------------------------------- + Copyright (c) 2017 Inhabited Type LLC. + + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. Neither the name of the author nor the names of his contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE CONTRIBUTORS ``AS IS'' AND ANY EXPRESS + OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + ----------------------------------------------------------------------------*) + +(** An [Input.t] represents a series of buffers, of which we only have access + to one, and a pointer to how much has been committed, which is in the + current buffer. + + parser commit point + V + +--------------------------------------+ + |#################'####################| current buffer + +-----------------+--------------------------------------+----- + |#################|#################'####################|###.. input + +-----------------+--------------------------------------+----- + ' ' ' ' + |--------------------------------------------------------| + ' ' length ' ' + |-----------------| ' ' + client_committed_bytes ' ' + ' ' |--------------------| + ' ' parser_uncommitted_bytes + ' |-----------------| + ' bytes_for_client_to_commit + |-----------------------------------| + parser_committed_bytes + + Note that a buffer is a subsequence of a [Bigstringaf.t], defined by [off] and [len]. + + All [int] position arguments should be relative to the beginning of the + whole input. *) + +type t + +val create : Bigstringaf.t -> off:int -> len:int -> committed_bytes:int -> t + +val length : t -> int + +val client_committed_bytes : t -> int +val parser_committed_bytes : t -> int +val parser_uncommitted_bytes : t -> int + +val bytes_for_client_to_commit : t -> int + +val unsafe_get_char : t -> int -> char +val unsafe_get_int16_le : t -> int -> int +val unsafe_get_int32_le : t -> int -> int32 +val unsafe_get_int64_le : t -> int -> int64 +val unsafe_get_int16_be : t -> int -> int +val unsafe_get_int32_be : t -> int -> int32 +val unsafe_get_int64_be : t -> int -> int64 + +val count_while : t -> int -> f:(char -> bool) -> int + +val apply : t -> int -> int -> f:(Bigstringaf.t -> off:int -> len:int -> 'a) -> 'a + +val commit : t -> int -> unit + +val invariant : t -> unit diff --git a/lib/vendored/vangstrom/lib/more.ml b/lib/vendored/vangstrom/lib/more.ml new file mode 100644 index 0000000..fe6d510 --- /dev/null +++ b/lib/vendored/vangstrom/lib/more.ml @@ -0,0 +1,3 @@ +type t = + | Complete + | Incomplete diff --git a/lib/vendored/vangstrom/lib/more.mli b/lib/vendored/vangstrom/lib/more.mli new file mode 100644 index 0000000..9d96703 --- /dev/null +++ b/lib/vendored/vangstrom/lib/more.mli @@ -0,0 +1,3 @@ +type t = + | Complete + | Incomplete diff --git a/lib/vendored/vangstrom/lib/parser.ml b/lib/vendored/vangstrom/lib/parser.ml new file mode 100644 index 0000000..7afcc2a --- /dev/null +++ b/lib/vendored/vangstrom/lib/parser.ml @@ -0,0 +1,173 @@ +module State = struct + type 'a t = + | Partial of 'a partial + | Lazy of 'a t Lazy.t + | Done of int * 'a + | Fail of int * string list * string + + and 'a partial = + { committed : int + ; continue : Bigstringaf.t -> off:int -> len:int -> More.t -> 'a t } + +end +type 'a with_state = Input.t -> int -> More.t -> 'a + +type 'a failure = (string list -> string -> 'a State.t) with_state +type ('a, 'r) success = ('a -> 'r State.t) with_state + +type 'a t = + { run : 'r. ('r failure -> ('a, 'r) success -> 'r State.t) with_state } + +let fail_k input pos _ marks msg = + State.Fail(pos - Input.client_committed_bytes input, marks, msg) +let succeed_k input pos _ v = + State.Done(pos - Input.client_committed_bytes input, v) + +let rec to_exported_state = function + | State.Partial {committed;continue} -> + Exported_state.Partial + { committed + ; continue = + fun bs ~off ~len more -> + to_exported_state (continue bs ~off ~len more)} + | State.Done (i,x) -> Exported_state.Done (i,x) + | State.Fail (i, sl, s) -> Exported_state.Fail (i, sl, s) + | State.Lazy x -> to_exported_state (Lazy.force x) + +let parse p = + let input = Input.create Bigstringaf.empty ~committed_bytes:0 ~off:0 ~len:0 in + to_exported_state (p.run input 0 Incomplete fail_k succeed_k) + +let parse_bigstring p input = + let input = Input.create input ~committed_bytes:0 ~off:0 ~len:(Bigstringaf.length input) in + Exported_state.state_to_result (to_exported_state (p.run input 0 Complete fail_k succeed_k)) + +module Monad = struct + let return v = + { run = fun input pos more _fail succ -> + succ input pos more v + } + + let fail msg = + { run = fun input pos more fail _succ -> + fail input pos more [] msg + } + + let (>>=) p f = + { run = fun input pos more fail succ -> + let succ' input' pos' more' v = (f v).run input' pos' more' fail succ in + p.run input pos more fail succ' + } + + let (>>|) p f = + { run = fun input pos more fail succ -> + let succ' input' pos' more' v = succ input' pos' more' (f v) in + p.run input pos more fail succ' + } + + let (<$>) f m = + m >>| f + + let (<*>) f m = + (* f >>= fun f -> m >>| f *) + { run = fun input pos more fail succ -> + let succ0 input0 pos0 more0 f = + let succ1 input1 pos1 more1 m = succ input1 pos1 more1 (f m) in + m.run input0 pos0 more0 fail succ1 + in + f.run input pos more fail succ0 } + + let lift f m = + f <$> m + + let lift2 f m1 m2 = + { run = fun input pos more fail succ -> + let succ1 input1 pos1 more1 m1 = + let succ2 input2 pos2 more2 m2 = succ input2 pos2 more2 (f m1 m2) in + m2.run input1 pos1 more1 fail succ2 + in + m1.run input pos more fail succ1 } + + let lift3 f m1 m2 m3 = + { run = fun input pos more fail succ -> + let succ1 input1 pos1 more1 m1 = + let succ2 input2 pos2 more2 m2 = + let succ3 input3 pos3 more3 m3 = + succ input3 pos3 more3 (f m1 m2 m3) in + m3.run input2 pos2 more2 fail succ3 in + m2.run input1 pos1 more1 fail succ2 + in + m1.run input pos more fail succ1 } + + let lift4 f m1 m2 m3 m4 = + { run = fun input pos more fail succ -> + let succ1 input1 pos1 more1 m1 = + let succ2 input2 pos2 more2 m2 = + let succ3 input3 pos3 more3 m3 = + let succ4 input4 pos4 more4 m4 = + succ input4 pos4 more4 (f m1 m2 m3 m4) in + m4.run input3 pos3 more3 fail succ4 in + m3.run input2 pos2 more2 fail succ3 in + m2.run input1 pos1 more1 fail succ2 + in + m1.run input pos more fail succ1 } + + let ( *>) a b = + (* a >>= fun _ -> b *) + { run = fun input pos more fail succ -> + let succ' input' pos' more' _ = b.run input' pos' more' fail succ in + a.run input pos more fail succ' + } + + let (<* ) a b = + (* a >>= fun x -> b >>| fun _ -> x *) + { run = fun input pos more fail succ -> + let succ0 input0 pos0 more0 x = + let succ1 input1 pos1 more1 _ = succ input1 pos1 more1 x in + b.run input0 pos0 more0 fail succ1 + in + a.run input pos more fail succ0 } +end + +module Choice = struct + let () p mark = + { run = fun input pos more fail succ -> + let fail' input' pos' more' marks msg = + fail input' pos' more' (mark::marks) msg in + p.run input pos more fail' succ + } + + let (<|>) p q = + { run = fun input pos more fail succ -> + let fail' input' pos' more' marks msg = + (* The only two constructors that introduce new failure continuations are + * [] and [<|>]. If the initial input position is less than the length + * of the committed input, then calling the failure continuation will + * have the effect of unwinding all choices and collecting marks along + * the way. *) + if pos < Input.parser_committed_bytes input' then + fail input' pos' more marks msg + else + q.run input' pos more' fail succ in + p.run input pos more fail' succ + } +end + +module Monad_use_for_debugging = struct + let return = Monad.return + let fail = Monad.fail + let (>>=) = Monad.(>>=) + + let (>>|) m f = m >>= fun x -> return (f x) + + let (<$>) f m = m >>| f + let (<*>) f m = f >>= fun f -> m >>| f + + let lift = (>>|) + let lift2 f m1 m2 = f <$> m1 <*> m2 + let lift3 f m1 m2 m3 = f <$> m1 <*> m2 <*> m3 + let lift4 f m1 m2 m3 m4 = f <$> m1 <*> m2 <*> m3 <*> m4 + + let ( *>) a b = a >>= fun _ -> b + let (<* ) a b = a >>= fun x -> b >>| fun _ -> x +end diff --git a/lib/vendored/vangstrom/lib/vangstrom.ml b/lib/vendored/vangstrom/lib/vangstrom.ml new file mode 100644 index 0000000..0430e6a --- /dev/null +++ b/lib/vendored/vangstrom/lib/vangstrom.ml @@ -0,0 +1,753 @@ +(*---------------------------------------------------------------------------- + Copyright (c) 2016 Inhabited Type LLC. + + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. Neither the name of the author nor the names of his contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE CONTRIBUTORS ``AS IS'' AND ANY EXPRESS + OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + ----------------------------------------------------------------------------*) + +module Bigarray = struct + (* Do not access Bigarray operations directly. If anything's needed, refer to + * the internal Bigstring module. *) +end + +type bigstring = Bigstringaf.t + + +module Unbuffered = struct + include Parser + + include Exported_state + + type more = More.t = + | Complete + | Incomplete +end + +include Unbuffered +include Parser.Monad +include Parser.Choice + +module Buffered = struct + type unconsumed = Buffering.unconsumed = + { buf : bigstring + ; off : int + ; len : int } + + type input = + [ `Bigstring of bigstring + | `String of string ] + + type 'a state = + | Partial of ([ input | `Eof ] -> 'a state) + | Done of unconsumed * 'a + | Fail of unconsumed * string list * string + + let from_unbuffered_state ~f buffering = function + | Unbuffered.Partial p -> Partial (f p) + | Unbuffered.Done(consumed, v) -> + let unconsumed = Buffering.unconsumed ~shift:consumed buffering in + Done(unconsumed, v) + | Unbuffered.Fail(consumed, marks, msg) -> + let unconsumed = Buffering.unconsumed ~shift:consumed buffering in + Fail(unconsumed, marks, msg) + + let parse ?(initial_buffer_size=0x1000) p = + if initial_buffer_size < 1 then + failwith "parse: invalid argument, initial_buffer_size < 1"; + let buffering = Buffering.create initial_buffer_size in + let rec f p input = + Buffering.shift buffering p.committed; + let more : More.t = + match input with + | `Eof -> Complete + | #input as input -> + Buffering.feed_input buffering input; + Incomplete + in + let for_reading = Buffering.for_reading buffering in + p.continue for_reading ~off:0 ~len:(Bigstringaf.length for_reading) more + |> from_unbuffered_state buffering ~f + in + Unbuffered.parse p + |> from_unbuffered_state buffering ~f + + let feed state input = + match state with + | Partial k -> k input + | Fail(unconsumed, marks, msg) -> + begin match input with + | `Eof -> state + | #input as input -> + let buffering = Buffering.of_unconsumed unconsumed in + Buffering.feed_input buffering input; + Fail(Buffering.unconsumed buffering, marks, msg) + end + | Done(unconsumed, v) -> + begin match input with + | `Eof -> state + | #input as input -> + let buffering = Buffering.of_unconsumed unconsumed in + Buffering.feed_input buffering input; + Done(Buffering.unconsumed buffering, v) + end + + let state_to_option = function + | Done(_, v) -> Some v + | Partial _ -> None + | Fail _ -> None + + let state_to_result = function + | Partial _ -> Error "incomplete input" + | Done(_, v) -> Ok v + | Fail(_, marks, msg) -> Error (Unbuffered.fail_to_string marks msg) + + let state_to_unconsumed = function + | Done(unconsumed, _) + | Fail(unconsumed, _, _) -> Some unconsumed + | Partial _ -> None + +end + +(** BEGIN: getting input *) + +let rec prompt input pos fail succ = + (* [prompt] should only call [succ] if it has received more input. If there + * is no chance that the input will grow, i.e., [more = Complete], then + * [prompt] should call [fail]. Otherwise (in the case where the input + * hasn't grown but [more = Incomplete] just prompt again. *) + let parser_uncommitted_bytes = Input.parser_uncommitted_bytes input in + let parser_committed_bytes = Input.parser_committed_bytes input in + (* The continuation should not hold any references to input above. *) + let continue input ~off ~len more = + if len < parser_uncommitted_bytes then + failwith "prompt: input shrunk!"; + let input = Input.create input ~off ~len ~committed_bytes:parser_committed_bytes in + if len = parser_uncommitted_bytes then + match (more : More.t) with + | Complete -> fail input pos More.Complete + | Incomplete -> prompt input pos fail succ + else + succ input pos more + in + State.Partial { committed = Input.bytes_for_client_to_commit input; continue } + +let demand_input = + { run = fun input pos more fail succ -> + match (more : More.t) with + | Complete -> fail input pos more [] "not enough input" + | Incomplete -> + let succ' input' pos' more' = succ input' pos' more' () + and fail' input' pos' more' = fail input' pos' more' [] "not enough input" in + prompt input pos fail' succ' + } + +let ensure_suspended n input pos more fail succ = + let rec go = + { run = fun input' pos' more' fail' succ' -> + if pos' + n <= Input.length input' then + succ' input' pos' more' () + else + (demand_input *> go).run input' pos' more' fail' succ' + } + in + (demand_input *> go).run input pos more fail succ + +let unsafe_apply len ~f = + { run = fun input pos more _fail succ -> + succ input (pos + len) more (Input.apply input pos len ~f) + } + +let unsafe_apply_opt len ~f = + { run = fun input pos more fail succ -> + match Input.apply input pos len ~f with + | Error e -> fail input pos more [] e + | Ok x -> succ input (pos + len) more x + } + +let ensure n p = + { run = fun input pos more fail succ -> + if pos + n <= Input.length input + then p.run input pos more fail succ + else + let succ' input' pos' more' () = p.run input' pos' more' fail succ in + ensure_suspended n input pos more fail succ' } + +(** END: getting input *) + +let at_end_of_input = + { run = fun input pos more _ succ -> + if pos < Input.length input then + succ input pos more false + else match more with + | Complete -> succ input pos more true + | Incomplete -> + let succ' input' pos' more' = succ input' pos' more' false + and fail' input' pos' more' = succ input' pos' more' true in + prompt input pos fail' succ' + } + +let end_of_input = + at_end_of_input + >>= function + | true -> return () + | false -> fail "end_of_input" + +let advance n = + if n < 0 + then fail "advance" + else + let p = + { run = fun input pos more _fail succ -> succ input (pos + n) more () } + in + ensure n p + +let pos = + { run = fun input pos more _fail succ -> succ input pos more pos } + +let available = + { run = fun input pos more _fail succ -> + succ input pos more (Input.length input - pos) + } + +let commit = + { run = fun input pos more _fail succ -> + Input.commit input pos; + succ input pos more () } + +(* Do not use this if [p] contains a [commit]. *) +let unsafe_lookahead p = + { run = fun input pos more fail succ -> + let succ' input' _ more' v = succ input' pos more' v in + p.run input pos more fail succ' } + +let peek_char = + { run = fun input pos more _fail succ -> + if pos < Input.length input then + succ input pos more (Some (Input.unsafe_get_char input pos)) + else if more = Complete then + succ input pos more None + else + let succ' input' pos' more' = + succ input' pos' more' (Some (Input.unsafe_get_char input' pos')) + and fail' input' pos' more' = + succ input' pos' more' None in + prompt input pos fail' succ' + } + +(* This parser is too important to not be optimized. Do a custom job. *) +let rec peek_char_fail = + { run = fun input pos more fail succ -> + if pos < Input.length input + then succ input pos more (Input.unsafe_get_char input pos) + else + let succ' input' pos' more' () = + peek_char_fail.run input' pos' more' fail succ in + ensure_suspended 1 input pos more fail succ' } + +let satisfy f = + { run = fun input pos more fail succ -> + if pos < Input.length input then + let c = Input.unsafe_get_char input pos in + if f c + then succ input (pos + 1) more c + else Printf.ksprintf (fail input pos more []) "satisfy: %C" c + else + let succ' input' pos' more' () = + let c = Input.unsafe_get_char input' pos' in + if f c + then succ input' (pos' + 1) more' c + else Printf.ksprintf (fail input' pos' more' []) "satisfy: %C" c + in + ensure_suspended 1 input pos more fail succ' } + +let char c = + let p = + { run = fun input pos more fail succ -> + if Input.unsafe_get_char input pos = c + then succ input (pos + 1) more c + else fail input pos more [] (Printf.sprintf "char %C" c) } + in + ensure 1 p + +let not_char c = + let p = + { run = fun input pos more fail succ -> + let c' = Input.unsafe_get_char input pos in + if c <> c' + then succ input (pos + 1) more c' + else fail input pos more [] (Printf.sprintf "not char %C" c) } + in + ensure 1 p + +let any_char = + let p = + { run = fun input pos more _fail succ -> + succ input (pos + 1) more (Input.unsafe_get_char input pos) } + in + ensure 1 p + +let int8 i = + let p = + { run = fun input pos more fail succ -> + let c = Char.code (Input.unsafe_get_char input pos) in + if c = i land 0xff + then succ input (pos + 1) more c + else fail input pos more [] (Printf.sprintf "int8 %d" i) } + in + ensure 1 p + +let any_uint8 = + let p = + { run = fun input pos more _fail succ -> + let c = Input.unsafe_get_char input pos in + succ input (pos + 1) more (Char.code c) } + in + ensure 1 p + +let any_int8 = + (* https://graphics.stanford.edu/~seander/bithacks.html#VariableSignExtendRisky *) + let s = Sys.int_size - 8 in + let p = + { run = fun input pos more _fail succ -> + let c = Input.unsafe_get_char input pos in + succ input (pos + 1) more ((Char.code c lsl s) asr s) } + in + ensure 1 p + +let skip f = + let p = + { run = fun input pos more fail succ -> + if f (Input.unsafe_get_char input pos) + then succ input (pos + 1) more () + else fail input pos more [] "skip" } + in + ensure 1 p + +let rec count_while ~init ~f ~with_buffer = + { run = fun input pos more fail succ -> + let len = Input.count_while input (pos + init) ~f in + let input_len = Input.length input in + let init' = init + len in + (* Check if the loop terminated because it reached the end of the input + * buffer. If so, then prompt for additional input and continue. *) + if pos + init' < input_len || more = Complete + then succ input (pos + init') more (Input.apply input pos init' ~f:with_buffer) + else + let succ' input' pos' more' = + (count_while ~init:init' ~f ~with_buffer).run input' pos' more' fail succ + and fail' input' pos' more' = + succ input' (pos' + init') more' (Input.apply input' pos' init' ~f:with_buffer) + in + prompt input pos fail' succ' + } + +let rec count_while1 ~f ~with_buffer = + { run = fun input pos more fail succ -> + let len = Input.count_while input pos ~f in + let input_len = Input.length input in + (* Check if the loop terminated because it reached the end of the input + * buffer. If so, then prompt for additional input and continue. *) + if len < 1 + then + if pos < input_len || more = Complete + then fail input pos more [] "count_while1" + else + let succ' input' pos' more' = + (count_while1 ~f ~with_buffer).run input' pos' more' fail succ + and fail' input' pos' more' = + fail input' pos' more' [] "count_while1" + in + prompt input pos fail' succ' + else if pos + len < input_len || more = Complete + then succ input (pos + len) more (Input.apply input pos len ~f:with_buffer) + else + let succ' input' pos' more' = + (count_while ~init:len ~f ~with_buffer).run input' pos' more' fail succ + and fail' input' pos' more' = + succ input' (pos' + len) more' (Input.apply input' pos' len ~f:with_buffer) + in + prompt input pos fail' succ' + } + +let string_ f s = + (* XXX(seliopou): Inefficient. Could check prefix equality to short-circuit + * the io. *) + let len = String.length s in + ensure len (unsafe_apply_opt len ~f:(fun buffer ~off ~len -> + let i = ref 0 in + while !i < len && Char.equal (f (Bigstringaf.unsafe_get buffer (off + !i))) + (f (String.unsafe_get s !i)) + do + incr i + done; + if len = !i + then Ok (Bigstringaf.substring buffer ~off ~len) + else Error "string")) + +let string s = string_ (fun x -> x) s +let string_ci s = string_ Char.lowercase_ascii s + +let skip_while f = + count_while ~init:0 ~f ~with_buffer:(fun _ ~off:_ ~len:_ -> ()) + +let take n = + if n < 0 + then fail "take: n < 0" + else + let n = max n 0 in + ensure n (unsafe_apply n ~f:Bigstringaf.substring) + +let take_bigstring n = + if n < 0 + then fail "take_bigstring: n < 0" + else + let n = max n 0 in + ensure n (unsafe_apply n ~f:Bigstringaf.copy) + +let take_bigstring_while f = + count_while ~init:0 ~f ~with_buffer:Bigstringaf.copy + +let take_bigstring_while1 f = + count_while1 ~f ~with_buffer:Bigstringaf.copy + +let take_bigstring_till f = + take_bigstring_while (fun c -> not (f c)) + +let peek_string n = + unsafe_lookahead (take n) + +let take_while f = + count_while ~init:0 ~f ~with_buffer:Bigstringaf.substring + +let take_while1 f = + count_while1 ~f ~with_buffer:Bigstringaf.substring + +let take_till f = + take_while (fun c -> not (f c)) + +let choice ?(failure_msg="no more choices") ps = + List.fold_right (<|>) ps (fail failure_msg) + +let fix_direct f = + let rec p = lazy (f r) + and r = { run = fun buf pos more fail succ -> + (Lazy.force p).run buf pos more fail succ } + in + r + +let fix_lazy f = + let max_steps = 20 in + let steps = ref max_steps in + let rec p = lazy (f r) + and r = { run = fun buf pos more fail succ -> + decr steps; + if !steps < 0 + then ( + steps := max_steps; + State.Lazy (lazy ((Lazy.force p).run buf pos more fail succ))) + else + (Lazy.force p).run buf pos more fail succ + } + in + r + +let fix = match Sys.backend_type with + | Native -> fix_direct + | Bytecode -> fix_direct + | Other _ -> fix_lazy + +let option x p = + p <|> return x + +let cons x xs = x :: xs + +let rec list ps = + match ps with + | [] -> return [] + | p::ps -> lift2 cons p (list ps) + +let count n p = + if n < 0 + then fail "count: n < 0" + else + let rec loop = function + | 0 -> return [] + | n -> lift2 cons p (loop (n - 1)) + in + loop n + +let many p = + fix (fun m -> + (lift2 cons p m) <|> return []) + +let many1 p = + lift2 cons p (many p) + +let many_till p t = + fix (fun m -> + (t *> return []) <|> (lift2 cons p m)) + +let not_followed_by p = + { run = fun input pos more fail succ -> + let succ' _input _pos _more _ = + fail input pos more [] "not_followed_by refuted" in + let fail' _input _pos _more _ _ = + succ input pos more () in + p.run input pos more fail' succ' + } + +let sep_by1 s p = + fix (fun m -> + lift2 cons p ((s *> m) <|> return [])) + +let sep_by s p = + (lift2 cons p ((s *> sep_by1 s p) <|> return [])) <|> return [] + +let skip_many p = + fix (fun m -> + (p *> m) <|> return ()) + +let skip_many1 p = + p *> skip_many p + +let end_of_line = + (char '\n' *> return ()) <|> (string "\r\n" *> return ()) "end_of_line" + +let scan_ state f ~with_buffer = + { run = fun input pos more fail succ -> + let state = ref state in + let parser = + count_while ~init:0 ~f:(fun c -> + match f !state c with + | None -> false + | Some state' -> state := state'; true) + ~with_buffer + >>| fun x -> x, !state + in + parser.run input pos more fail succ } + +let scan state f = + scan_ state f ~with_buffer:Bigstringaf.substring + +let scan_state state f = + scan_ state f ~with_buffer:(fun _ ~off:_ ~len:_ -> ()) + >>| fun ((), state) -> state + +let scan_string state f = + scan state f >>| fst + +let consume_with p f = + { run = fun input pos more fail succ -> + let start = pos in + let parser_committed_bytes = Input.parser_committed_bytes input in + let succ' input' pos' more' _ = + if parser_committed_bytes <> Input.parser_committed_bytes input' + then fail input' pos' more' [] "consumed: parser committed" + else ( + let len = pos' - start in + let consumed = Input.apply input' start len ~f in + succ input' pos' more' consumed) + in + p.run input pos more fail succ' + } + +let consumed p = consume_with p Bigstringaf.substring +let consumed_bigstring p = consume_with p Bigstringaf.copy + +let both a b = lift2 (fun a b -> a, b) a b +let map t ~f = t >>| f +let bind t ~f = t >>= f +let map2 a b ~f = lift2 f a b +let map3 a b c ~f = lift3 f a b c +let map4 a b c d ~f = lift4 f a b c d + +module Let_syntax = struct + let return = return + let ( >>| ) = ( >>| ) + let ( >>= ) = ( >>= ) + + module Let_syntax = struct + let return = return + let map = map + let bind = bind + let both = both + let map2 = map2 + let map3 = map3 + let map4 = map4 + end +end + +let ( let+ ) = ( >>| ) +let ( let* ) = ( >>= ) +let ( and+ ) = both + +module BE = struct + (* XXX(seliopou): The pattern in both this module and [LE] are a compromise + * between efficiency and code reuse. By inlining [ensure] you can recover + * about 2 nanoseconds on average. That may add up in certain applications. + * + * This pattern does not allocate in the fast (success) path. + * *) + let int16 n = + let bytes = 2 in + let p = + { run = fun input pos more fail succ -> + if Input.unsafe_get_int16_be input pos = (n land 0xffff) + then succ input (pos + bytes) more () + else fail input pos more [] "BE.int16" } + in + ensure bytes p + + let int32 n = + let bytes = 4 in + let p = + { run = fun input pos more fail succ -> + if Int32.equal (Input.unsafe_get_int32_be input pos) n + then succ input (pos + bytes) more () + else fail input pos more [] "BE.int32" } + in + ensure bytes p + + let int64 n = + let bytes = 8 in + let p = + { run = fun input pos more fail succ -> + if Int64.equal (Input.unsafe_get_int64_be input pos) n + then succ input (pos + bytes) more () + else fail input pos more [] "BE.int64" } + in + ensure bytes p + + let any_uint16 = + ensure 2 (unsafe_apply 2 ~f:(fun bs ~off ~len:_ -> Bigstringaf.unsafe_get_int16_be bs off)) + + let any_int16 = + ensure 2 (unsafe_apply 2 ~f:(fun bs ~off ~len:_ -> Bigstringaf.unsafe_get_int16_sign_extended_be bs off)) + + let any_int32 = + ensure 4 (unsafe_apply 4 ~f:(fun bs ~off ~len:_ -> Bigstringaf.unsafe_get_int32_be bs off)) + + let any_int64 = + ensure 8 (unsafe_apply 8 ~f:(fun bs ~off ~len:_ -> Bigstringaf.unsafe_get_int64_be bs off)) + + let any_float = + ensure 4 (unsafe_apply 4 ~f:(fun bs ~off ~len:_ -> Int32.float_of_bits (Bigstringaf.unsafe_get_int32_be bs off))) + + let any_double = + ensure 8 (unsafe_apply 8 ~f:(fun bs ~off ~len:_ -> Int64.float_of_bits (Bigstringaf.unsafe_get_int64_be bs off))) +end + +module LE = struct + let int16 n = + let bytes = 2 in + let p = + { run = fun input pos more fail succ -> + if Input.unsafe_get_int16_le input pos = (n land 0xffff) + then succ input (pos + bytes) more () + else fail input pos more [] "LE.int16" } + in + ensure bytes p + + let int32 n = + let bytes = 4 in + let p = + { run = fun input pos more fail succ -> + if Int32.equal (Input.unsafe_get_int32_le input pos) n + then succ input (pos + bytes) more () + else fail input pos more [] "LE.int32" } + in + ensure bytes p + + let int64 n = + let bytes = 8 in + let p = + { run = fun input pos more fail succ -> + if Int64.equal (Input.unsafe_get_int64_le input pos) n + then succ input (pos + bytes) more () + else fail input pos more [] "LE.int64" } + in + ensure bytes p + + + let any_uint16 = + ensure 2 (unsafe_apply 2 ~f:(fun bs ~off ~len:_ -> Bigstringaf.unsafe_get_int16_le bs off)) + + let any_int16 = + ensure 2 (unsafe_apply 2 ~f:(fun bs ~off ~len:_ -> Bigstringaf.unsafe_get_int16_sign_extended_le bs off)) + + let any_int32 = + ensure 4 (unsafe_apply 4 ~f:(fun bs ~off ~len:_ -> Bigstringaf.unsafe_get_int32_le bs off)) + + let any_int64 = + ensure 8 (unsafe_apply 8 ~f:(fun bs ~off ~len:_ -> Bigstringaf.unsafe_get_int64_le bs off)) + + let any_float = + ensure 4 (unsafe_apply 4 ~f:(fun bs ~off ~len:_ -> Int32.float_of_bits (Bigstringaf.unsafe_get_int32_le bs off))) + + let any_double = + ensure 8 (unsafe_apply 8 ~f:(fun bs ~off ~len:_ -> Int64.float_of_bits (Bigstringaf.unsafe_get_int64_le bs off))) +end + +module Unsafe = struct + let take n f = + let n = max n 0 in + ensure n (unsafe_apply n ~f) + + let peek n f = + unsafe_lookahead (take n f) + + let take_while check f = + count_while ~init:0 ~f:check ~with_buffer:f + + let take_while1 check f = + count_while1 ~f:check ~with_buffer:f + + let take_till check f = + take_while (fun c -> not (check c)) f +end + +module Consume = struct + type t = + | Prefix + | All +end + +let parse_bigstring ~consume p bs = + let p = + match (consume : Consume.t) with + | Prefix -> p + | All -> p <* end_of_input + in + Unbuffered.parse_bigstring p bs + +let parse_string ~consume p s = + let len = String.length s in + let bs = Bigstringaf.create len in + Bigstringaf.unsafe_blit_from_string s ~src_off:0 bs ~dst_off:0 ~len; + parse_bigstring ~consume p bs diff --git a/lib/vendored/vangstrom/lib/vangstrom.mli b/lib/vendored/vangstrom/lib/vangstrom.mli new file mode 100644 index 0000000..b1a8f2d --- /dev/null +++ b/lib/vendored/vangstrom/lib/vangstrom.mli @@ -0,0 +1,684 @@ +(*---------------------------------------------------------------------------- + Copyright (c) 2016 Inhabited Type LLC. + + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. Neither the name of the author nor the names of his contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE CONTRIBUTORS ``AS IS'' AND ANY EXPRESS + OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + ----------------------------------------------------------------------------*) + +(** Parser combinators built for speed and memory-efficiency. + + Angstrom is a parser-combinator library that provides monadic and + applicative interfaces for constructing parsers with unbounded lookahead. + Its parsers can consume input incrementally, whether in a blocking or + non-blocking environment. To achieve efficient incremental parsing, + Angstrom offers both a buffered and unbuffered interface to input streams, + with the {!module:Unbuffered} interface enabling zero-copy IO. With these + features and low-level iteration parser primitives like {!take_while} and + {!skip_while}, Angstrom makes it easy to write efficient, expressive, and + reusable parsers suitable for high-performance applications. *) + + +type +'a t +(** A parser for values of type ['a]. *) + + +type bigstring = + (char, Bigarray.int8_unsigned_elt, Bigarray.c_layout) Bigarray.Array1.t + +(** {2 Basic parsers} *) + +val peek_char : char option t +(** [peek_char] accepts any char and returns it, or returns [None] if the end + of input has been reached. + + This parser does not advance the input. Use it for lookahead. *) + +val peek_char_fail : char t +(** [peek_char_fail] accepts any char and returns it. If end of input has been + reached, it will fail. + + This parser does not advance the input. Use it for lookahead. *) + +val peek_string : int -> string t +(** [peek_string n] accepts exactly [n] characters and returns them as a + string. If there is not enough input, it will fail. + + This parser does not advance the input. Use it for lookahead. *) + +val char : char -> char t +(** [char c] accepts [c] and returns it. *) + +val not_char : char -> char t +(** [not_char] accepts any character that is not [c] and returns the matched + character. *) + +val any_char : char t +(** [any_char] accepts any character and returns it. *) + +val satisfy : (char -> bool) -> char t +(** [satisfy f] accepts any character for which [f] returns [true] and + returns the accepted character. In the case that none of the parser + succeeds, then the parser will fail indicating the offending + character. *) + +val string : string -> string t +(** [string s] accepts [s] exactly and returns it. *) + +val string_ci : string -> string t +(** [string_ci s] accepts [s], ignoring case, and returns the matched string, + preserving the case of the original input. *) + +val skip : (char -> bool) -> unit t +(** [skip f] accepts any character for which [f] returns [true] and discards + the accepted character. [skip f] is equivalent to [satisfy f] but discards + the accepted character. *) + +val skip_while : (char -> bool) -> unit t +(** [skip_while f] accepts input as long as [f] returns [true] and discards + the accepted characters. *) + +val take : int -> string t +(** [take n] accepts exactly [n] characters of input and returns them as a + string. *) + +val take_while : (char -> bool) -> string t +(** [take_while f] accepts input as long as [f] returns [true] and returns the + accepted characters as a string. + + This parser does not fail. If [f] returns [false] on the first character, + it will return the empty string. *) + +val take_while1 : (char -> bool) -> string t +(** [take_while1 f] accepts input as long as [f] returns [true] and returns the + accepted characters as a string. + + This parser requires that [f] return [true] for at least one character of + input, and will fail otherwise. *) + +val take_till : (char -> bool) -> string t +(** [take_till f] accepts input as long as [f] returns [false] and returns the + accepted characters as a string. + + This parser does not fail. If [f] returns [true] on the first character, it + will return the empty string. *) + +val consumed : _ t -> string t +(** [consumed p] runs [p] and returns the contents that were consumed during the + parsing as a string *) + +val take_bigstring : int -> bigstring t +(** [take_bigstring n] accepts exactly [n] characters of input and returns them + as a newly allocated bigstring. *) + +val take_bigstring_while : (char -> bool) -> bigstring t +(** [take_bigstring_while f] accepts input as long as [f] returns [true] and + returns the accepted characters as a newly allocated bigstring. + + This parser does not fail. If [f] returns [false] on the first character, + it will return the empty bigstring. *) + +val take_bigstring_while1 : (char -> bool) -> bigstring t +(** [take_bigstring_while1 f] accepts input as long as [f] returns [true] and + returns the accepted characters as a newly allocated bigstring. + + This parser requires that [f] return [true] for at least one character of + input, and will fail otherwise. *) + +val take_bigstring_till : (char -> bool) -> bigstring t +(** [take_bigstring_till f] accepts input as long as [f] returns [false] and + returns the accepted characters as a newly allocated bigstring. + + This parser does not fail. If [f] returns [true] on the first character, it + will return the empty bigstring. *) + +val consumed_bigstring : _ t -> bigstring t +(** [consumed p] runs [p] and returns the contents that were consumed during the + parsing as a bigstring *) + +val advance : int -> unit t +(** [advance n] advances the input [n] characters, failing if the remaining + input is less than [n]. *) + +val end_of_line : unit t +(** [end_of_line] accepts either a line feed [\n], or a carriage return + followed by a line feed [\r\n] and returns unit. *) + +val at_end_of_input : bool t +(** [at_end_of_input] returns whether the end of the end of input has been + reached. This parser always succeeds. *) + +val end_of_input : unit t +(** [end_of_input] succeeds if all the input has been consumed, and fails + otherwise. *) + +val scan : 'state -> ('state -> char -> 'state option) -> (string * 'state) t +(** [scan init f] consumes until [f] returns [None]. Returns the final state + before [None] and the accumulated string *) + +val scan_state : 'state -> ('state -> char -> 'state option) -> 'state t +(** [scan_state init f] is like {!scan} but only returns the final state before + [None]. Much more efficient than {!scan}. *) + +val scan_string : 'state -> ('state -> char -> 'state option) -> string t +(** [scan_string init f] is like {!scan} but discards the final state and returns + the accumulated string. *) + +val int8 : int -> int t +(** [int8 i] accepts one byte that matches the lower-order byte of [i] and + returns unit. *) + +val any_uint8 : int t +(** [any_uint8] accepts any byte and returns it as an unsigned int8. *) + +val any_int8 : int t +(** [any_int8] accepts any byte and returns it as a signed int8. *) + +(** Big endian parsers *) +module BE : sig + val int16 : int -> unit t + (** [int16 i] accept two bytes that match the two lower order bytes of [i] + and returns unit. *) + + val int32 : int32 -> unit t + (** [int32 i] accept four bytes that match the four bytes of [i] + and returns unit. *) + + val int64 : int64 -> unit t + (** [int64 i] accept eight bytes that match the eight bytes of [i] and + returns unit. *) + + val any_int16 : int t + val any_int32 : int32 t + val any_int64 : int64 t + (** [any_intN] reads [N] bits and interprets them as big endian signed integers. *) + + val any_uint16 : int t + (** [any_uint16] reads [16] bits and interprets them as a big endian unsigned + integer. *) + + val any_float : float t + (** [any_float] reads 32 bits and interprets them as a big endian floating + point value. *) + + val any_double : float t + (** [any_double] reads 64 bits and interprets them as a big endian floating + point value. *) +end + +(** Little endian parsers *) +module LE : sig + val int16 : int -> unit t + (** [int16 i] accept two bytes that match the two lower order bytes of [i] + and returns unit. *) + + val int32 : int32 -> unit t + (** [int32 i] accept four bytes that match the four bytes of [i] + and returns unit. *) + + val int64 : int64 -> unit t + (** [int32 i] accept eight bytes that match the eight bytes of [i] and + returns unit. *) + + val any_int16 : int t + val any_int32 : int32 t + val any_int64 : int64 t + (** [any_intN] reads [N] bits and interprets them as little endian signed + integers. *) + + val any_uint16 : int t + (** [uint16] reads [16] bits and interprets them as a little endian unsigned + integer. *) + + val any_float : float t + (** [any_float] reads 32 bits and interprets them as a little endian floating + point value. *) + + val any_double : float t + (** [any_double] reads 64 bits and interprets them as a little endian floating + point value. *) +end + + +(** {2 Combinators} *) + +val option : 'a -> 'a t -> 'a t +(** [option v p] runs [p], returning the result of [p] if it succeeds and [v] + if it fails. *) + + +val both : 'a t -> 'b t -> ('a * 'b) t +(** [both p q] runs [p] followed by [q] and returns both results in a tuple *) + +val list : 'a t list -> 'a list t +(** [list ps] runs each [p] in [ps] in sequence, returning a list of results of + each [p]. *) + +val count : int -> 'a t -> 'a list t +(** [count n p] runs [p] [n] times, returning a list of the results. *) + +val many : 'a t -> 'a list t +(** [many p] runs [p] {i zero} or more times and returns a list of results from + the runs of [p]. *) + +val many1 : 'a t -> 'a list t +(** [many1 p] runs [p] {i one} or more times and returns a list of results from + the runs of [p]. *) + +val many_till : 'a t -> _ t -> 'a list t +(** [many_till p e] runs parser [p] {i zero} or more times until action [e] + succeeds and returns the list of result from the runs of [p]. *) + +val sep_by : _ t -> 'a t -> 'a list t +(** [sep_by s p] runs [p] {i zero} or more times, interspersing runs of [s] in between. *) + +val sep_by1 : _ t -> 'a t -> 'a list t +(** [sep_by1 s p] runs [p] {i one} or more times, interspersing runs of [s] in between. *) + +val not_followed_by : _ t -> unit t +(** [not_followed_by p] succeeds when [p] does not succeed at the current + position. Otherwise, it fails. It does not consume input. *) + +val skip_many : _ t -> unit t +(** [skip_many p] runs [p] {i zero} or more times, discarding the results. *) + +val skip_many1 : _ t -> unit t +(** [skip_many1 p] runs [p] {i one} or more times, discarding the results. *) + +val fix : ('a t -> 'a t) -> 'a t +(** [fix f] computes the fixpoint of [f] and runs the resultant parser. The + argument that [f] receives is the result of [fix f], which [f] must use, + paradoxically, to define [fix f]. + + [fix] is useful when constructing parsers for inductively-defined types + such as sequences, trees, etc. Consider for example the implementation of + the {!many} combinator defined in this library: + +{[let many p = + fix (fun m -> + (cons <$> p <*> m) <|> return [])]} + + [many p] is a parser that will run [p] zero or more times, accumulating the + result of every run into a list, returning the result. It's defined by + passing [fix] a function. This function assumes its argument [m] is a + parser that behaves exactly like [many p]. You can see this in the + expression comprising the left hand side of the alternative operator + [<|>]. This expression runs the parser [p] followed by the parser [m], and + after which the result of [p] is cons'd onto the list that [m] produces. + The right-hand side of the alternative operator provides a base case for + the combinator: if [p] fails and the parse cannot proceed, return an empty + list. + + Another way to illustrate the uses of [fix] is to construct a JSON parser. + Assuming that parsers exist for the basic types such as [false], [true], + [null], strings, and numbers, the question then becomes how to define a + parser for objects and arrays? Both contain values that are themselves JSON + values, so it seems as though it's impossible to write a parser that will + accept JSON objects and arrays before writing a parser for JSON values as a + whole. + + This is the exact situation that [fix] was made for. By defining the + parsers for arrays and objects within the function that you pass to [fix], + you will gain access to a parser that you can use to parse JSON values, the + very parser you are defining! + +{[let json = + fix (fun json -> + let arr = char '[' *> sep_by (char ',') json <* char ']' in + let obj = char '{' *> ... json ... <* char '}' in + choice [str; num; arr json, ...])]} *) + + +(** {2 Alternatives} *) + +val (<|>) : 'a t -> 'a t -> 'a t +(** [p <|> q] runs [p] and returns the result if succeeds. If [p] fails, then + the input will be reset and [q] will run instead. *) + +val choice : ?failure_msg:string -> 'a t list -> 'a t +(** [choice ?failure_msg ts] runs each parser in [ts] in order until one + succeeds and returns that result. In the case that none of the parser + succeeds, then the parser will fail with the message [failure_msg], if + provided, or a much less informative message otherwise. *) + +val () : 'a t -> string -> 'a t +(** [p name] associates [name] with the parser [p], which will be reported + in the case of failure. *) + +val commit : unit t +(** [commit] prevents backtracking beyond the current position of the input, + allowing the manager of the input buffer to reuse the preceding bytes for + other purposes. + + The {!module:Unbuffered} parsing interface will report directly to the + caller the number of bytes committed to the when returning a + {!Unbuffered.state.Partial} state, allowing the caller to reuse those bytes + for any purpose. The {!module:Buffered} will keep track of the region of + committed bytes in its internal buffer and reuse that region to store + additional input when necessary. *) + + +(** {2 Monadic/Applicative interface} *) + +val return : 'a -> 'a t +(** [return v] creates a parser that will always succeed and return [v] *) + +val fail : string -> _ t +(** [fail msg] creates a parser that will always fail with the message [msg] *) + +val (>>=) : 'a t -> ('a -> 'b t) -> 'b t +(** [p >>= f] creates a parser that will run [p], pass its result to [f], run + the parser that [f] produces, and return its result. *) + +val bind : 'a t -> f:('a -> 'b t) -> 'b t +(** [bind] is a prefix version of [>>=] *) + +val (>>|) : 'a t -> ('a -> 'b) -> 'b t +(** [p >>| f] creates a parser that will run [p], and if it succeeds with + result [v], will return [f v] *) + +val (<*>) : ('a -> 'b) t -> 'a t -> 'b t +(** [f <*> p] is equivalent to [f >>= fun f -> p >>| f]. *) + +val (<$>) : ('a -> 'b) -> 'a t -> 'b t +(** [f <$> p] is equivalent to [p >>| f] *) + +val ( *>) : _ t -> 'a t -> 'a t +(** [p *> q] runs [p], discards its result and then runs [q], and returns its + result. *) + +val (<* ) : 'a t -> _ t -> 'a t +(** [p <* q] runs [p], then runs [q], discards its result, and returns the + result of [p]. *) + +val lift : ('a -> 'b) -> 'a t -> 'b t +val lift2 : ('a -> 'b -> 'c) -> 'a t -> 'b t -> 'c t +val lift3 : ('a -> 'b -> 'c -> 'd) -> 'a t -> 'b t -> 'c t -> 'd t +val lift4 : ('a -> 'b -> 'c -> 'd -> 'e) -> 'a t -> 'b t -> 'c t -> 'd t -> 'e t +(** The [liftn] family of functions promote functions to the parser monad. + For any of these functions, the following equivalence holds: + +{[liftn f p1 ... pn = f <$> p1 <*> ... <*> pn]} + + These functions are more efficient than using the applicative interface + directly, mostly in terms of memory allocation but also in terms of speed. + Prefer them over the applicative interface, even when the arity of the + function to be lifted exceeds the maximum [n] for which there is an + implementation for [liftn]. In other words, if [f] has an arity of [5] but + only [lift4] is provided, do the following: + +{[lift4 f m1 m2 m3 m4 <*> m5]} + + Even with the partial application, it will be more efficient than the + applicative implementation. *) + +val map : 'a t -> f:('a -> 'b) -> 'b t +val map2 : 'a t -> 'b t -> f:('a -> 'b -> 'c) -> 'c t +val map3 : 'a t -> 'b t -> 'c t -> f:('a -> 'b -> 'c -> 'd) -> 'd t +val map4 : 'a t -> 'b t -> 'c t -> 'd t -> f:('a -> 'b -> 'c -> 'd -> 'e) -> 'e t +(** The [mapn] family of functions are just like [liftn], with a slightly + different interface. *) + +(** The [Let_syntax] module is intended to be used with the [ppx_let] + pre-processor, and just contains copies of functions described elsewhere. *) +module Let_syntax : sig + val return : 'a -> 'a t + val ( >>| ) : 'a t -> ('a -> 'b) -> 'b t + val ( >>= ) : 'a t -> ('a -> 'b t) -> 'b t + + module Let_syntax : sig + val return : 'a -> 'a t + val map : 'a t -> f:('a -> 'b) -> 'b t + val bind : 'a t -> f:('a -> 'b t) -> 'b t + val both : 'a t -> 'b t -> ('a * 'b) t + val map2 : 'a t -> 'b t -> f:('a -> 'b -> 'c) -> 'c t + val map3 : 'a t -> 'b t -> 'c t -> f:('a -> 'b -> 'c -> 'd) -> 'd t + val map4 : 'a t -> 'b t -> 'c t -> 'd t -> f:('a -> 'b -> 'c -> 'd -> 'e) -> 'e t + end +end + +val ( let+ ) : 'a t -> ('a -> 'b) -> 'b t +val ( let* ) : 'a t -> ('a -> 'b t) -> 'b t +val ( and+ ) : 'a t -> 'b t -> ('a * 'b) t + +(** Unsafe Operations on Angstrom's Internal Buffer + + These functions are considered {b unsafe} as they expose the input buffer + to client code without any protections against modification, or leaking + references. They are exposed to support performance-sensitive parsers that + want to avoid allocation at all costs. Client code should take care to + write the input buffer callback functions such that they: + + {ul + {- do not modify the input buffer {i outside} of the range + [\[off, off + len)];} + {- do not modify the input buffer {i inside} of the range + [\[off, off + len)] if the parser might backtrack; and} + {- do not return any direct or indirect references to the input buffer.}} + + If the input buffer callback functions do not do any of these things, then + the client may consider their use safe. *) +module Unsafe : sig + + val take : int -> (bigstring -> off:int -> len:int -> 'a) -> 'a t + (** [take n f] accepts exactly [n] characters of input into the parser's + internal buffer then calls [f buffer ~off ~len]. [buffer] is the + parser's internal buffer. [off] is the offset from the start of [buffer] + containing the requested content. [len] is the length of the requested + content. [len] is guaranteed to be equal to [n]. *) + + val take_while : (char -> bool) -> (bigstring -> off:int -> len:int -> 'a) -> 'a t + (** [take_while check f] accepts input into the parser's interal buffer as + long as [check] returns [true] then calls [f buffer ~off ~len]. [buffer] + is the parser's internal buffer. [off] is the offset from the start of + [buffer] containing the requested content. [len] is the length of the + content matched by [check]. + + This parser does not fail. If [check] returns [false] on the first + character, [len] will be [0]. *) + + val take_while1 : (char -> bool) -> (bigstring -> off:int -> len:int -> 'a) -> 'a t + (** [take_while1 check f] accepts input into the parser's interal buffer as + long as [check] returns [true] then calls [f buffer ~off ~len]. [buffer] + is the parser's internal buffer. [off] is the offset from the start of + [buffer] containing the requested content. [len] is the length of the + content matched by [check]. + + This parser requires that [f] return [true] for at least one character of + input, and will fail otherwise. *) + + val take_till : (char -> bool) -> (bigstring -> off:int -> len:int -> 'a) -> 'a t + (** [take_till check f] accepts input into the parser's interal buffer as + long as [check] returns [false] then calls [f buffer ~off ~len]. [buffer] + is the parser's internal buffer. [off] is the offset from the start of + [buffer] containing the requested content. [len] is the length of the + content matched by [check]. + + This parser does not fail. If [check] returns [true] on the first + character, [len] will be [0]. *) + + val peek : int -> (bigstring -> off:int -> len:int -> 'a) -> 'a t + (** [peek n ~f] accepts exactly [n] characters and calls [f buffer ~off ~len] + with [len = n]. If there is not enough input, it will fail. + + This parser does not advance the input. Use it for lookahead. *) +end + + +(** {2 Running} *) + +module Consume : sig + type t = + | Prefix + | All +end + +val parse_bigstring : consume:Consume.t -> 'a t -> bigstring -> ('a, string) result + +(** [parse_bigstring ~consume t bs] runs [t] on [bs]. The parser will receive + an [`Eof] after all of [bs] has been consumed. Passing {!Prefix} in the + [consume] argument allows the parse to successfully complete without + reaching eof. To require the parser to reach eof, pass {!All} in the + [consume] argument. + + For use-cases requiring that the parser be fed input incrementally, see the + {!module:Buffered} and {!module:Unbuffered} modules below. *) + + +val parse_string : consume:Consume.t -> 'a t -> string -> ('a, string) result +(** [parse_string ~consume t bs] runs [t] on [bs]. The parser will receive an + [`Eof] after all of [bs] has been consumed. Passing {!Prefix} in the + [consume] argument allows the parse to successfully complete without + reaching eof. To require the parser to reach eof, pass {!All} in the + [consume] argument. + + For use-cases requiring that the parser be fed input incrementally, see the + {!module:Buffered} and {!module:Unbuffered} modules below. *) + + +(** Buffered parsing interface. + + Parsers run through this module perform internal buffering of input. The + parser state will keep track of unconsumed input and attempt to minimize + memory allocation and copying. The {!Buffered.state.Partial} parser state + will accept newly-read, incremental input and copy it into the internal + buffer. Users can feed parser states using the {!feed} function. As a + result, the interface is much easier to use than the one exposed by the + {!Unbuffered} module. + + On success or failure, any unconsumed input will be returned to the user + for additional processing. The buffer that the unconsumed input is returned + in can also be reused. *) +module Buffered : sig + type unconsumed = + { buf : bigstring + ; off : int + ; len : int } + + type input = + [ `Bigstring of bigstring + | `String of string ] + + type 'a state = + | Partial of ([ input | `Eof ] -> 'a state) (** The parser requires more input. *) + | Done of unconsumed * 'a (** The parser succeeded. *) + | Fail of unconsumed * string list * string (** The parser failed. *) + + val parse : ?initial_buffer_size:int -> 'a t -> 'a state + (** [parse ?initial_buffer_size t] runs [t] and awaits input if needed. + [parse] will allocate a buffer of size [initial_buffer_size] (defaulting + to 4k bytes) to do input buffering and automatically grows the buffer as + needed. *) + + val feed : 'a state -> [ input | `Eof ] -> 'a state + (** [feed state input] supplies the parser state with more input. If [state] is + [Partial], then parsing will continue where it left off. Otherwise, the + parser is in a [Fail] or [Done] state, in which case the [input] will be + copied into the state's buffer for later use by the caller. *) + + val state_to_option : 'a state -> 'a option + (** [state_to_option state] returns [Some v] if the parser is in the + [Done (bs, v)] state and [None] otherwise. This function has no effect on + the current state of the parser. *) + + val state_to_result : 'a state -> ('a, string) result + (** [state_to_result state] returns [Ok v] if the parser is in the [Done (bs, v)] + state and [Error msg] if it is in the [Fail] or [Partial] state. + + This function has no effect on the current state of the parser. *) + + val state_to_unconsumed : _ state -> unconsumed option + (** [state_to_unconsumed state] returns [Some bs] if [state = Done(bs, _)] or + [state = Fail(bs, _, _)] and [None] otherwise. *) + +end + +(** Unbuffered parsing interface. + + Use this module for total control over memory allocation and copying. + Parsers run through this module perform no internal buffering. Instead, the + user is responsible for managing a buffer containing the entirety of the + input that has yet to be consumed by the parser. The + {!Unbuffered.state.Partial} parser state reports to the user how much input + the parser consumed during its last run, via the + {!Unbuffered.partial.committed} field. This area of input must be discarded + before parsing can resume. Once additional input has been collected, the + unconsumed input as well as new input must be passed to the parser state + via the {!Unbuffered.partial.continue} function, together with an + indication of whether there is {!Unbuffered.more} input to come. + + The logic that must be implemented in order to make proper use of this + module is intricate and tied to your OS environment. It's advisable to use + the {!Buffered} module when initially developing and testing your parsers. + For production use-cases, consider the Async and Lwt support that this + library includes before attempting to use this module directly. *) +module Unbuffered : sig + type more = + | Complete + | Incomplete + + type 'a state = + | Partial of 'a partial (** The parser requires more input. *) + | Done of int * 'a (** The parser succeeded, consuming specified bytes. *) + | Fail of int * string list * string (** The parser failed, consuming specified bytes. *) + and 'a partial = + { committed : int + (** The number of bytes committed during the last input feeding. + Callers must drop this number of bytes from the beginning of the + input on subsequent calls. See {!commit} for additional details. *) + ; continue : bigstring -> off:int -> len:int -> more -> 'a state + (** A continuation of a parse that requires additional input. The input + should include all uncommitted input (as reported by previous partial + states) in addition to any new input that has become available, as + well as an indication of whether there is {!more} input to come. *) + } + + val parse : 'a t -> 'a state + (** [parse t] runs [t] and await input if needed. *) + + val state_to_option : 'a state -> 'a option + + (** [state_to_option state] returns [Some v] if the parser is in the + [Done (bs, v)] state and [None] otherwise. This function has no effect on the + current state of the parser. *) + + val state_to_result : 'a state -> ('a, string) result + (** [state_to_result state] returns [Ok v] if the parser is in the + [Done (bs, v)] state and [Error msg] if it is in the [Fail] or [Partial] + state. + + This function has no effect on the current state of the parser. *) +end + +(** {2 Expert Parsers} + + For people that know what they're doing. If you want to use them, read the + code. No further documentation will be provided. *) + +val pos : int t +val available : int t diff --git a/lib/vendored/vangstrom/lwt/dune b/lib/vendored/vangstrom/lwt/dune new file mode 100644 index 0000000..60d2e0e --- /dev/null +++ b/lib/vendored/vangstrom/lwt/dune @@ -0,0 +1,5 @@ +(library + (name vangstrom_lwt_unix) + (public_name comby.vangstrom-lwt-unix) + (flags :standard -safe-string) + (libraries vangstrom lwt.unix)) diff --git a/lib/vendored/vangstrom/lwt/vangstrom_lwt_unix.ml b/lib/vendored/vangstrom/lwt/vangstrom_lwt_unix.ml new file mode 100644 index 0000000..15a70d2 --- /dev/null +++ b/lib/vendored/vangstrom/lwt/vangstrom_lwt_unix.ml @@ -0,0 +1,81 @@ +(*---------------------------------------------------------------------------- + Copyright (c) 2016 Inhabited Type LLC. + + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. Neither the name of the author nor the names of his contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE CONTRIBUTORS ``AS IS'' AND ANY EXPRESS + OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + ----------------------------------------------------------------------------*) + +open Vangstrom.Buffered +open Lwt + +let default_pushback () = return_unit + +let rec buffered_state_loop pushback state in_chan bytes = + let size = Bytes.length bytes in + match state with + | Partial k -> + Lwt_io.read_into in_chan bytes 0 size + >|= begin function + | 0 -> k `Eof + | len -> + assert (len > 0); + k (`String (Bytes.(unsafe_to_string (sub bytes 0 len)))) + end + >>= fun state' -> pushback () + >>= fun () -> buffered_state_loop pushback state' in_chan bytes + | state -> return state + +let handle_parse_result state = + match state_to_unconsumed state with + | None -> assert false + | Some us -> us, state_to_result state + +let parse ?(pushback=default_pushback) p in_chan = + let size = Lwt_io.buffer_size in_chan in + let bytes = Bytes.create size in + buffered_state_loop pushback (parse ~initial_buffer_size:size p) in_chan bytes + >|= handle_parse_result + +let with_buffered_parse_state ?(pushback=default_pushback) state in_chan = + let size = Lwt_io.buffer_size in_chan in + let bytes = Bytes.create size in + begin match state with + | Partial _ -> buffered_state_loop pushback state in_chan bytes + | _ -> return state + end + >|= handle_parse_result + +let async_many e k = + Vangstrom.(skip_many (e <* commit >>| k) "async_many") + +let parse_many p write in_chan = + let wait = ref (default_pushback ()) in + let k x = wait := write x in + let pushback () = !wait in + parse ~pushback (async_many p k) in_chan diff --git a/lib/vendored/vangstrom/lwt/vangstrom_lwt_unix.mli b/lib/vendored/vangstrom/lwt/vangstrom_lwt_unix.mli new file mode 100644 index 0000000..e9333d3 --- /dev/null +++ b/lib/vendored/vangstrom/lwt/vangstrom_lwt_unix.mli @@ -0,0 +1,72 @@ +(*--------------------------------------------------------------------------- + Copyright (c) 2016 Inhabited Type LLC. + + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. Neither the name of the author nor the names of his contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE CONTRIBUTORS ``AS IS'' AND ANY EXPRESS + OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + ----------------------------------------------------------------------------*) + +open Vangstrom + + +val parse + : ?pushback:(unit -> unit Lwt.t) + -> 'a t + -> Lwt_io.input_channel + -> (Buffered.unconsumed * ('a, string) result) Lwt.t + +val parse_many + : 'a t + -> ('a -> unit Lwt.t) + -> Lwt_io.input_channel + -> (Buffered.unconsumed * (unit, string) result) Lwt.t + +(** Useful for resuming a {!parse} that returns unconsumed data. Construct a + [Buffered.state] by using [Buffered.parse] and provide it into this + function. This is essentially what {!parse_many} does, so consider using + that if you don't require fine-grained control over how many times you want + the parser to succeed. + + Usage example: + + {[ + parse parser in_channel >>= fun (unconsumed, result) -> + match result with + | Ok a -> + let { buf; off; len } = unconsumed in + let state = Buffered.parse parser in + let state = Buffered.feed state (`Bigstring (Bigstringaf.sub ~off ~len buf)) in + with_buffered_parse_state state in_channel + | Error err -> failwith err + ]} *) +val with_buffered_parse_state + : ?pushback:(unit -> unit Lwt.t) + -> 'a Buffered.state + -> Lwt_io.input_channel + -> (Buffered.unconsumed * ('a, string) result) Lwt.t + diff --git a/lib/vendored/vangstrom/unix/dune b/lib/vendored/vangstrom/unix/dune new file mode 100644 index 0000000..80967ac --- /dev/null +++ b/lib/vendored/vangstrom/unix/dune @@ -0,0 +1,4 @@ +(library + (name vangstrom_unix) + (public_name comby.vangstrom-unix) + (libraries vangstrom unix)) diff --git a/lib/vendored/vangstrom/unix/vangstrom_unix.ml b/lib/vendored/vangstrom/unix/vangstrom_unix.ml new file mode 100644 index 0000000..de903d9 --- /dev/null +++ b/lib/vendored/vangstrom/unix/vangstrom_unix.ml @@ -0,0 +1,52 @@ +(*---------------------------------------------------------------------------- + Copyright (c) 2016 Inhabited Type LLC. + + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. Neither the name of the author nor the names of his contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE CONTRIBUTORS ``AS IS'' AND ANY EXPRESS + OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + ----------------------------------------------------------------------------*) + +open Vangstrom.Buffered + +let parse ?(buf_size=0x1000) p in_chan = + let bytes = Bytes.create buf_size in + let rec loop = function + | Partial k -> + begin match input in_chan bytes 0 buf_size with + | 0 -> loop (k `Eof) + | n -> loop (k (`String (Bytes.(unsafe_to_string (sub bytes 0 n))))) + end + | state -> state + in + let state = loop (parse p) in + match state_to_unconsumed state with + | None -> assert false + | Some us -> us, state_to_result state + +let parse_many ?buf_size p k in_chan = + parse ?buf_size Vangstrom.(skip_many (p <* commit >>| k)) in_chan diff --git a/lib/vendored/vangstrom/unix/vangstrom_unix.mli b/lib/vendored/vangstrom/unix/vangstrom_unix.mli new file mode 100644 index 0000000..409617c --- /dev/null +++ b/lib/vendored/vangstrom/unix/vangstrom_unix.mli @@ -0,0 +1,48 @@ +(*---------------------------------------------------------------------------- + Copyright (c) 2016 Inhabited Type LLC. + + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. Neither the name of the author nor the names of his contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE CONTRIBUTORS ``AS IS'' AND ANY EXPRESS + OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + ----------------------------------------------------------------------------*) + +open Vangstrom + + +val parse : + ?buf_size:int + -> 'a t + -> in_channel + -> Buffered.unconsumed * ('a, string) result + +val parse_many : + ?buf_size:int + -> 'a t + -> ('a -> unit) + -> in_channel + -> Buffered.unconsumed * (unit, string) result diff --git a/src/dune b/src/dune index 8e29c1f..a52ddb5 100644 --- a/src/dune +++ b/src/dune @@ -9,17 +9,17 @@ (modes byte exe) (names main)) -(executables - (libraries comby core opium ppx_deriving_yojson ppx_deriving_yojson.runtime cohttp-lwt-unix) - (preprocess (pps ppx_deriving_yojson ppx_let ppx_deriving.show ppx_sexp_conv)) - (modules server server_types) - (names server)) +; (executables +; (libraries comby core opium ppx_deriving_yojson ppx_deriving_yojson.runtime cohttp-lwt-unix) +; (preprocess (pps ppx_deriving_yojson ppx_let ppx_deriving.show ppx_sexp_conv)) +; (modules server server_types) +; (names server)) -(executables - (libraries comby core opium ppx_deriving_yojson ppx_deriving_yojson.runtime) - (preprocess (pps ppx_deriving_yojson ppx_let ppx_deriving.show ppx_sexp_conv)) - (modules benchmark) - (names benchmark)) +; (executables +; (libraries comby core opium ppx_deriving_yojson ppx_deriving_yojson.runtime) +; (preprocess (pps ppx_deriving_yojson ppx_let ppx_deriving.show ppx_sexp_conv)) +; (modules benchmark) +; (names benchmark)) (alias (name DEFAULT) diff --git a/src/main.ml b/src/main.ml index 221aec8..84804a4 100644 --- a/src/main.ml +++ b/src/main.ml @@ -76,7 +76,7 @@ let substitute_environment_only_and_exit metasyntax_path anonymous_arguments jso Match.Environment.of_yojson json |> function | Ok environment -> - let substituted, _ = Matchers.Rewrite.substitute ~metasyntax rewrite_template environment in + let substituted = Matchers.Rewrite.substitute ~metasyntax rewrite_template environment in Format.printf "%s@." substituted; exit 0 | Error err -> @@ -235,7 +235,6 @@ let base_command_parameters : (unit -> 'result) Command.Param.t = { verbose ; match_timeout ; dump_statistics - ; substitute_in_place ; disable_substring_matching ; fast_offset_conversion ; match_newline_toplevel @@ -262,15 +261,20 @@ let base_command_parameters : (unit -> 'result) Command.Param.t = in fun () -> Pipeline.run configuration; - match configuration.extension with - | Some ".generic" -> - Format.eprintf "@.WARNING: the GENERIC matcher was used, because a language could not be inferred from the file extension(s). The GENERIC matcher may miss matches. See '-list' to set a matcher for a specific language and to remove this warning.@." - | Some extension -> - let (module M) = configuration.matcher in - if String.equal M.name "Generic" then - Format.eprintf "@.WARNING: the GENERIC matcher was used because I'm unable to guess what language to use for the file extension %s. The GENERIC matcher may miss matches. See '-list' to set a matcher for a specific language and to remove this warning.@." extension - else if debug then Format.eprintf "@.NOTE: the %s matcher was inferred from extension %s. See '-list' to set a matcher for a specific language.@." M.name extension - | None -> () + let (module M) = configuration.matcher in + match M.name with + | "Generic" when Option.is_none override_matcher -> + Format.eprintf + "@.WARNING: the GENERIC matcher was used, because a language could not \ + be inferred from the file extension(s). The GENERIC matcher may miss \ + matches. See '-list' to set a matcher for a specific language and to \ + remove this warning, or add -matcher .generic to suppress this warning.@." + | "Generic" when Option.is_some override_matcher -> () + | _ when Option.is_none override_matcher -> + if debug then Format.eprintf + "@.NOTE: the %s matcher was inferred from the file extension. See \ + '-list' to set a matcher for a specific language.@." M.name + | _ -> () ] let default_command = diff --git a/src/server.ml b/src/server.ml index 6bbe494..8270604 100644 --- a/src/server.ml +++ b/src/server.ml @@ -141,7 +141,7 @@ let perform_environment_substitution request = let code, result = 200, Out.Substitution.to_string - { result = fst @@ Comby_kernel.Matchers.Rewrite.substitute rewrite_template environment + { result = Comby_kernel.Matchers.Rewrite.substitute rewrite_template environment ; id } in diff --git a/test/alpha/dune b/test/alpha/dune index 7bc022b..d7e5532 100644 --- a/test/alpha/dune +++ b/test/alpha/dune @@ -2,7 +2,6 @@ (name alpha_test_integration) (package comby) (modules - test_custom_metasyntax test_special_matcher_cases test_substring_disabled) (inline_tests) diff --git a/test/alpha/test_custom_metasyntax.ml b/test/alpha/test_custom_metasyntax.ml deleted file mode 100644 index 225a4aa..0000000 --- a/test/alpha/test_custom_metasyntax.ml +++ /dev/null @@ -1,188 +0,0 @@ -open Core - -open Comby_kernel - -let configuration = Matchers.Configuration.create ~match_kind:Fuzzy () - -let create syntax = - let metasyntax = Matchers.Metasyntax.{ syntax; identifier = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_" } in - Option.value_exn (Matchers.Alpha.select_with_extension ~metasyntax ".go") - -let run (module M : Matchers.Matcher.S) source match_template _rewrite_template = - M.all ~configuration ~template:match_template ~source () - |> function - | [] -> print_string "No matches." - | results -> print_string (Format.asprintf "%a" Match.pp_json_lines (None, results)) - -let%expect_test "custom_metasyntax_everything" = - let matcher = create - [ Hole (Everything, Delimited (Some "$", None)) - ] - in - - let source = "simple(test)" in - run matcher source "simple($A)" ""; - [%expect_exact {|{"uri":null,"matches":[{"range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":12,"line":1,"column":13}},"environment":[{"variable":"A","value":"test","range":{"start":{"offset":7,"line":1,"column":8},"end":{"offset":11,"line":1,"column":12}}}],"matched":"simple(test)"}]} -|}]; - - let source = "(nested(test))" in - run matcher source "($A)" ""; - [%expect_exact {|{"uri":null,"matches":[{"range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":14,"line":1,"column":15}},"environment":[{"variable":"A","value":"nested(test)","range":{"start":{"offset":1,"line":1,"column":2},"end":{"offset":13,"line":1,"column":14}}}],"matched":"(nested(test))"}]} -|}]; - - let source = "flat stuff yeah" in - run matcher source "flat $A yeah" ""; - [%expect_exact {|{"uri":null,"matches":[{"range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":15,"line":1,"column":16}},"environment":[{"variable":"A","value":"stuff","range":{"start":{"offset":5,"line":1,"column":6},"end":{"offset":10,"line":1,"column":11}}}],"matched":"flat stuff yeah"}]} -|}] - -let%expect_test "custom_metasyntax_regex" = - let matcher = create - [ Regex ("$", ':', " ") - ] - in - let source = "simple(test)" in - run matcher source {|$A:\w+ |} ""; - [%expect_exact {|{"uri":null,"matches":[{"range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":6,"line":1,"column":7}},"environment":[{"variable":"A","value":"simple","range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":6,"line":1,"column":7}}}],"matched":"simple"},{"range":{"start":{"offset":7,"line":1,"column":8},"end":{"offset":11,"line":1,"column":12}},"environment":[{"variable":"A","value":"test","range":{"start":{"offset":7,"line":1,"column":8},"end":{"offset":11,"line":1,"column":12}}}],"matched":"test"}]} -|}] - -let%expect_test "custom_metasyntax_multiple_holes" = - let matcher = create - [ Hole (Everything, Delimited (Some "$", None)) - ; Hole (Alphanum, Delimited (Some "?", None)) - ] - in - - run matcher "simple(bar)" {|$FOO(?BAR)|} ""; - [%expect_exact {|{"uri":null,"matches":[{"range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":11,"line":1,"column":12}},"environment":[{"variable":"BAR","value":"bar","range":{"start":{"offset":7,"line":1,"column":8},"end":{"offset":10,"line":1,"column":11}}},{"variable":"FOO","value":"simple","range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":6,"line":1,"column":7}}}],"matched":"simple(bar)"}]} -|}]; - - run matcher "foo(bar)" {|?FOO($BAR)|} ""; - [%expect_exact {|{"uri":null,"matches":[{"range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":8,"line":1,"column":9}},"environment":[{"variable":"BAR","value":"bar","range":{"start":{"offset":4,"line":1,"column":5},"end":{"offset":7,"line":1,"column":8}}},{"variable":"FOO","value":"foo","range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":3,"line":1,"column":4}}}],"matched":"foo(bar)"}]} -|}]; - - - let matcher = create - [ Hole (Everything, Delimited (Some "$", None)) - ; Hole (Alphanum, Delimited (Some "$$", None)) - ] - in - - run matcher "foo(bar.baz)" {|$$A|} ""; - [%expect_exact {|{"uri":null,"matches":[{"range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":3,"line":1,"column":4}},"environment":[{"variable":"A","value":"foo","range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":3,"line":1,"column":4}}}],"matched":"foo"},{"range":{"start":{"offset":4,"line":1,"column":5},"end":{"offset":7,"line":1,"column":8}},"environment":[{"variable":"A","value":"bar","range":{"start":{"offset":4,"line":1,"column":5},"end":{"offset":7,"line":1,"column":8}}}],"matched":"bar"},{"range":{"start":{"offset":8,"line":1,"column":9},"end":{"offset":11,"line":1,"column":12}},"environment":[{"variable":"A","value":"baz","range":{"start":{"offset":8,"line":1,"column":9},"end":{"offset":11,"line":1,"column":12}}}],"matched":"baz"}]} -|}]; - - (* Expect no matches: Everything parser takes precedence. Allow folding over list to define order. *) - let matcher = create - [ Hole (Everything, Delimited (Some "$", None)) - ; Hole (Alphanum, Delimited (Some "$$", None)) - ; Regex ("$", ':', " ") - ] - in - - run matcher "foo(bar.baz)" {|$M:\w+ |} ""; - [%expect_exact {|{"uri":null,"matches":[{"range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":3,"line":1,"column":4}},"environment":[{"variable":"M","value":"foo","range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":3,"line":1,"column":4}}}],"matched":"foo"},{"range":{"start":{"offset":4,"line":1,"column":5},"end":{"offset":7,"line":1,"column":8}},"environment":[{"variable":"M","value":"bar","range":{"start":{"offset":4,"line":1,"column":5},"end":{"offset":7,"line":1,"column":8}}}],"matched":"bar"},{"range":{"start":{"offset":8,"line":1,"column":9},"end":{"offset":11,"line":1,"column":12}},"environment":[{"variable":"M","value":"baz","range":{"start":{"offset":8,"line":1,"column":9},"end":{"offset":11,"line":1,"column":12}}}],"matched":"baz"}]} -|}]; - - let matcher = create - [ Regex ("$", ':', " ") - ; Hole (Everything, Delimited (Some "$", None)) - ; Hole (Alphanum, Delimited (Some "$$", None)) - ] - in - - run matcher "foo(bar.baz)" {|$M:\w+ |} ""; - [%expect_exact {|No matches.|}] - - -let%expect_test "custom_metasyntax_underscore" = - let matcher = create - [ Hole (Everything, Delimited (Some "$", None)) - ; Hole (Alphanum, Delimited (Some "?", None)) - ] - in - - run matcher "simple(bar)" {|$_(?_)|} ""; - [%expect_exact {|{"uri":null,"matches":[{"range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":11,"line":1,"column":12}},"environment":[{"variable":"_","value":"simple","range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":6,"line":1,"column":7}}}],"matched":"simple(bar)"}]} -|}] - -let%expect_test "custom_metasyntax_equivalence" = - let matcher = create - [ Hole (Everything, Delimited (Some "$", None)) - ; Regex ("$", '~', "$") - ] - in - - run matcher "foo(foo)" {|$A($A~\w+$)|} ""; - [%expect_exact {|{"uri":null,"matches":[{"range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":8,"line":1,"column":9}},"environment":[{"variable":"!@#$000000000002_A_equal","value":"foo","range":{"start":{"offset":4,"line":1,"column":5},"end":{"offset":7,"line":1,"column":8}}},{"variable":"A","value":"foo","range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":3,"line":1,"column":4}}}],"matched":"foo(foo)"}]} -|}] - -let%expect_test "custom_metasyntax_definition_order" = - let matcher = create - [ Regex ("$", '~', "$") - ; Hole (Everything, Delimited (Some "$", None)) - ] - in - - run matcher "simple(bar)baz" {|$A($B)$C~\w+$|} ""; - [%expect_exact {|No matches.|}]; - - let matcher = create - [ Hole (Everything, Delimited (Some "$", None)) - ; Regex ("$", '~', "$") - ] - in - - run matcher "simple(bar)baz" {|$A($B)$C~\w+$|} ""; - [%expect_exact {|{"uri":null,"matches":[{"range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":14,"line":1,"column":15}},"environment":[{"variable":"A","value":"simple","range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":6,"line":1,"column":7}}},{"variable":"B","value":"bar","range":{"start":{"offset":7,"line":1,"column":8},"end":{"offset":10,"line":1,"column":11}}},{"variable":"C","value":"baz","range":{"start":{"offset":11,"line":1,"column":12},"end":{"offset":14,"line":1,"column":15}}}],"matched":"simple(bar)baz"}]} -|}] - -let%expect_test "custom_metasyntax_rewrite" = - let syntax = - let open Matchers.Metasyntax in - [ Hole (Everything, Delimited (Some "$", None)) - ; Hole (Alphanum, Delimited (Some "?", None)) - ] - in - let metasyntax = Matchers.Metasyntax.{ syntax; identifier = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_" } in - let matcher = Option.value_exn (Matchers.Alpha.select_with_extension ~metasyntax ".go") in - - let specification = Matchers.Specification.create ~match_template:"$A(?B)" ~rewrite_template:"??B -> $A$A" () in - let result = Pipeline.execute matcher ~metasyntax (String "simple(bar)") specification in - let output = match result with - | Replacement (_, result, _) -> result - | Matches _ -> "matches" - | Nothing -> "nothing" - in - print_string output; - [%expect_exact {|bar -> simplesimple|}]; - - let specification = Matchers.Specification.create ~match_template:"$A(?B)" ~rewrite_template:"$id() $id(a) $id(a)" () in - let result = Pipeline.execute matcher ~metasyntax (String "simple(bar)") specification in - let output = match result with - | Replacement (_, result, _) -> result - | Matches _ -> "matches" - | Nothing -> "nothing" - in - print_string output; - [%expect_exact {|1 2 2|}] - -let%expect_test "custom_metasyntax_greek_letters" = - let matcher = create - [ Hole (Alphanum, Reserved_identifiers ["α"; "β"]) - ] - in - - run matcher "simple(bar)" {|α(β)|} ""; - [%expect_exact {|{"uri":null,"matches":[{"range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":11,"line":1,"column":12}},"environment":[{"variable":"α","value":"simple","range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":6,"line":1,"column":7}}},{"variable":"β","value":"bar","range":{"start":{"offset":7,"line":1,"column":8},"end":{"offset":10,"line":1,"column":11}}}],"matched":"simple(bar)"}]} -|}] - -let%expect_test "custom_metasyntax_alphanum_test" = - let matcher = create - [ Hole (Alphanum, Delimited (Some "[:", Some ":]")) - ; Hole (Alphanum, Reserved_identifiers ["α"; "β"]) - ] - in - - run matcher "simple(bar)" {|[:A:](α)|} ""; - [%expect_exact {|{"uri":null,"matches":[{"range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":11,"line":1,"column":12}},"environment":[{"variable":"A","value":"simple","range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":6,"line":1,"column":7}}},{"variable":"α","value":"bar","range":{"start":{"offset":7,"line":1,"column":8},"end":{"offset":10,"line":1,"column":11}}}],"matched":"simple(bar)"}]} -|}] diff --git a/test/alpha/test_special_matcher_cases.ml b/test/alpha/test_special_matcher_cases.ml index 23a0bda..1ca4ea1 100644 --- a/test/alpha/test_special_matcher_cases.ml +++ b/test/alpha/test_special_matcher_cases.ml @@ -1,7 +1,7 @@ open Core +open Comby_kernel open Matchers -open Rewriter open Matchers.Alpha diff --git a/test/alpha/test_substring_disabled.ml b/test/alpha/test_substring_disabled.ml index e3debfa..39ff715 100644 --- a/test/alpha/test_substring_disabled.ml +++ b/test/alpha/test_substring_disabled.ml @@ -1,8 +1,7 @@ open Core +open Comby_kernel open Matchers -open Rewriter - open Matchers.Alpha let configuration = Configuration.create ~disable_substring_matching:true ~match_kind:Fuzzy () diff --git a/test/common/dune b/test/common/dune index 1b5baf3..f03723f 100644 --- a/test/common/dune +++ b/test/common/dune @@ -13,13 +13,14 @@ test_offset_conversion test_parse_rule test_rewrite_parts - test_parse_rewrite_template + test_parse_template test_substitute test_rewrite_rule test_integration test_match_rule test_python_string_literals test_hole_extensions + test_match_offsets test_generic test_string_literals test_c_style_comments @@ -32,6 +33,8 @@ test_user_defined_language test_nested_matches test_regex_holes + test_template_constraints + test_custom_metasyntax ) (inline_tests) (preprocess (pps ppx_expect ppx_sexp_message ppx_deriving_yojson)) diff --git a/test/common/test_bash.ml b/test/common/test_bash.ml index 74726e9..fcc1695 100644 --- a/test/common/test_bash.ml +++ b/test/common/test_bash.ml @@ -4,17 +4,6 @@ open Test_helpers open Comby_kernel open Matchers -let run (module M: Matchers.Matcher.S) source match_template rewrite_template = - M.first ~configuration match_template source - |> function - | Ok result -> - Rewrite.all ~source ~rewrite_template [result] - |> (fun x -> Option.value_exn x) - |> (fun { rewritten_source; _ } -> rewritten_source) - |> print_string - | Error _ -> - print_string rewrite_template - let%expect_test "custom_long_delimiters" = let source = {| @@ -64,9 +53,7 @@ let%expect_test "custom_long_delimiters_doesn't_work_in_go" = [%expect_exact {| case nuked blocks esac - case - block 2 - esac + case nuked blocks esac esac |}]; @@ -74,8 +61,6 @@ let%expect_test "custom_long_delimiters_doesn't_work_in_go" = [%expect_exact {| case nuked blocks esac - case - block 2 - esac + case nuked blocks esac esac |}] diff --git a/test/common/test_c.ml b/test/common/test_c.ml index 2a7c044..d61a855 100644 --- a/test/common/test_c.ml +++ b/test/common/test_c.ml @@ -4,25 +4,14 @@ open Test_helpers open Comby_kernel open Matchers -let run (module E : Engine.S) source match_template rewrite_template = - E.C.first ~configuration match_template source - |> function - | Ok result -> - Rewrite.all ~source ~rewrite_template [result] - |> (fun x -> Option.value_exn x) - |> (fun { rewritten_source; _ } -> rewritten_source) - |> print_string - | Error _ -> - print_string rewrite_template - let%expect_test "comments_1" = let source = {|match this /**/ expect end|} in let match_template = {|match this :[1] end|} in let rewrite_template = {|:[1]|} in - run (module Alpha) source match_template rewrite_template; + run (module Alpha.C) source match_template rewrite_template; [%expect_exact {|expect|}]; - run (module Omega) source match_template rewrite_template; + run (module Omega.C) source match_template rewrite_template; [%expect_exact {|expect|}] let%expect_test "comments_2" = @@ -30,9 +19,9 @@ let%expect_test "comments_2" = let match_template = {|match this :[1] end|} in let rewrite_template = {|:[1]|} in - run (module Alpha) source match_template rewrite_template; + run (module Alpha.C) source match_template rewrite_template; [%expect_exact {|expect|}]; - run (module Omega) source match_template rewrite_template; + run (module Omega.C) source match_template rewrite_template; [%expect_exact {|expect|}] let%expect_test "comments_3" = @@ -40,9 +29,9 @@ let%expect_test "comments_3" = let match_template = {|match this :[1] end|} in let rewrite_template = {|:[1]|} in - run (module Alpha) source match_template rewrite_template; + run (module Alpha.C) source match_template rewrite_template; [%expect_exact {|expect|}]; - run (module Omega) source match_template rewrite_template; + run (module Omega.C) source match_template rewrite_template; [%expect_exact {|expect|}] let%expect_test "comments_4" = @@ -50,9 +39,9 @@ let%expect_test "comments_4" = let match_template = {|match this :[1]end|} in let rewrite_template = {|:[1]|} in - run (module Alpha) source match_template rewrite_template; + run (module Alpha.C) source match_template rewrite_template; [%expect_exact {|expect|}]; - run (module Omega) source match_template rewrite_template; + run (module Omega.C) source match_template rewrite_template; [%expect_exact {|expect|}] let%expect_test "comments_5" = @@ -60,9 +49,9 @@ let%expect_test "comments_5" = let match_template = {|match this :[1] end|} in let rewrite_template = {|:[1]|} in - run (module Alpha) source match_template rewrite_template; + run (module Alpha.C) source match_template rewrite_template; [%expect_exact {|expect|}]; - run (module Omega) source match_template rewrite_template; + run (module Omega.C) source match_template rewrite_template; [%expect_exact {|expect|}] let%expect_test "comments_6" = @@ -70,39 +59,39 @@ let%expect_test "comments_6" = let match_template = {|match this :[1] end|} in let rewrite_template = {|nothing matches|} in - run (module Alpha) source match_template rewrite_template; - [%expect_exact {|nothing matches|}]; - run (module Omega) source match_template rewrite_template; - [%expect_exact {|nothing matches|}] + run (module Alpha.C) source match_template rewrite_template; + [%expect_exact {|No matches.|}]; + run (module Omega.C) source match_template rewrite_template; + [%expect_exact {|No matches.|}] let%expect_test "comments_7" = let source = {|/* don't match /**/ this (a) end */|} in let match_template = {|match this :[1] end|} in let rewrite_template = {|nothing matches|} in - run (module Alpha) source match_template rewrite_template; - [%expect_exact {|nothing matches|}]; - run (module Omega) source match_template rewrite_template; - [%expect_exact {|nothing matches|}] + run (module Alpha.C) source match_template rewrite_template; + [%expect_exact {|No matches.|}]; + run (module Omega.C) source match_template rewrite_template; + [%expect_exact {|No matches.|}] let%expect_test "comments_8" = let source = {|(/* don't match this (a) end */)|} in let match_template = {|match this :[1] end|} in let rewrite_template = {|nothing matches|} in - run (module Alpha) source match_template rewrite_template; - [%expect_exact {|nothing matches|}]; - run (module Omega) source match_template rewrite_template; - [%expect_exact {|nothing matches|}] + run (module Alpha.C) source match_template rewrite_template; + [%expect_exact {|No matches.|}]; + run (module Omega.C) source match_template rewrite_template; + [%expect_exact {|No matches.|}] let%expect_test "comments_9" = let source = {|/* don't match this (a) end */ do match this (b) end|} in let match_template = {|match this :[1] end|} in let rewrite_template = {|:[1]|} in - run (module Alpha) source match_template rewrite_template; + run (module Alpha.C) source match_template rewrite_template; [%expect_exact {|/* don't match this (a) end */ do (b)|}]; - run (module Omega) source match_template rewrite_template; + run (module Omega.C) source match_template rewrite_template; [%expect_exact {|/* don't match this (a) end */ do (b)|}] let%expect_test "comments_10" = @@ -110,9 +99,9 @@ let%expect_test "comments_10" = let match_template = {|match this :[1] end|} in let rewrite_template = {|:[1]|} in - run (module Alpha) source match_template rewrite_template; + run (module Alpha.C) source match_template rewrite_template; [%expect_exact {|/* don't match this (a) end */ do ()|}]; - run (module Omega) source match_template rewrite_template; + run (module Omega.C) source match_template rewrite_template; [%expect_exact {|/* don't match this (a) end */ do ()|}] let%expect_test "comments_11" = @@ -120,7 +109,7 @@ let%expect_test "comments_11" = let match_template = {|match this :[1] end|} in let rewrite_template = {|:[1]|} in - run (module Alpha) source match_template rewrite_template; + run (module Alpha.C) source match_template rewrite_template; [%expect_exact {|do (b) /* don't match this (a) end */|}]; - run (module Omega) source match_template rewrite_template; + run (module Omega.C) source match_template rewrite_template; [%expect_exact {|do (b) /* don't match this (a) end */|}] diff --git a/test/common/test_c_separators.ml b/test/common/test_c_separators.ml index a16691e..2c1cfef 100644 --- a/test/common/test_c_separators.ml +++ b/test/common/test_c_separators.ml @@ -4,38 +4,27 @@ open Test_helpers open Comby_kernel open Matchers -let run (module E : Engine.S) source match_template rewrite_template = - E.C.first ~configuration match_template source - |> function - | Ok result -> - Rewrite.all ~source ~rewrite_template [result] - |> (fun x -> Option.value_exn x) - |> (fun { rewritten_source; _ } -> rewritten_source) - |> print_string - | Error _ -> - print_string rewrite_template - let%expect_test "whitespace_should_not_matter_between_separators" = let source = {|*p|} in let match_template = {|*:[1]|} in let rewrite_template = {|:[1]|} in - run (module Alpha) source match_template rewrite_template; + run (module Alpha.C) source match_template rewrite_template; [%expect_exact {|p|}]; - run (module Omega) source match_template rewrite_template; + run (module Omega.C) source match_template rewrite_template; [%expect_exact {|p|}]; let source = {|* p|} in let match_template = {|*:[1]|} in let rewrite_template = {|:[1]|} in - run (module Alpha) source match_template rewrite_template; + run (module Alpha.C) source match_template rewrite_template; [%expect_exact {| p|}]; - run (module Omega) source match_template rewrite_template; + run (module Omega.C) source match_template rewrite_template; [%expect_exact {| p|}]; let source = {|* p|} in let match_template = {|* :[1]|} in let rewrite_template = {|:[1]|} in - run (module Alpha) source match_template rewrite_template; + run (module Alpha.C) source match_template rewrite_template; [%expect_exact {|p|}]; - run (module Omega) source match_template rewrite_template; + run (module Omega.C) source match_template rewrite_template; [%expect_exact {|p|}] diff --git a/test/common/test_c_style_comments.ml b/test/common/test_c_style_comments.ml index 1319ab2..462cd40 100644 --- a/test/common/test_c_style_comments.ml +++ b/test/common/test_c_style_comments.ml @@ -4,24 +4,14 @@ open Test_helpers open Comby_kernel open Matchers -let match_all ?(configuration = configuration) (module E : Engine.S) template source = - E.C.all ~configuration ~template ~source () - -let all ?(configuration = configuration) engine template source rewrite_template = - match_all ~configuration engine template source - |> (fun matches -> - Option.value_exn (Rewrite.all ~source ~rewrite_template matches)) - |> (fun { rewritten_source; _ } -> rewritten_source) - |> print_string - let%expect_test "rewrite_comments_1" = let template = "replace this :[1] end" in let source = "/* don't replace this () end */ do replace this () end" in let rewrite_template = "X" in - all (module Alpha) template source rewrite_template; + run (module Alpha.C) source template rewrite_template; [%expect_exact "/* don't replace this () end */ do X"]; - all (module Omega) template source rewrite_template; + run (module Omega.C) source template rewrite_template; [%expect_exact "/* don't replace this () end */ do X"] let%expect_test "rewrite_comments_2" = @@ -48,12 +38,12 @@ let%expect_test "rewrite_comments_2" = |} in - all (module Alpha) template source rewrite_template; + run (module Alpha.C) source template rewrite_template; [%expect_exact {| if (real_condition_body_must_be_empty) {} |}]; - all (module Omega) template source rewrite_template; + run (module Omega.C) source template rewrite_template; [%expect_exact {| if (real_condition_body_must_be_empty) {} @@ -64,65 +54,13 @@ let%expect_test "capture_comments" = let template = {|if (:[1]) { :[2] }|} in let source = {|if (true) { /* some comment */ console.log(z); }|} in - match_all (module Alpha) template source - |> print_matches; - [%expect_exact {|[ - { - "range": { - "start": { "offset": 0, "line": 1, "column": 1 }, - "end": { "offset": 48, "line": 1, "column": 49 } - }, - "environment": [ - { - "variable": "1", - "value": "true", - "range": { - "start": { "offset": 4, "line": 1, "column": 5 }, - "end": { "offset": 8, "line": 1, "column": 9 } - } - }, - { - "variable": "2", - "value": "console.log(z);", - "range": { - "start": { "offset": 31, "line": 1, "column": 32 }, - "end": { "offset": 46, "line": 1, "column": 47 } - } - } - ], - "matched": "if (true) { /* some comment */ console.log(z); }" - } -]|}]; + run_all_matches (module Alpha.C) source template; + [%expect_exact {|{"uri":null,"matches":[{"range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":48,"line":1,"column":49}},"environment":[{"variable":"1","value":"true","range":{"start":{"offset":4,"line":1,"column":5},"end":{"offset":8,"line":1,"column":9}}},{"variable":"2","value":"console.log(z);","range":{"start":{"offset":31,"line":1,"column":32},"end":{"offset":46,"line":1,"column":47}}}],"matched":"if (true) { /* some comment */ console.log(z); }"}]} +|}]; - match_all (module Omega) template source - |> print_matches; - [%expect_exact {|[ - { - "range": { - "start": { "offset": 0, "line": 1, "column": 1 }, - "end": { "offset": 48, "line": 1, "column": 49 } - }, - "environment": [ - { - "variable": "1", - "value": "true", - "range": { - "start": { "offset": 4, "line": 1, "column": 5 }, - "end": { "offset": 8, "line": 1, "column": 9 } - } - }, - { - "variable": "2", - "value": "console.log(z);", - "range": { - "start": { "offset": 31, "line": 1, "column": 32 }, - "end": { "offset": 46, "line": 1, "column": 47 } - } - } - ], - "matched": "if (true) { /* some comment */ console.log(z); }" - } -]|}] + run_all_matches (module Omega.C) source template; + [%expect_exact {|{"uri":null,"matches":[{"range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":48,"line":1,"column":49}},"environment":[{"variable":"1","value":"true","range":{"start":{"offset":4,"line":1,"column":5},"end":{"offset":8,"line":1,"column":9}}},{"variable":"2","value":"console.log(z);","range":{"start":{"offset":31,"line":1,"column":32},"end":{"offset":46,"line":1,"column":47}}}],"matched":"if (true) { /* some comment */ console.log(z); }"}]} +|}] let%expect_test "single_quote_in_comment" = @@ -143,13 +81,13 @@ let%expect_test "single_quote_in_comment" = |} in - all (module Alpha) template source rewrite_template; + run (module Alpha.C) source template rewrite_template; [%expect_exact {| {test} |}]; - all (module Omega) template source rewrite_template; + run (module Omega.C) source template rewrite_template; [%expect_exact {| {test} @@ -178,7 +116,7 @@ let%expect_test "single_quote_in_comment" = |} in - all (module Alpha) template source rewrite_template; + run (module Alpha.C) source template rewrite_template; [%expect_exact {| { @@ -189,7 +127,7 @@ let%expect_test "single_quote_in_comment" = } |}]; - all (module Omega) template source rewrite_template; + run (module Omega.C) source template rewrite_template; [%expect_exact {| { @@ -222,7 +160,7 @@ let%expect_test "single_quote_in_comment" = |} in - all (module Alpha) template source rewrite_template; + run (module Alpha.C) source template rewrite_template; [%expect_exact {| { @@ -232,7 +170,7 @@ let%expect_test "single_quote_in_comment" = } |}]; - all (module Omega) template source rewrite_template; + run (module Omega.C) source template rewrite_template; [%expect_exact {| { @@ -262,7 +200,7 @@ let%expect_test "give_back_the_comment_characters_for_newline_comments_too" = |} in - all (module Alpha) template source rewrite_template; + run (module Alpha.C) source template rewrite_template; [%expect_exact {| { @@ -270,7 +208,7 @@ let%expect_test "give_back_the_comment_characters_for_newline_comments_too" = } |}]; - all (module Omega) template source rewrite_template; + run (module Omega.C) source template rewrite_template; [%expect_exact {| { @@ -300,9 +238,9 @@ a {|erased|} in - all (module Alpha) template source rewrite_template; + run (module Alpha.C) source template rewrite_template; [%expect_exact {|erased|}]; - all (module Omega) template source rewrite_template; + run (module Omega.C) source template rewrite_template; [%expect_exact {|erased|}] diff --git a/test/common/test_cli.ml b/test/common/test_cli.ml index d905710..4bb4253 100644 --- a/test/common/test_cli.ml +++ b/test/common/test_cli.ml @@ -231,7 +231,7 @@ let%expect_test "with_rewrite_rule_stdin_default_no_extension" = @|-1,1 +1,1 ============================================================ !|hello world -WARNING: the GENERIC matcher was used, because a language could not be inferred from the file extension(s). The GENERIC matcher may miss matches. See '-list' to set a matcher for a specific language and to remove this warning. +WARNING: the GENERIC matcher was used, because a language could not be inferred from the file extension(s). The GENERIC matcher may miss matches. See '-list' to set a matcher for a specific language and to remove this warning, or add -matcher .generic to suppress this warning. |}] let%expect_test "generic_matcher" = @@ -250,7 +250,7 @@ let%expect_test "generic_matcher" = -|\footnote{\small \url{https://github.com}} +|\footnote{\scriptsize \url{https://github.com}} -WARNING: the GENERIC matcher was used, because a language could not be inferred from the file extension(s). The GENERIC matcher may miss matches. See '-list' to set a matcher for a specific language and to remove this warning. +WARNING: the GENERIC matcher was used, because a language could not be inferred from the file extension(s). The GENERIC matcher may miss matches. See '-list' to set a matcher for a specific language and to remove this warning, or add -matcher .generic to suppress this warning. |}] @@ -473,7 +473,7 @@ let%expect_test "exclude_dir_option" = -int main() {} +int pain() {} - WARNING: the GENERIC matcher was used, because a language could not be inferred from the file extension(s). The GENERIC matcher may miss matches. See '-list' to set a matcher for a specific language and to remove this warning. |}]; + WARNING: the GENERIC matcher was used, because a language could not be inferred from the file extension(s). The GENERIC matcher may miss matches. See '-list' to set a matcher for a specific language and to remove this warning, or add -matcher .generic to suppress this warning. |}]; let src_dir = "example" ^/ "src" in let command_args = Format.sprintf "'main' 'pain' -sequential -d %s -exclude-dir 'nonexist' -diff" src_dir in @@ -522,7 +522,7 @@ let%expect_test "exclude_dir_option" = -int main() {} +int pain() {} - WARNING: the GENERIC matcher was used, because a language could not be inferred from the file extension(s). The GENERIC matcher may miss matches. See '-list' to set a matcher for a specific language and to remove this warning. |}] + WARNING: the GENERIC matcher was used, because a language could not be inferred from the file extension(s). The GENERIC matcher may miss matches. See '-list' to set a matcher for a specific language and to remove this warning, or add -matcher .generic to suppress this warning. |}] let%expect_test "exclude_file_option" = let source = "hello world" in @@ -539,7 +539,7 @@ let%expect_test "exclude_file_option" = -main +pain - WARNING: the GENERIC matcher was used, because a language could not be inferred from the file extension(s). The GENERIC matcher may miss matches. See '-list' to set a matcher for a specific language and to remove this warning. |}] + WARNING: the GENERIC matcher was used, because a language could not be inferred from the file extension(s). The GENERIC matcher may miss matches. See '-list' to set a matcher for a specific language and to remove this warning, or add -matcher .generic to suppress this warning. |}] let%expect_test "dir_depth_option" = let source = "hello world" in @@ -563,7 +563,7 @@ let%expect_test "dir_depth_option" = -int depth_0() {} +int correct_depth_0() {} - WARNING: the GENERIC matcher was used, because a language could not be inferred from the file extension(s). The GENERIC matcher may miss matches. See '-list' to set a matcher for a specific language and to remove this warning. |}]; + WARNING: the GENERIC matcher was used, because a language could not be inferred from the file extension(s). The GENERIC matcher may miss matches. See '-list' to set a matcher for a specific language and to remove this warning, or add -matcher .generic to suppress this warning. |}]; let source = "hello world" in let src_dir = "example" ^/ "src" in @@ -583,7 +583,7 @@ let%expect_test "dir_depth_option" = -int depth_1() {} +int correct_depth_1() {} - WARNING: the GENERIC matcher was used, because a language could not be inferred from the file extension(s). The GENERIC matcher may miss matches. See '-list' to set a matcher for a specific language and to remove this warning. |}]; + WARNING: the GENERIC matcher was used, because a language could not be inferred from the file extension(s). The GENERIC matcher may miss matches. See '-list' to set a matcher for a specific language and to remove this warning, or add -matcher .generic to suppress this warning. |}]; let source = "hello world" in let src_dir = "example" ^/ "src" in @@ -608,7 +608,7 @@ let%expect_test "dir_depth_option" = -int depth_2() {} +int correct_depth_2() {} - WARNING: the GENERIC matcher was used, because a language could not be inferred from the file extension(s). The GENERIC matcher may miss matches. See '-list' to set a matcher for a specific language and to remove this warning. |}]; + WARNING: the GENERIC matcher was used, because a language could not be inferred from the file extension(s). The GENERIC matcher may miss matches. See '-list' to set a matcher for a specific language and to remove this warning, or add -matcher .generic to suppress this warning. |}]; let source = "hello world" in let src_dir = "example" ^/ "src" in @@ -633,7 +633,7 @@ let%expect_test "dir_depth_option" = -int depth_2() {} +int correct_depth_2() {} - WARNING: the GENERIC matcher was used, because a language could not be inferred from the file extension(s). The GENERIC matcher may miss matches. See '-list' to set a matcher for a specific language and to remove this warning. |}] + WARNING: the GENERIC matcher was used, because a language could not be inferred from the file extension(s). The GENERIC matcher may miss matches. See '-list' to set a matcher for a specific language and to remove this warning, or add -matcher .generic to suppress this warning. |}] let%expect_test "matcher_override" = let source = "hello world" in @@ -699,7 +699,7 @@ let%expect_test "infer_and_honor_extensions" = +// bar() } - WARNING: the GENERIC matcher was used, because a language could not be inferred from the file extension(s). The GENERIC matcher may miss matches. See '-list' to set a matcher for a specific language and to remove this warning. |}] + WARNING: the GENERIC matcher was used, because a language could not be inferred from the file extension(s). The GENERIC matcher may miss matches. See '-list' to set a matcher for a specific language and to remove this warning, or add -matcher .generic to suppress this warning. |}] let%expect_test "diff_only" = let source = "hello world" in @@ -710,7 +710,7 @@ let%expect_test "diff_only" = [%expect{| {"uri":null,"diff":"--- /dev/null\n+++ /dev/null\n@@ -1,1 +1,1 @@\n-hello world\n\\ No newline at end of file\n+world world\n\\ No newline at end of file"} - WARNING: the GENERIC matcher was used, because a language could not be inferred from the file extension(s). The GENERIC matcher may miss matches. See '-list' to set a matcher for a specific language and to remove this warning. |}]; + WARNING: the GENERIC matcher was used, because a language could not be inferred from the file extension(s). The GENERIC matcher may miss matches. See '-list' to set a matcher for a specific language and to remove this warning, or add -matcher .generic to suppress this warning. |}]; let source = "hello world" in let command_args = Format.sprintf "'hello' 'world' -stdin -sequential -json-only-diff" in @@ -752,7 +752,7 @@ let%expect_test "zip_exclude_dir_no_extension" = -func main() {} +func pain() {} - WARNING: the GENERIC matcher was used, because a language could not be inferred from the file extension(s). The GENERIC matcher may miss matches. See '-list' to set a matcher for a specific language and to remove this warning. |}] + WARNING: the GENERIC matcher was used, because a language could not be inferred from the file extension(s). The GENERIC matcher may miss matches. See '-list' to set a matcher for a specific language and to remove this warning, or add -matcher .generic to suppress this warning. |}] let%expect_test "zip_exclude_file" = let source = "doesn't matter" in @@ -1213,30 +1213,32 @@ let%expect_test "test_custom_metasyntax_replace" = let source = "a(b)" in let metasyntax_path = "example" ^/ "metasyntax" ^/ "dolla.json" in let command_args = - Format.sprintf "'$A($B~\\w+$)' '$A $B' -stdin -sequential -custom-metasyntax %s -stdout" metasyntax_path + Format.sprintf "'$A($B~\\w+$)' '$A $B' -stdin -sequential -custom-metasyntax %s -stdout -matcher .generic" metasyntax_path in let command = Format.sprintf "%s %s" binary_path command_args in let result = read_expect_stdin_and_stdout command source in print_string result; - [%expect "a b"] + [%expect " + a b"] let%expect_test "test_custom_metasyntax_replace" = let source = "a(b)" in let metasyntax_path = "example" ^/ "metasyntax" ^/ "dolla.json" in let command_args = - Format.sprintf "'$A($B~\\w+$)' '$A~x$ $B~\\w+$' -stdin -sequential -custom-metasyntax %s -stdout" metasyntax_path + Format.sprintf "'$A($B~\\w+$)' '$A~x$ $B~\\w+$' -stdin -sequential -custom-metasyntax %s -stdout -matcher .generic" metasyntax_path in let command = Format.sprintf "%s %s" binary_path command_args in let result = read_expect_stdin_and_stdout command source in print_string result; - [%expect "a b"] + [%expect " + a b"] let%expect_test "test_custom_metasyntax_substitute" = let source = "IGNORED" in let metasyntax_path = "example" ^/ "metasyntax" ^/ "dolla.json" in let env = {|[{"variable":"B", "value":"hello" }]|} in let command_args = - Format.sprintf "'IGNORED' '$A $B~\\w+$' -stdin -sequential -custom-metasyntax %s -substitute-only '%s'" metasyntax_path env + Format.sprintf "'IGNORED' '$A $B~\\w+$' -stdin -sequential -custom-metasyntax %s -substitute-only '%s' -matcher .generic" metasyntax_path env in let command = Format.sprintf "%s %s" binary_path command_args in let result = read_expect_stdin_and_stdout command source in @@ -1247,20 +1249,22 @@ let%expect_test "test_custom_metasyntax_partial_rule_support" = let source = "a(b)" in let metasyntax_path = "example" ^/ "metasyntax" ^/ "dolla.json" in let command_args = - Format.sprintf {|'$A($B)' '$A $B' -rule 'where rewrite :[A] { "$C~a$" -> "$C" }' -stdin -custom-metasyntax %s -stdout|} metasyntax_path + Format.sprintf {|'$A($B)' '$A $B' -rule 'where rewrite :[A] { "$C~a$" -> "$C" }' -stdin -custom-metasyntax %s -stdout -matcher .generic|} metasyntax_path in let command = Format.sprintf "%s %s" binary_path command_args in let result = read_expect_stdin_and_stdout command source in print_string result; - [%expect "$C b"] + [%expect " + $C b"] let%expect_test "test_custom_metasyntax_reserved_identifiers" = let source = "fun f -> (fun x -> f (x x)) (fun x -> f (x x))" in let metasyntax_path = "example" ^/ "metasyntax" ^/ "default.json" in let command_args = - Format.sprintf {|'λ f -> α α' 'α' -stdin -custom-metasyntax %s -stdout|} metasyntax_path + Format.sprintf {|'λ f -> α α' 'α' -stdin -custom-metasyntax %s -stdout -matcher .generic|} metasyntax_path in let command = Format.sprintf "%s %s" binary_path command_args in let result = read_expect_stdin_and_stdout command source in print_string result; - [%expect "(fun x -> f (x x))"] + [%expect " + (fun x -> f (x x))"] diff --git a/test/common/test_custom_metasyntax.ml b/test/common/test_custom_metasyntax.ml new file mode 100644 index 0000000..25668e6 --- /dev/null +++ b/test/common/test_custom_metasyntax.ml @@ -0,0 +1,276 @@ +open Core + +open Comby_kernel + +let configuration = Matchers.Configuration.create ~match_kind:Fuzzy () + +let create (module E : Matchers.Engine.S) syntax = + let metasyntax = Matchers.Metasyntax.{ syntax; identifier = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_" } in + Option.value_exn (E.select_with_extension ~metasyntax ".go") + +let run (module M : Matchers.Matcher.S) source match_template _rewrite_template = + M.all ~configuration ~template:match_template ~source () + |> function + | [] -> print_string "No matches." + | results -> print_string (Format.asprintf "%a" Match.pp_json_lines (None, results)) + +let%expect_test "custom_metasyntax_everything" = + let matcher = + [ Matchers.Metasyntax.Hole (Everything, Delimited (Some "$", None)) + ] + in + + let source = "simple(test)" in + run (create (module Matchers.Alpha) matcher) source "simple($A)" ""; + [%expect_exact {|{"uri":null,"matches":[{"range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":12,"line":1,"column":13}},"environment":[{"variable":"A","value":"test","range":{"start":{"offset":7,"line":1,"column":8},"end":{"offset":11,"line":1,"column":12}}}],"matched":"simple(test)"}]} +|}]; + run (create (module Matchers.Omega) matcher) source "simple($A)" ""; + [%expect_exact {|{"uri":null,"matches":[{"range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":12,"line":1,"column":13}},"environment":[{"variable":"A","value":"test","range":{"start":{"offset":7,"line":1,"column":8},"end":{"offset":11,"line":1,"column":12}}}],"matched":"simple(test)"}]} +|}]; + + + let source = "(nested(test))" in + run (create (module Matchers.Alpha) matcher) source "($A)" ""; + [%expect_exact {|{"uri":null,"matches":[{"range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":14,"line":1,"column":15}},"environment":[{"variable":"A","value":"nested(test)","range":{"start":{"offset":1,"line":1,"column":2},"end":{"offset":13,"line":1,"column":14}}}],"matched":"(nested(test))"}]} +|}]; + run (create (module Matchers.Omega) matcher) source "($A)" ""; + [%expect_exact {|{"uri":null,"matches":[{"range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":14,"line":1,"column":15}},"environment":[{"variable":"A","value":"nested(test)","range":{"start":{"offset":1,"line":1,"column":2},"end":{"offset":13,"line":1,"column":14}}}],"matched":"(nested(test))"}]} +|}]; + + + let source = "flat stuff yeah" in + run (create (module Matchers.Alpha) matcher) source "flat $A yeah" ""; + [%expect_exact {|{"uri":null,"matches":[{"range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":15,"line":1,"column":16}},"environment":[{"variable":"A","value":"stuff","range":{"start":{"offset":5,"line":1,"column":6},"end":{"offset":10,"line":1,"column":11}}}],"matched":"flat stuff yeah"}]} +|}]; + run (create (module Matchers.Omega) matcher) source "flat $A yeah" ""; + [%expect_exact {|{"uri":null,"matches":[{"range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":15,"line":1,"column":16}},"environment":[{"variable":"A","value":"stuff","range":{"start":{"offset":5,"line":1,"column":6},"end":{"offset":10,"line":1,"column":11}}}],"matched":"flat stuff yeah"}]} +|}] + + +let%expect_test "custom_metasyntax_regex" = + let matcher = + Matchers.Metasyntax. + [ Regex ("$", ':', " ") + ] + in + + let source = "simple(test)" in + run (create (module Matchers.Alpha) matcher) source {|$A:\w+ |} ""; + [%expect_exact {|{"uri":null,"matches":[{"range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":6,"line":1,"column":7}},"environment":[{"variable":"A","value":"simple","range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":6,"line":1,"column":7}}}],"matched":"simple"},{"range":{"start":{"offset":7,"line":1,"column":8},"end":{"offset":11,"line":1,"column":12}},"environment":[{"variable":"A","value":"test","range":{"start":{"offset":7,"line":1,"column":8},"end":{"offset":11,"line":1,"column":12}}}],"matched":"test"}]} +|}]; + run (create (module Matchers.Omega) matcher) source {|$A:\w+ |} ""; + [%expect_exact {|{"uri":null,"matches":[{"range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":6,"line":1,"column":7}},"environment":[{"variable":"A","value":"simple","range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":6,"line":1,"column":7}}}],"matched":"simple"},{"range":{"start":{"offset":7,"line":1,"column":8},"end":{"offset":11,"line":1,"column":12}},"environment":[{"variable":"A","value":"test","range":{"start":{"offset":7,"line":1,"column":8},"end":{"offset":11,"line":1,"column":12}}}],"matched":"test"}]} +|}] + +let%expect_test "custom_metasyntax_multiple_holes" = + let matcher = + Matchers.Metasyntax. + [ Hole (Everything, Delimited (Some "$", None)) + ; Hole (Alphanum, Delimited (Some "?", None)) + ] + in + + run (create (module Matchers.Alpha) matcher) "simple(bar)" {|$FOO(?BAR)|} ""; + [%expect_exact {|{"uri":null,"matches":[{"range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":11,"line":1,"column":12}},"environment":[{"variable":"BAR","value":"bar","range":{"start":{"offset":7,"line":1,"column":8},"end":{"offset":10,"line":1,"column":11}}},{"variable":"FOO","value":"simple","range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":6,"line":1,"column":7}}}],"matched":"simple(bar)"}]} +|}]; + run (create (module Matchers.Omega) matcher) "simple(bar)" {|$FOO(?BAR)|} ""; + [%expect_exact {|{"uri":null,"matches":[{"range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":11,"line":1,"column":12}},"environment":[{"variable":"BAR","value":"bar","range":{"start":{"offset":7,"line":1,"column":8},"end":{"offset":10,"line":1,"column":11}}},{"variable":"FOO","value":"simple","range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":6,"line":1,"column":7}}}],"matched":"simple(bar)"}]} +|}]; + + run (create (module Matchers.Alpha) matcher) "foo(bar)" {|?FOO($BAR)|} ""; + [%expect_exact {|{"uri":null,"matches":[{"range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":8,"line":1,"column":9}},"environment":[{"variable":"BAR","value":"bar","range":{"start":{"offset":4,"line":1,"column":5},"end":{"offset":7,"line":1,"column":8}}},{"variable":"FOO","value":"foo","range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":3,"line":1,"column":4}}}],"matched":"foo(bar)"}]} +|}]; + run (create (module Matchers.Omega) matcher) "foo(bar)" {|?FOO($BAR)|} ""; + [%expect_exact {|{"uri":null,"matches":[{"range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":8,"line":1,"column":9}},"environment":[{"variable":"BAR","value":"bar","range":{"start":{"offset":4,"line":1,"column":5},"end":{"offset":7,"line":1,"column":8}}},{"variable":"FOO","value":"foo","range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":3,"line":1,"column":4}}}],"matched":"foo(bar)"}]} +|}]; + + let matcher = + Matchers.Metasyntax. + [ Hole (Everything, Delimited (Some "$", None)) + ; Hole (Alphanum, Delimited (Some "$$", None)) + ] + in + + run (create (module Matchers.Alpha) matcher) "foo(bar.baz)" {|$$A|} ""; + [%expect_exact {|{"uri":null,"matches":[{"range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":3,"line":1,"column":4}},"environment":[{"variable":"A","value":"foo","range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":3,"line":1,"column":4}}}],"matched":"foo"},{"range":{"start":{"offset":4,"line":1,"column":5},"end":{"offset":7,"line":1,"column":8}},"environment":[{"variable":"A","value":"bar","range":{"start":{"offset":4,"line":1,"column":5},"end":{"offset":7,"line":1,"column":8}}}],"matched":"bar"},{"range":{"start":{"offset":8,"line":1,"column":9},"end":{"offset":11,"line":1,"column":12}},"environment":[{"variable":"A","value":"baz","range":{"start":{"offset":8,"line":1,"column":9},"end":{"offset":11,"line":1,"column":12}}}],"matched":"baz"}]} +|}]; + run (create (module Matchers.Omega) matcher) "foo(bar.baz)" {|$$A|} ""; + [%expect_exact {|{"uri":null,"matches":[{"range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":3,"line":1,"column":4}},"environment":[{"variable":"A","value":"foo","range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":3,"line":1,"column":4}}}],"matched":"foo"},{"range":{"start":{"offset":4,"line":1,"column":5},"end":{"offset":7,"line":1,"column":8}},"environment":[{"variable":"A","value":"bar","range":{"start":{"offset":4,"line":1,"column":5},"end":{"offset":7,"line":1,"column":8}}}],"matched":"bar"},{"range":{"start":{"offset":8,"line":1,"column":9},"end":{"offset":11,"line":1,"column":12}},"environment":[{"variable":"A","value":"baz","range":{"start":{"offset":8,"line":1,"column":9},"end":{"offset":11,"line":1,"column":12}}}],"matched":"baz"}]} +|}]; + + let matcher = + Matchers.Metasyntax. + [ Hole (Everything, Delimited (Some "$", None)) + ; Hole (Alphanum, Delimited (Some "$$", None)) + ; Regex ("$", ':', " ") + ] + in + + run (create (module Matchers.Alpha) matcher) "foo(bar.baz)" {|$M:\w+ |} ""; + [%expect_exact {|{"uri":null,"matches":[{"range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":3,"line":1,"column":4}},"environment":[{"variable":"M","value":"foo","range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":3,"line":1,"column":4}}}],"matched":"foo"},{"range":{"start":{"offset":4,"line":1,"column":5},"end":{"offset":7,"line":1,"column":8}},"environment":[{"variable":"M","value":"bar","range":{"start":{"offset":4,"line":1,"column":5},"end":{"offset":7,"line":1,"column":8}}}],"matched":"bar"},{"range":{"start":{"offset":8,"line":1,"column":9},"end":{"offset":11,"line":1,"column":12}},"environment":[{"variable":"M","value":"baz","range":{"start":{"offset":8,"line":1,"column":9},"end":{"offset":11,"line":1,"column":12}}}],"matched":"baz"}]} +|}]; + run (create (module Matchers.Omega) matcher) "foo(bar.baz)" {|$M:\w+ |} ""; + [%expect_exact {|{"uri":null,"matches":[{"range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":3,"line":1,"column":4}},"environment":[{"variable":"M","value":"foo","range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":3,"line":1,"column":4}}}],"matched":"foo"},{"range":{"start":{"offset":4,"line":1,"column":5},"end":{"offset":7,"line":1,"column":8}},"environment":[{"variable":"M","value":"bar","range":{"start":{"offset":4,"line":1,"column":5},"end":{"offset":7,"line":1,"column":8}}}],"matched":"bar"},{"range":{"start":{"offset":8,"line":1,"column":9},"end":{"offset":11,"line":1,"column":12}},"environment":[{"variable":"M","value":"baz","range":{"start":{"offset":8,"line":1,"column":9},"end":{"offset":11,"line":1,"column":12}}}],"matched":"baz"}]} +|}]; + + (* Expect no matches: Everything parser takes precedence. Allow folding over list to define order. *) + let matcher = + Matchers.Metasyntax. + [ Regex ("$", ':', " ") + ; Hole (Everything, Delimited (Some "$", None)) + ; Hole (Alphanum, Delimited (Some "$$", None)) + ] + in + + run (create (module Matchers.Alpha) matcher) "foo(bar.baz)" {|$M:\w+ |} ""; + [%expect_exact {|No matches.|}]; + run (create (module Matchers.Omega) matcher) "foo(bar.baz)" {|$M:\w+ |} ""; + [%expect_exact {|No matches.|}] + + +let%expect_test "custom_metasyntax_underscore" = + let matcher = + Matchers.Metasyntax. + [ Hole (Everything, Delimited (Some "$", None)) + ; Hole (Alphanum, Delimited (Some "?", None)) + ] + in + + run (create (module Matchers.Alpha) matcher) "simple(bar)" {|$_(?_)|} ""; + [%expect_exact {|{"uri":null,"matches":[{"range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":11,"line":1,"column":12}},"environment":[{"variable":"_","value":"simple","range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":6,"line":1,"column":7}}}],"matched":"simple(bar)"}]} +|}]; + (* different because we record _ the first time and don't subsequently for implicit_equals *) + run (create (module Matchers.Omega) matcher) "simple(bar)" {|$_(?_)|} ""; + [%expect_exact {|{"uri":null,"matches":[{"range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":11,"line":1,"column":12}},"environment":[{"variable":"_","value":"bar","range":{"start":{"offset":7,"line":1,"column":8},"end":{"offset":10,"line":1,"column":11}}}],"matched":"simple(bar)"}]} +|}] + +let%expect_test "custom_metasyntax_equivalence" = + let matcher = + Matchers.Metasyntax. + [ Hole (Everything, Delimited (Some "$", None)) + ; Regex ("$", '~', "$") + ] + in + + run (create (module Matchers.Alpha) matcher) "foo(foo)" {|$A($A~\w+$)|} ""; + [%expect_exact {|{"uri":null,"matches":[{"range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":8,"line":1,"column":9}},"environment":[{"variable":"A","value":"foo","range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":3,"line":1,"column":4}}},{"variable":"A_equal_!@#$000000000011","value":"foo","range":{"start":{"offset":4,"line":1,"column":5},"end":{"offset":7,"line":1,"column":8}}}],"matched":"foo(foo)"}]} +|}]; + run (create (module Matchers.Omega) matcher) "foo(foo)" {|$A($A~\w+$)|} ""; + [%expect_exact {|{"uri":null,"matches":[{"range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":8,"line":1,"column":9}},"environment":[{"variable":"A","value":"foo","range":{"start":{"offset":4,"line":1,"column":5},"end":{"offset":7,"line":1,"column":8}}},{"variable":"A_equal_!@#$000000000012","value":"foo","range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":3,"line":1,"column":4}}}],"matched":"foo(foo)"}]} +|}] + +let%expect_test "custom_metasyntax_definition_order" = + let matcher = + Matchers.Metasyntax. + [ Regex ("$", '~', "$") + ; Hole (Everything, Delimited (Some "$", None)) + ] + in + + run (create (module Matchers.Alpha) matcher) "simple(bar)baz" {|$A($B)$C~\w+$|} ""; + [%expect_exact {|No matches.|}]; + run (create (module Matchers.Omega) matcher) "simple(bar)baz" {|$A($B)$C~\w+$|} ""; + [%expect_exact {|No matches.|}]; + + let matcher = + Matchers.Metasyntax. + [ Hole (Everything, Delimited (Some "$", None)) + ; Regex ("$", '~', "$") + ] + in + + run (create (module Matchers.Alpha) matcher) "simple(bar)baz" {|$A($B)$C~\w+$|} ""; + [%expect_exact {|{"uri":null,"matches":[{"range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":14,"line":1,"column":15}},"environment":[{"variable":"A","value":"simple","range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":6,"line":1,"column":7}}},{"variable":"B","value":"bar","range":{"start":{"offset":7,"line":1,"column":8},"end":{"offset":10,"line":1,"column":11}}},{"variable":"C","value":"baz","range":{"start":{"offset":11,"line":1,"column":12},"end":{"offset":14,"line":1,"column":15}}}],"matched":"simple(bar)baz"}]} +|}]; + run (create (module Matchers.Omega) matcher) "simple(bar)baz" {|$A($B)$C~\w+$|} ""; + [%expect_exact {|{"uri":null,"matches":[{"range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":14,"line":1,"column":15}},"environment":[{"variable":"A","value":"simple","range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":6,"line":1,"column":7}}},{"variable":"B","value":"bar","range":{"start":{"offset":7,"line":1,"column":8},"end":{"offset":10,"line":1,"column":11}}},{"variable":"C","value":"baz","range":{"start":{"offset":11,"line":1,"column":12},"end":{"offset":14,"line":1,"column":15}}}],"matched":"simple(bar)baz"}]} +|}] + +let%expect_test "custom_metasyntax_rewrite_alpha" = + let syntax = + Matchers.Metasyntax. + [ Hole (Everything, Delimited (Some "$", None)) + ; Hole (Alphanum, Delimited (Some "?", None)) + ] + in + let metasyntax = Matchers.Metasyntax.{ syntax; identifier = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_" } in + let matcher = Option.value_exn (Matchers.Alpha.select_with_extension ~metasyntax ".go") in + + let specification = Matchers.Specification.create ~match_template:"$A(?B)" ~rewrite_template:"??B -> $A$A" () in + let result = Pipeline.execute matcher ~metasyntax (String "simple(bar)") specification in + let output = match result with + | Replacement (_, result, _) -> result + | Matches _ -> "matches" + | Nothing -> "nothing" + in + print_string output; + [%expect_exact {|?bar -> simplesimple|}]; + + let specification = Matchers.Specification.create ~match_template:"$A(?B)" ~rewrite_template:"$id() $id(a) $id(a)" () in + let result = Pipeline.execute matcher ~metasyntax (String "simple(bar)") specification in + let output = match result with + | Replacement (_, result, _) -> result + | Matches _ -> "matches" + | Nothing -> "nothing" + in + print_string output; + [%expect_exact {|1 2 2|}] + +let%expect_test "custom_metasyntax_rewrite_omega" = + let syntax = + Matchers.Metasyntax. + [ Hole (Everything, Delimited (Some "$", None)) + ; Hole (Alphanum, Delimited (Some "?", None)) + ] + in + let metasyntax = Matchers.Metasyntax.{ syntax; identifier = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_" } in + let matcher = Option.value_exn (Matchers.Omega.select_with_extension ~metasyntax ".go") in + + let specification = Matchers.Specification.create ~match_template:"$A(?B)" ~rewrite_template:"??B -> $A$A" () in + let result = Pipeline.execute matcher ~metasyntax (String "simple(bar)") specification in + let output = match result with + | Replacement (_, result, _) -> result + | Matches _ -> "matches" + | Nothing -> "nothing" + in + print_string output; + [%expect_exact {|?bar -> simplesimple|}]; + + let specification = Matchers.Specification.create ~match_template:"$A(?B)" ~rewrite_template:"$id() $id(a) $id(a)" () in + let result = Pipeline.execute matcher ~metasyntax (String "simple(bar)") specification in + let output = match result with + | Replacement (_, result, _) -> result + | Matches _ -> "matches" + | Nothing -> "nothing" + in + print_string output; + [%expect_exact {|3 4 4|}] + +let%expect_test "custom_metasyntax_greek_letters" = + let matcher = + Matchers.Metasyntax. + [ Hole (Alphanum, Reserved_identifiers ["α"; "β"]) + ] + in + + run (create (module Matchers.Alpha) matcher) "simple(bar)" {|α(β)|} ""; + [%expect_exact {|{"uri":null,"matches":[{"range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":11,"line":1,"column":12}},"environment":[{"variable":"α","value":"simple","range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":6,"line":1,"column":7}}},{"variable":"β","value":"bar","range":{"start":{"offset":7,"line":1,"column":8},"end":{"offset":10,"line":1,"column":11}}}],"matched":"simple(bar)"}]} +|}]; + run (create (module Matchers.Omega) matcher) "simple(bar)" {|α(β)|} ""; + [%expect_exact {|{"uri":null,"matches":[{"range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":11,"line":1,"column":12}},"environment":[{"variable":"α","value":"simple","range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":6,"line":1,"column":7}}},{"variable":"β","value":"bar","range":{"start":{"offset":7,"line":1,"column":8},"end":{"offset":10,"line":1,"column":11}}}],"matched":"simple(bar)"}]} +|}] + +let%expect_test "custom_metasyntax_alphanum_test" = + let matcher = + Matchers.Metasyntax. + [ Hole (Alphanum, Delimited (Some "[:", Some ":]")) + ; Hole (Alphanum, Reserved_identifiers ["α"; "β"]) + ] + in + + run (create (module Matchers.Alpha) matcher) "simple(bar)" {|[:A:](α)|} ""; + [%expect_exact {|{"uri":null,"matches":[{"range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":11,"line":1,"column":12}},"environment":[{"variable":"A","value":"simple","range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":6,"line":1,"column":7}}},{"variable":"α","value":"bar","range":{"start":{"offset":7,"line":1,"column":8},"end":{"offset":10,"line":1,"column":11}}}],"matched":"simple(bar)"}]} +|}]; + run (create (module Matchers.Omega) matcher) "simple(bar)" {|[:A:](α)|} ""; + [%expect_exact {|{"uri":null,"matches":[{"range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":11,"line":1,"column":12}},"environment":[{"variable":"A","value":"simple","range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":6,"line":1,"column":7}}},{"variable":"α","value":"bar","range":{"start":{"offset":7,"line":1,"column":8},"end":{"offset":10,"line":1,"column":11}}}],"matched":"simple(bar)"}]} +|}] diff --git a/test/common/test_extract_regex.ml b/test/common/test_extract_regex.ml index d346633..a49de22 100644 --- a/test/common/test_extract_regex.ml +++ b/test/common/test_extract_regex.ml @@ -5,6 +5,13 @@ open Comby let%expect_test "basic" = let match_template = "for :[i], :[x] := range :[list] {:[body]}" in let spec = Matchers.Specification.create ~match_template () in - let result = Regex.to_regex spec in + let result = Matchers.Specification.to_regex spec in print_string result; - [%expect_exact {|(for\s+)(\n|.)*?(,\s+)(\n|.)*?(\s+:=\s+range\s+)(\n|.)*?(\s+\{)(\n|.)*?(\})|}]; + [%expect_exact {|(for\s+(\n|.)*?,\s+(\n|.)*?\s+:=\s+range\s+(\n|.)*?\s+\{(\n|.)*?\})|}] + +let%expect_test "different_holes" = + let match_template = "for :[[i]], :[ ] := range :[list.] {:[body]}" in + let spec = Matchers.Specification.create ~match_template () in + let result = Matchers.Specification.to_regex spec in + print_string result; + [%expect_exact {|(for\s+(\w+),\s+(\ |\t|\s|\r|\n)+\s+:=\s+range\s+([^ \t\s\r\n])+\s+\{(\n|.)*?\})|}] diff --git a/test/common/test_go.ml b/test/common/test_go.ml index f64a1a0..b98a896 100644 --- a/test/common/test_go.ml +++ b/test/common/test_go.ml @@ -4,21 +4,6 @@ open Test_helpers open Comby_kernel open Matchers -let run ?(rule = "where true") (module E : Engine.S) source match_template rewrite_template = - let rule = Rule.create rule |> Or_error.ok_exn in - E.Go.first ~configuration match_template source - |> function - | Ok ({environment; _ } as result) -> - if Rule.(sat @@ apply ~match_all:(E.Go.all ~rule:[Ast.True] ~nested:false) rule environment) then - Rewrite.all ~source ~rewrite_template [result] - |> (fun x -> Option.value_exn x) - |> (fun { rewritten_source; _ } -> rewritten_source) - |> print_string - else - assert false - | Error _ -> - print_string rewrite_template - let%expect_test "gosimple_s1000" = let source = {| @@ -45,13 +30,13 @@ let%expect_test "gosimple_s1000" = |} in - run (module Alpha) source match_template rewrite_template; + run (module Alpha.Go) source match_template rewrite_template; [%expect_exact {| x := <-ch fmt.Println(x) |}]; - run (module Omega) source match_template rewrite_template; + run (module Omega.Go) source match_template rewrite_template; [%expect_exact {| x := <-ch fmt.Println(x) @@ -83,12 +68,12 @@ let%expect_test "gosimple_s1001" = let rule = {|where :[index_define] == :[index_use], :[src_element_define] == :[src_element_use]|} in - run (module Alpha) ~rule source match_template rewrite_template; + run (module Alpha.Go) ~rule source match_template rewrite_template; [%expect_exact {| copy(dst, src) |}]; - run (module Omega) ~rule source match_template rewrite_template; + run (module Omega.Go) ~rule source match_template rewrite_template; [%expect_exact {| copy(dst, src) |}] @@ -108,7 +93,7 @@ let%expect_test "gosimple_s1003" = let rewrite_template = {|:[1]|} in - run (module Alpha) source match_template rewrite_template; + run (module Alpha.Go) source match_template rewrite_template; [%expect_exact {|Index|}]; - run (module Omega) source match_template rewrite_template; + run (module Omega.Go) source match_template rewrite_template; [%expect_exact {|Index|}] diff --git a/test/common/test_helpers.ml b/test/common/test_helpers.ml index 8603fb0..d55c68a 100644 --- a/test/common/test_helpers.ml +++ b/test/common/test_helpers.ml @@ -41,30 +41,62 @@ let run ?(configuration = configuration) (module M : Matchers.Matcher.S) source |> (fun { rewritten_source; _ } -> rewritten_source) |> print_string -let run_nested - (module M : Matchers.Matcher.S) - ?(configuration = configuration) - ?rule - source - match_template - () = - let nested = - match rule with - | None -> true - | Some rule -> - let options = Rule.create rule |> Or_error.ok_exn |> Rule.options in - options.nested - in - M.all ~configuration ~nested ~template:match_template ~source () - |> function - | [] -> print_string "No matches." - | matches -> - let matches = List.map matches ~f:(Match.convert_offset ~fast:true ~source) in - Format.asprintf "%a" Match.pp (None, matches) - |> print_string - -(** Rule tests *) let make_env bindings = List.fold bindings ~init:(Match.Environment.create ()) ~f:(fun env (var, value) -> Match.Environment.add env var value) + +let parse_template metasyntax template = + let (module M) = Matchers.Metasyntax.create metasyntax in + let module Template_parser = Template.Make(M) in + let tree = Template_parser.parse template in + Sexp.to_string_hum (Template.sexp_of_t tree) + +let run_match (module M : Matchers.Matcher.S) source ?rule match_template = + let rule = + match rule with + | Some rule -> Matchers.Rule.create rule |> Or_error.ok_exn + | None -> Rule.create "where true" |> Or_error.ok_exn + in + M.all ~configuration ~rule ~template:match_template ~source () + |> function + | [] -> print_string "No matches." + | hd :: _ -> + print_string (Yojson.Safe.to_string (Match.to_yojson hd)) + +let run_all_matches (module M : Matchers.Matcher.S) ?(format = `Json) source ?rule match_template = + let rule = + match rule with + | Some rule -> Matchers.Rule.create rule |> Or_error.ok_exn + | None -> Rule.create "where true" |> Or_error.ok_exn + in + M.all ~configuration ~rule ~template:match_template ~source () + |> function + | [] -> print_string "No matches." + | l -> + match format with + | `Json -> + Format.printf "%a" Match.pp_json_lines (None, l) + | `Lines -> + let matches = List.map l ~f:(Match.convert_offset ~fast:true ~source) in + Format.asprintf "%a" Match.pp (None, matches) + |> print_string + +let run_rule (module E : Engine.S) source match_template rewrite_template rule = + let (module M : Matcher.S) = (module E.Generic) in + M.first ~configuration match_template source + |> function + | Error _ -> print_string "bad" + | Ok result -> + match result with + | ({ environment; _ } as m) -> + let e = Rule.(result_env @@ apply ~match_all:(M.all ~rule:[Ast.True]) rule environment) in + match e with + | None -> print_string "bad bad" + | Some e -> + { m with environment = e } + |> List.return + |> Rewrite.all ~source ~rewrite_template + |> (fun x -> Option.value_exn x) + |> (fun { rewritten_source; _ } -> rewritten_source) + |> print_string diff --git a/test/common/test_hole_extensions.ml b/test/common/test_hole_extensions.ml index c12acff..2414d1d 100644 --- a/test/common/test_hole_extensions.ml +++ b/test/common/test_hole_extensions.ml @@ -214,7 +214,7 @@ let%expect_test "implicit_equals_does_not_apply_to_underscore" = [%expect_exact {|a|}] -let%expect_test "expression_hole" = +let%expect_test "expression_hole_basic" = let source = {|(b, c, d) [ ] { { } } { } ()()|} in let match_template = {|:[x:e]|} in let rewrite_template = {|>:[x]<|} in @@ -224,8 +224,7 @@ let%expect_test "expression_hole" = run_all (module Omega) source match_template rewrite_template; [%expect_exact {|>(b, c, d)< >[ ]< >{ { } }< >{ }< >()()<|}] - -let%expect_test "expression_hole" = +let%expect_test "expression_hole_basic_2" = let source = {|a(b, c, d)e [][] { { } }|} in let match_template = {|:[x:e]|} in let rewrite_template = {|>:[x]<|} in @@ -233,4 +232,14 @@ let%expect_test "expression_hole" = run_all (module Alpha) source match_template rewrite_template; [%expect_exact {|>a(b, c, d)e< >[][]< >{ { } }<|}]; run_all (module Omega) source match_template rewrite_template; + [%expect_exact {|>a(b, c, d)e< >[][]< >{ { } }<|}] + +let%expect_test "expression_hole_multiple" = + let source = {|a(b, c, d)e [][] { { } }|} in + let match_template = {|:[x:e] :[y:e] :[z:e]|} in + let rewrite_template = {|>:[x]< >:[y]< >:[z]<|} in + + run_all (module Alpha) source match_template rewrite_template; [%expect_exact {|>a(b, c, d)e< >[][]< >{ { } }<|}]; + run_all (module Omega) source match_template rewrite_template; + [%expect_exact {|>a(b, c, d)e< >[][]< >{ { } }<|}] diff --git a/test/common/test_match_offsets.ml b/test/common/test_match_offsets.ml new file mode 100644 index 0000000..e3e30aa --- /dev/null +++ b/test/common/test_match_offsets.ml @@ -0,0 +1,17 @@ +open Comby_kernel + +open Test_helpers + +open Matchers + + +let%expect_test "correct_match_offsets_regex" = + let source = "foo bar foobar" in + let match_template = {|:[x~\s*]|} in + (*let rewrite_template = "(:[x])" in*) + run_all_matches (module Alpha.Generic) source match_template; + [%expect_exact {|{"uri":null,"matches":[{"range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":0,"line":1,"column":1}},"environment":[{"variable":"x","value":"","range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":0,"line":1,"column":1}}}],"matched":""},{"range":{"start":{"offset":1,"line":1,"column":2},"end":{"offset":1,"line":1,"column":2}},"environment":[{"variable":"x","value":"","range":{"start":{"offset":1,"line":1,"column":2},"end":{"offset":1,"line":1,"column":2}}}],"matched":""},{"range":{"start":{"offset":2,"line":1,"column":3},"end":{"offset":2,"line":1,"column":3}},"environment":[{"variable":"x","value":"","range":{"start":{"offset":2,"line":1,"column":3},"end":{"offset":2,"line":1,"column":3}}}],"matched":""},{"range":{"start":{"offset":3,"line":1,"column":4},"end":{"offset":4,"line":1,"column":5}},"environment":[{"variable":"x","value":" ","range":{"start":{"offset":3,"line":1,"column":4},"end":{"offset":4,"line":1,"column":5}}}],"matched":" "},{"range":{"start":{"offset":4,"line":1,"column":5},"end":{"offset":4,"line":1,"column":5}},"environment":[{"variable":"x","value":"","range":{"start":{"offset":4,"line":1,"column":5},"end":{"offset":4,"line":1,"column":5}}}],"matched":""},{"range":{"start":{"offset":5,"line":1,"column":6},"end":{"offset":5,"line":1,"column":6}},"environment":[{"variable":"x","value":"","range":{"start":{"offset":5,"line":1,"column":6},"end":{"offset":5,"line":1,"column":6}}}],"matched":""},{"range":{"start":{"offset":6,"line":1,"column":7},"end":{"offset":6,"line":1,"column":7}},"environment":[{"variable":"x","value":"","range":{"start":{"offset":6,"line":1,"column":7},"end":{"offset":6,"line":1,"column":7}}}],"matched":""},{"range":{"start":{"offset":7,"line":1,"column":8},"end":{"offset":8,"line":1,"column":9}},"environment":[{"variable":"x","value":" ","range":{"start":{"offset":7,"line":1,"column":8},"end":{"offset":8,"line":1,"column":9}}}],"matched":" "},{"range":{"start":{"offset":8,"line":1,"column":9},"end":{"offset":8,"line":1,"column":9}},"environment":[{"variable":"x","value":"","range":{"start":{"offset":8,"line":1,"column":9},"end":{"offset":8,"line":1,"column":9}}}],"matched":""},{"range":{"start":{"offset":9,"line":1,"column":10},"end":{"offset":9,"line":1,"column":10}},"environment":[{"variable":"x","value":"","range":{"start":{"offset":9,"line":1,"column":10},"end":{"offset":9,"line":1,"column":10}}}],"matched":""},{"range":{"start":{"offset":10,"line":1,"column":11},"end":{"offset":10,"line":1,"column":11}},"environment":[{"variable":"x","value":"","range":{"start":{"offset":10,"line":1,"column":11},"end":{"offset":10,"line":1,"column":11}}}],"matched":""},{"range":{"start":{"offset":11,"line":1,"column":12},"end":{"offset":11,"line":1,"column":12}},"environment":[{"variable":"x","value":"","range":{"start":{"offset":11,"line":1,"column":12},"end":{"offset":11,"line":1,"column":12}}}],"matched":""},{"range":{"start":{"offset":12,"line":1,"column":13},"end":{"offset":12,"line":1,"column":13}},"environment":[{"variable":"x","value":"","range":{"start":{"offset":12,"line":1,"column":13},"end":{"offset":12,"line":1,"column":13}}}],"matched":""},{"range":{"start":{"offset":13,"line":1,"column":14},"end":{"offset":13,"line":1,"column":14}},"environment":[{"variable":"x","value":"","range":{"start":{"offset":13,"line":1,"column":14},"end":{"offset":13,"line":1,"column":14}}}],"matched":""}]} +|}]; + run_all_matches (module Omega.Generic) source match_template; + [%expect_exact {|{"uri":null,"matches":[{"range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":0,"line":1,"column":1}},"environment":[{"variable":"x","value":"","range":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":0,"line":1,"column":1}}}],"matched":""},{"range":{"start":{"offset":1,"line":1,"column":2},"end":{"offset":1,"line":1,"column":2}},"environment":[{"variable":"x","value":"","range":{"start":{"offset":1,"line":1,"column":2},"end":{"offset":1,"line":1,"column":2}}}],"matched":""},{"range":{"start":{"offset":2,"line":1,"column":3},"end":{"offset":2,"line":1,"column":3}},"environment":[{"variable":"x","value":"","range":{"start":{"offset":2,"line":1,"column":3},"end":{"offset":2,"line":1,"column":3}}}],"matched":""},{"range":{"start":{"offset":3,"line":1,"column":4},"end":{"offset":4,"line":1,"column":5}},"environment":[{"variable":"x","value":" ","range":{"start":{"offset":3,"line":1,"column":4},"end":{"offset":4,"line":1,"column":5}}}],"matched":" "},{"range":{"start":{"offset":4,"line":1,"column":5},"end":{"offset":4,"line":1,"column":5}},"environment":[{"variable":"x","value":"","range":{"start":{"offset":4,"line":1,"column":5},"end":{"offset":4,"line":1,"column":5}}}],"matched":""},{"range":{"start":{"offset":5,"line":1,"column":6},"end":{"offset":5,"line":1,"column":6}},"environment":[{"variable":"x","value":"","range":{"start":{"offset":5,"line":1,"column":6},"end":{"offset":5,"line":1,"column":6}}}],"matched":""},{"range":{"start":{"offset":6,"line":1,"column":7},"end":{"offset":6,"line":1,"column":7}},"environment":[{"variable":"x","value":"","range":{"start":{"offset":6,"line":1,"column":7},"end":{"offset":6,"line":1,"column":7}}}],"matched":""},{"range":{"start":{"offset":7,"line":1,"column":8},"end":{"offset":8,"line":1,"column":9}},"environment":[{"variable":"x","value":" ","range":{"start":{"offset":7,"line":1,"column":8},"end":{"offset":8,"line":1,"column":9}}}],"matched":" "},{"range":{"start":{"offset":8,"line":1,"column":9},"end":{"offset":8,"line":1,"column":9}},"environment":[{"variable":"x","value":"","range":{"start":{"offset":8,"line":1,"column":9},"end":{"offset":8,"line":1,"column":9}}}],"matched":""},{"range":{"start":{"offset":9,"line":1,"column":10},"end":{"offset":9,"line":1,"column":10}},"environment":[{"variable":"x","value":"","range":{"start":{"offset":9,"line":1,"column":10},"end":{"offset":9,"line":1,"column":10}}}],"matched":""},{"range":{"start":{"offset":10,"line":1,"column":11},"end":{"offset":10,"line":1,"column":11}},"environment":[{"variable":"x","value":"","range":{"start":{"offset":10,"line":1,"column":11},"end":{"offset":10,"line":1,"column":11}}}],"matched":""},{"range":{"start":{"offset":11,"line":1,"column":12},"end":{"offset":11,"line":1,"column":12}},"environment":[{"variable":"x","value":"","range":{"start":{"offset":11,"line":1,"column":12},"end":{"offset":11,"line":1,"column":12}}}],"matched":""},{"range":{"start":{"offset":12,"line":1,"column":13},"end":{"offset":12,"line":1,"column":13}},"environment":[{"variable":"x","value":"","range":{"start":{"offset":12,"line":1,"column":13},"end":{"offset":12,"line":1,"column":13}}}],"matched":""},{"range":{"start":{"offset":13,"line":1,"column":14},"end":{"offset":13,"line":1,"column":14}},"environment":[{"variable":"x","value":"","range":{"start":{"offset":13,"line":1,"column":14},"end":{"offset":13,"line":1,"column":14}}}],"matched":""}]} +|}] diff --git a/test/common/test_match_rule.ml b/test/common/test_match_rule.ml index f55a36e..65550ff 100644 --- a/test/common/test_match_rule.ml +++ b/test/common/test_match_rule.ml @@ -8,12 +8,12 @@ open Test_helpers let sat ?(env = Match.Environment.create ()) (module E : Engine.S) rule = let rule = Rule.create rule |> Or_error.ok_exn in - Format.sprintf "%b" Rule.(sat @@ apply ~match_all:(E.Generic.all ~rule:[Ast.True] ~nested:false) rule env) + Format.sprintf "%b" Rule.(sat @@ apply ~match_all:(E.Generic.all ~rule:[Ast.True]) rule env) let run (module E : Engine.S) template source rule = let (module M : Matcher.S) = (module E.Generic) in M.all ~configuration ~template ~source () - |> List.filter ~f:(fun { environment; _ } -> Rule.(sat @@ apply ~match_all:(M.all ~rule:[Ast.True] ~nested:false) rule environment)) + |> List.filter ~f:(fun { environment; _ } -> Rule.(sat @@ apply ~match_all:(M.all ~rule:[Ast.True]) rule environment)) let%expect_test "rule_sat" = let rule = {| where "x" != "y" |} in @@ -48,9 +48,9 @@ let%expect_test "rule_sat" = let rule = {| where :[x] == :[x] |} in sat (module Alpha) rule |> print_string; - [%expect_exact {|false|}]; + [%expect_exact {|true|}]; sat (module Omega) rule |> print_string; - [%expect_exact {|false|}] + [%expect_exact {|true|}] let%expect_test "rule_sat_with_env" = let env = make_env ["1", "x"; "2", "y"; "3", "x"] in diff --git a/test/common/test_nested_matches.ml b/test/common/test_nested_matches.ml index 4b2d392..49d66e1 100644 --- a/test/common/test_nested_matches.ml +++ b/test/common/test_nested_matches.ml @@ -5,9 +5,10 @@ open Comby_kernel open Matchers -let run (module E : Engine.S) = run_nested (module E.Generic) - let%expect_test "nested_matches" = + let rule = {|where nested|} in + let format = `Lines in + let source = {| a{ b{ @@ -25,7 +26,7 @@ q{ |} in let match_template = {|:[[f]]{:[x]}|} in - run (module Alpha) source match_template (); + run_all_matches (module Alpha.Generic) ~format source ~rule match_template; [%expect_exact {|2:a{\n b{\n c{\n d{e}\n }\n }\n } 3:b{\n c{\n d{e}\n }\n } 4:c{\n d{e}\n } @@ -35,7 +36,7 @@ q{ 12:f{} |}]; - run (module Omega) source match_template (); + run_all_matches (module Omega.Generic) ~format source ~rule match_template; [%expect_exact {|2:a{\n b{\n c{\n d{e}\n }\n }\n } 3:b{\n c{\n d{e}\n }\n } 4:c{\n d{e}\n } @@ -43,41 +44,20 @@ q{ 10:q{\n b{\n f{}\n }\n} 11:b{\n f{}\n } 12:f{} -|}] +|}]; -let%expect_test "nested_matches" = let source = {|a(b(c(d(e))))|} in - let match_template = {|:[[f]](:[x])|} in - run (module Alpha) source match_template (); + let match_template = {|:[[f]](:[x])|} in + run_all_matches (module Alpha.Generic) ~format source ~rule match_template; [%expect_exact {|1:a(b(c(d(e)))) 1:b(c(d(e))) 1:c(d(e)) 1:d(e) |}]; - run (module Omega) source match_template (); - [%expect_exact {|1:a(b(c(d(e)))) -1:b(c(d(e))) -1:c(d(e)) -1:d(e) -|}] - - -let%expect_test "nested_matches_from_rule" = - let source = {|a(b(c(d(e))))|} in - let match_template = {|:[[f]](:[x])|} in - let rule = {|where nested|} in - - run (module Alpha) ~rule source match_template (); - [%expect_exact {|1:a(b(c(d(e)))) -1:b(c(d(e))) -1:c(d(e)) -1:d(e) -|}]; - - run (module Omega) ~rule source match_template (); + run_all_matches (module Omega.Generic) ~format source ~rule match_template; [%expect_exact {|1:a(b(c(d(e)))) 1:b(c(d(e))) 1:c(d(e)) diff --git a/test/common/test_parse_rewrite_template.ml b/test/common/test_parse_rewrite_template.ml deleted file mode 100644 index e0e5dc9..0000000 --- a/test/common/test_parse_rewrite_template.ml +++ /dev/null @@ -1,69 +0,0 @@ -open Core - -open Matchers -open Rewriter -open Rewrite_template - -let parse metasyntax template = - let (module M) = Matchers.Metasyntax.create metasyntax in - let module Template_parser = Make(M) in - let tree = Template_parser.parse template in - match tree with - | Some tree -> Sexp.to_string_hum (sexp_of_list sexp_of_extracted tree) - | None -> "ERROR: NO PARSE" - -let%expect_test "interpret_incomplete_hole_as_constant" = - let template = ":[B :[A]" in - parse Matchers.Metasyntax.default_metasyntax template |> print_string; - [%expect_exact {|((Constant :) (Constant "[B ") (Hole ((variable A) (pattern :[A]))))|}]; - - let template = ":[B :[A~x]" in - parse Matchers.Metasyntax.default_metasyntax template |> print_string; - [%expect_exact {|((Constant :) (Constant "[B ") (Hole ((variable A) (pattern :[A~x]))))|}] - -let%expect_test "interpret_incomplete_hole_as_constant_metasyntax" = - let template = "$:x $B:x $A" in - let metasyntax = - Matchers.Metasyntax.{ - syntax = - [ Hole (Everything, Delimited (Some "$", None)) - ; Hole (Alphanum, Delimited (Some "$$", None)) - ; Regex ("$", ':', " ") - ] - ; identifier = "AB" - } - in - parse metasyntax template |> print_string; - [%expect_exact {|((Constant $) (Constant ":x ") (Hole ((variable B) (pattern "$B:x "))) - (Constant " ") (Hole ((variable A) (pattern $A))))|}] - -let%expect_test "interpret_incomplete_hole_as_constant_metasyntax" = - let template = "( , , )" in - let metasyntax = - Matchers.Metasyntax.{ - syntax = - [ Hole (Everything, Delimited (Some "NOTHING", None)) - ; Hole (Everything, Reserved_identifiers [" "; " "]) - ] - ; identifier = "AB" - } - in - parse metasyntax template |> print_string; - [%expect_exact {|((Constant "(") (Hole ((variable " ") (pattern " "))) (Constant ,) - (Hole ((variable " ") (pattern " "))) (Constant ,) - (Hole ((variable " ") (pattern " "))) (Constant ")"))|}] - -let%expect_test "interpret_incomplete_hole_as_constant_metasyntax" = - let template = "(..,.)" in - let metasyntax = - Matchers.Metasyntax.{ - syntax = - [ Hole (Everything, Delimited (Some "NOTHING", None)) - ; Hole (Everything, Reserved_identifiers [".."; "."]) - ] - ; identifier = "AB" - } - in - parse metasyntax template |> print_string; - [%expect_exact {|((Constant "(") (Hole ((variable ..) (pattern ..))) (Constant ,) - (Hole ((variable .) (pattern .))) (Constant ")"))|}] diff --git a/test/common/test_parse_rule.ml b/test/common/test_parse_rule.ml index 210c957..ed3a991 100644 --- a/test/common/test_parse_rule.ml +++ b/test/common/test_parse_rule.ml @@ -2,7 +2,6 @@ open Core open Comby_kernel open Matchers -open Rule let rule_parses rule = match Rule.create rule with @@ -106,7 +105,13 @@ let%expect_test "parse_freeform_antecedent_pattern" = [%expect_exact "(rule ((Match (String match_me) (((String \"case one\") (True)) ((String \"case two\") (False)) - ((String \":[template] :[example]\") (False)))))) + ((Template + ((Hole + ((variable template) (pattern :[template]) (offset 0) (kind Value))) + (Constant \" \") + (Hole + ((variable example) (pattern :[example]) (offset 12) (kind Value))))) + (False)))))) "] let%expect_test "optional_first_pipe_one_case" = @@ -161,7 +166,12 @@ let%expect_test "parse_freeform_antecedent_pattern_map_regex" = |> fun rule -> print_s [%message (rule : Ast.expression list)]; [%expect_exact "(rule ((Match (String match_me) - (((Variable :[~match_me]) (True)) ((Variable :[_]) (False)))))) + (((Template + ((Hole ((variable \"\") (pattern :[~match_me]) (offset 0) (kind Value))))) + (True)) + ((Template + ((Hole ((variable _) (pattern :[_]) (offset 0) (kind Value))))) + (False)))))) "] let%expect_test "parse_regex_hole" = @@ -175,8 +185,14 @@ let%expect_test "parse_regex_hole" = |> Or_error.ok_exn |> fun rule -> print_s [%message (rule : Ast.expression list)]; [%expect_exact "(rule - ((Match (Variable 1) - (((String \":[~^\\\\d+$]\") (False)) ((String :[_]) (True)))))) + ((Match + (Template ((Hole ((variable 1) (pattern :[1]) (offset 0) (kind Value))))) + (((Template + ((Hole ((variable \"\") (pattern \":[~^\\\\d+$]\") (offset 0) (kind Value))))) + (False)) + ((Template + ((Hole ((variable _) (pattern :[_]) (offset 0) (kind Value))))) + (True)))))) "] let%expect_test "parse_interpreting_escapes" = @@ -196,3 +212,62 @@ b` -> false, \\n\\\\\") (True)) ((String \"a\\\\n\\\\heh\\ \\nb\") (False)))))) "] + +let%expect_test "parse_freeform_antecedent_in_rewrite_rule" = + Rule.create + {| + where rewrite :[contents] { concat [:[x]] -> "nice" } + |} + |> Or_error.ok_exn + |> fun rule -> print_s [%message (rule : Ast.expression list)]; + [%expect_exact "(rule + ((Rewrite + (Template + ((Hole + ((variable contents) (pattern :[contents]) (offset 0) (kind Value))))) + ((Template + ((Constant \"concat [\") + (Hole ((variable x) (pattern :[x]) (offset 8) (kind Value))) + (Constant ]))) + (String nice))))) +"] + +let%expect_test "parse_freeform_consequent_in_rewrite_rule" = + Rule.create + {| where + rewrite :[0] { :[1] :[2] -> :[1] a } + |} + |> Or_error.ok_exn + |> fun rule -> print_s [%message (rule : Ast.expression list)]; + [%expect_exact "(rule + ((Rewrite + (Template ((Hole ((variable 0) (pattern :[0]) (offset 0) (kind Value))))) + ((Template + ((Hole ((variable 1) (pattern :[1]) (offset 0) (kind Value))) + (Constant \" \") + (Hole ((variable 2) (pattern :[2]) (offset 5) (kind Value))))) + (Template + ((Hole ((variable 1) (pattern :[1]) (offset 0) (kind Value))) + (Constant \" a\"))))))) +"] + +let%expect_test "this_damn_rule" = + Rule.create + {| + where match :[1] { + | ":[~^\\d+$]" -> false + | ":[_]" -> true + } +|} + |> Or_error.ok_exn + |> fun rule -> print_s [%message (rule : Ast.expression list)]; + [%expect_exact "(rule + ((Match + (Template ((Hole ((variable 1) (pattern :[1]) (offset 0) (kind Value))))) + (((Template + ((Hole ((variable \"\") (pattern \":[~^\\\\d+$]\") (offset 0) (kind Value))))) + (False)) + ((Template + ((Hole ((variable _) (pattern :[_]) (offset 0) (kind Value))))) + (True)))))) +" ] diff --git a/test/common/test_parse_template.ml b/test/common/test_parse_template.ml new file mode 100644 index 0000000..9afe437 --- /dev/null +++ b/test/common/test_parse_template.ml @@ -0,0 +1,114 @@ +open Core +open Comby_kernel + +open Test_helpers + + +let%expect_test "get_offsets_for_holes" = + let module Template_parser = Matchers.Template.Make(Matchers.Metasyntax.Default) in + let rewrite_template = {|1234:[1]1234:[2]|} in + let variables = Template_parser.variables rewrite_template in + print_s [%message (variables : Matchers.Template.syntax list)]; + [%expect {| + (variables + (((variable 1) (pattern :[1]) (offset 4) (kind Value)) + ((variable 2) (pattern :[2]) (offset 12) (kind Value)))) |}] + +let%expect_test "interpret_regex_shorthand" = + let module Template_parser = Matchers.Template.Make(Matchers.Metasyntax.Default) in + let rewrite_template = {|a:[~x]b|} in + let variables = Template_parser.variables rewrite_template in + print_s [%message (variables : Matchers.Template.syntax list)]; + [%expect {| + (variables (((variable "") (pattern :[~x]) (offset 1) (kind Value)))) |}] + +let%expect_test "interpret_incomplete_hole_as_constant" = + let template = ":[B :[A]" in + parse_template Matchers.Metasyntax.default_metasyntax template |> print_string; + [%expect_exact {|((Constant ":[B ") + (Hole ((variable A) (pattern :[A]) (offset 4) (kind Value))))|}]; + + let template = ":[B :[A~x]" in + parse_template Matchers.Metasyntax.default_metasyntax template |> print_string; + [%expect_exact {|((Constant ":[B ") + (Hole ((variable A) (pattern :[A~x]) (offset 4) (kind Value))))|}] + +let%expect_test "interpret_incomplete_hole_as_constant_metasyntax" = + let template = "$:x $B:x $A" in + let metasyntax = + Matchers.Metasyntax.{ + syntax = + [ Hole (Everything, Delimited (Some "$", None)) + ; Hole (Alphanum, Delimited (Some "$$", None)) + ; Regex ("$", ':', " ") + ] + ; identifier = "AB" + } + in + parse_template metasyntax template |> print_string; + [%expect_exact {|((Hole ((variable "") (pattern "$:x ") (offset 0) (kind Value))) + (Hole ((variable B) (pattern "$B:x ") (offset 4) (kind Value))) + (Constant " ") (Hole ((variable A) (pattern $A) (offset 10) (kind Value))))|}] + +let%expect_test "interpret_incomplete_hole_as_constant_metasyntax" = + let template = "( , , )" in + let metasyntax = + Matchers.Metasyntax.{ + syntax = + [ Hole (Everything, Delimited (Some "NOTHING", None)) + ; Hole (Everything, Reserved_identifiers [" "; " "]) + ] + ; identifier = "AB" + } + in + parse_template metasyntax template |> print_string; + [%expect_exact {|((Constant "(") + (Hole ((variable " ") (pattern " ") (offset 1) (kind Value))) (Constant ,) + (Hole ((variable " ") (pattern " ") (offset 4) (kind Value))) (Constant ,) + (Hole ((variable " ") (pattern " ") (offset 6) (kind Value))) + (Constant ")"))|}] + +let%expect_test "interpret_incomplete_hole_as_constant_metasyntax" = + let template = "(..,.)" in + let metasyntax = + Matchers.Metasyntax.{ + syntax = + [ Hole (Everything, Delimited (Some "NOTHING", None)) + ; Hole (Everything, Reserved_identifiers [".."; "."]) + ] + ; identifier = "AB" + } + in + parse_template metasyntax template |> print_string; + [%expect_exact {|((Constant "(") (Hole ((variable ..) (pattern ..) (offset 1) (kind Value))) + (Constant ,) (Hole ((variable .) (pattern .) (offset 4) (kind Value))) + (Constant ")"))|}] + +let%expect_test "parse_reserved_identifiers_as_holes" = + let template = "(α)" in + let metasyntax = + Matchers.Metasyntax.{ + syntax = + [ Hole (Expression, Reserved_identifiers ["α"]) + ] + ; identifier = "AB" + } + in + parse_template metasyntax template |> print_string; + [%expect_exact {|((Constant "(") + (Hole ((variable "\206\177") (pattern "\206\177") (offset 1) (kind Value))) + (Constant ")"))|}] + + +let%expect_test "get_offsets_for_holes" = + let module Template_parser = Matchers.Template.Make(Matchers.Metasyntax.Default) in + let template = ":[a].type :[b].length :[[c]].lengtha :[d.].length.ok (:[e].length)" in + let variables = Template_parser.variables template in + print_s [%message (variables : Matchers.Template.syntax list)]; + [%expect {| + (variables + (((variable a) (pattern :[a].type) (offset 0) (kind Type)) + ((variable b) (pattern :[b].length) (offset 10) (kind Length)) + ((variable c) (pattern :[[c]]) (offset 22) (kind Value)) + ((variable d) (pattern :[d.].length) (offset 37) (kind Length)) + ((variable e) (pattern :[e].length) (offset 54) (kind Length)))) |}] diff --git a/test/common/test_pipeline.ml b/test/common/test_pipeline.ml index f7f2f07..42113f6 100644 --- a/test/common/test_pipeline.ml +++ b/test/common/test_pipeline.ml @@ -12,7 +12,6 @@ let configuration (module E : Engine.S) = { verbose = false ; match_timeout = 3 ; dump_statistics = false - ; substitute_in_place = true ; disable_substring_matching = false ; fast_offset_conversion = false ; match_newline_toplevel = false @@ -21,7 +20,6 @@ let configuration (module E : Engine.S) = } ; output_printer = (fun _ -> ()) ; interactive_review = None - ; extension = None ; metasyntax = None } diff --git a/test/common/test_rewrite_parts.ml b/test/common/test_rewrite_parts.ml index f78eaa5..f3785a7 100644 --- a/test/common/test_rewrite_parts.ml +++ b/test/common/test_rewrite_parts.ml @@ -3,49 +3,11 @@ open Core open Test_helpers open Comby_kernel open Matchers -open Match let all ?(configuration = configuration) template source = Alpha.C.all ~configuration ~template ~source () -module Template_parser = Rewrite.Make(Metasyntax.Default) - -let%expect_test "get_offsets_for_holes" = - let rewrite_template = {|1234:[1]1234:[2]|} in - let variables = Template_parser.variables rewrite_template in - let offsets = Rewrite.get_offsets_for_holes variables rewrite_template in - print_s [%message (offsets : (string * int) list)]; - [%expect_exact {|(offsets ((2 8) (1 4))) -|}] - -let%expect_test "get_offsets_for_holes_after_substitution_1" = - let rewrite_template = {|1234:[1]1234:[2]|} in - let variables = Template_parser.variables rewrite_template in - let offsets = Rewrite.get_offsets_for_holes variables rewrite_template in - let environment = - Environment.create () - |> (fun environment -> Environment.add environment "1" "333") - |> (fun environment -> Environment.add environment "2" "22") - in - let result = Rewrite.get_offsets_after_substitution offsets environment in - print_s [%message (result : (string * int) list)]; - [%expect_exact {|(result ((2 11) (1 4))) -|}] - -let%expect_test "get_offsets_for_holes_after_substitution_1" = - let rewrite_template = {|1234:[1]1234:[3]11:[2]|} in - let variables = Template_parser.variables rewrite_template in - let offsets = Rewrite.get_offsets_for_holes variables rewrite_template in - let environment = - Environment.create () - |> (fun environment -> Environment.add environment "1" "333") - |> (fun environment -> Environment.add environment "3" "333") - |> (fun environment -> Environment.add environment "2" "22") - in - let result = Rewrite.get_offsets_after_substitution offsets environment in - print_s [%message (result : (string * int) list)]; - [%expect_exact {|(result ((2 16) (3 11) (1 4))) -|}] +module Template_parser = Template.Make(Metasyntax.Default) let%expect_test "comments_in_string_literals_should_not_be_treated_as_comments_by_fuzzy" = let source = {|123433312343331122|} in diff --git a/test/common/test_rewrite_rule.ml b/test/common/test_rewrite_rule.ml index 3492d04..abb2ce5 100644 --- a/test/common/test_rewrite_rule.ml +++ b/test/common/test_rewrite_rule.ml @@ -4,25 +4,6 @@ open Test_helpers open Comby_kernel open Matchers -let run_rule (module E : Engine.S) source match_template rewrite_template rule = - let (module M : Matcher.S) = (module E.Generic) in - M.first ~configuration match_template source - |> function - | Error _ -> print_string "bad" - | Ok result -> - match result with - | ({ environment; _ } as m) -> - let e = Rule.(result_env @@ apply ~match_all:(M.all ~rule:[Ast.True] ~nested:false) rule environment) in - match e with - | None -> print_string "bad bad" - | Some e -> - { m with environment = e } - |> List.return - |> Rewrite.all ~source ~rewrite_template - |> (fun x -> Option.value_exn x) - |> (fun { rewritten_source; _ } -> rewritten_source) - |> print_string - let%expect_test "rewrite_rule" = let source = {|int|} in let match_template = {|:[1]|} in diff --git a/test/common/test_script.ml b/test/common/test_script.ml index 4cfdd26..0479043 100644 --- a/test/common/test_script.ml +++ b/test/common/test_script.ml @@ -11,21 +11,15 @@ let run input = let%expect_test "test_script_basic_sequence" = let script = {|:[x] -> :[y] where nested---|} in run script; - [%expect_exact {|((Spec - ((match_template "(Variable x)") (rule (((Option nested)))) - (rewrite_template ("(Variable y)")))))|}]; + [%expect_exact {|ERROR|}]; let script = {|:[x] -> :[y] where nested --- |} in run script; - [%expect_exact {|((Spec - ((match_template "(Variable x)") (rule (((Option nested)))) - (rewrite_template ("(Variable y)")))))|}]; + [%expect_exact {|ERROR|}]; let script = {|:[x] -> :[y] where nested|} in run script; - [%expect_exact {|((Spec - ((match_template "(Variable x)") (rule (((Option nested)))) - (rewrite_template ("(Variable y)")))))|}]; + [%expect_exact {|ERROR|}]; let script = {| :[x] -> :[y] where nested --- @@ -33,52 +27,30 @@ let%expect_test "test_script_basic_sequence" = |} in run script; - [%expect_exact {|((Spec - ((match_template "(Variable x)") (rule (((Option nested)))) - (rewrite_template ("(Variable y)")))) - (Spec - ((match_template "(Variable x)") (rule (((Option nested)))) - (rewrite_template ("(Variable y)")))))|}] + [%expect_exact {|ERROR|}] let%expect_test "test_script_optional_rewrite" = let script = {|:[x] where nested|} in run script; - [%expect_exact {|((Spec - ((match_template "(Variable x)") (rule (((Option nested)))) - (rewrite_template ()))))|}]; + [%expect_exact {|ERROR|}]; let script = {|:[x] where nested--- :[y] where nested|} in run script; - [%expect_exact {|((Spec - ((match_template "(Variable x)") (rule (((Option nested)))) - (rewrite_template ()))) - (Spec - ((match_template "(Variable y)") (rule (((Option nested)))) - (rewrite_template ()))))|}] + [%expect_exact {|ERROR|}] let%expect_test "test_script_optional_rule" = let script = {|:[x]|} in run script; - [%expect_exact {|((Spec ((match_template "(Variable x)") (rule ()) (rewrite_template ()))))|}]; + [%expect_exact {|ERROR|}]; let script = {|:[x]--- :[y]--- :[z] -> :[q]|} in run script; - [%expect_exact {|((Spec ((match_template "(Variable x)") (rule ()) (rewrite_template ()))) - (Spec ((match_template "(Variable y)") (rule ()) (rewrite_template ()))) - (Spec - ((match_template "(Variable z)") (rule ()) - (rewrite_template ("(Variable q)")))))|}] + [%expect_exact {|ERROR|}] let%expect_test "test_spec_expressions" = let script = {|:[x] -> :[y] where nested or :[y] -> :[t] where nested---|} in run script; - [%expect_exact {|((Exp Or - ((Spec - ((match_template "(Variable x)") (rule (((Option nested)))) - (rewrite_template ("(Variable y)")))) - (Spec - ((match_template "(Variable y)") (rule (((Option nested)))) - (rewrite_template ("(Variable t)")))))))|}]; + [%expect_exact {|ERROR|}]; let script = {| --- @@ -87,17 +59,4 @@ let%expect_test "test_spec_expressions" = not :[x] and :[y] or :[z] |} in run script; - [%expect_exact {|((Exp Or - ((Spec - ((match_template "(Variable x)") (rule (((Option nested)))) - (rewrite_template ()))) - (Spec - ((match_template "(Variable y)") (rule (((Option nested)))) - (rewrite_template ("(Variable t)")))))) - (Exp Or - ((Exp And - ((Exp Not - ((Spec - ((match_template "(Variable x)") (rule ()) (rewrite_template ()))))) - (Spec ((match_template "(Variable y)") (rule ()) (rewrite_template ()))))) - (Spec ((match_template "(Variable z)") (rule ()) (rewrite_template ()))))))|}] + [%expect_exact {|ERROR|}] diff --git a/test/common/test_statistics.ml b/test/common/test_statistics.ml index fe7a64c..94b7434 100644 --- a/test/common/test_statistics.ml +++ b/test/common/test_statistics.ml @@ -32,7 +32,7 @@ let %expect_test "statistics" = in Alpha.Go.all ~configuration ~template ~source () |> List.filter ~f:(fun { environment; _ } -> - Rule.(sat @@ apply ~match_all:(Alpha.Generic.all ~rule:[Ast.True] ~nested:false) rule environment)) + Rule.(sat @@ apply ~match_all:(Alpha.Generic.all ~rule:[Ast.True]) rule environment)) |> fun matches -> let statistics = Statistics. diff --git a/test/common/test_substitute.ml b/test/common/test_substitute.ml index e2d1570..9c7b441 100644 --- a/test/common/test_substitute.ml +++ b/test/common/test_substitute.ml @@ -1,18 +1,8 @@ open Core +open Comby_kernel open Match - open Matchers -open Rewriter -open Rewrite_template - -let parse metasyntax template = - let (module M) = Matchers.Metasyntax.create metasyntax in - let module Template_parser = Make(M) in - let tree = Template_parser.parse template in - match tree with - | Some tree -> Sexp.to_string_hum (sexp_of_list sexp_of_extracted tree) - | None -> "ERROR: NO PARSE" let%expect_test "substitute_entire_regex_pattern_in_custom_metasyntax" = let metasyntax = @@ -28,6 +18,6 @@ let%expect_test "substitute_entire_regex_pattern_in_custom_metasyntax" = (* Don't just substitute for `$B`, but for `$B:\w+ `. This depends on Regex (more specific syntax) being defined _after_ the general syntax. *) let template = {|$A $B:\w+ |} in let environment = Environment.add (Environment.create ()) "B" "hello" in - let result, _ = Rewrite_template.substitute ~metasyntax template environment in + let result = Rewrite.substitute ~metasyntax template environment in print_string result; [%expect_exact {|$A hello|}] diff --git a/test/common/test_template_constraints.ml b/test/common/test_template_constraints.ml new file mode 100644 index 0000000..78d214e --- /dev/null +++ b/test/common/test_template_constraints.ml @@ -0,0 +1,17 @@ +open Core + +open Comby_kernel +open Matchers + +open Test_helpers + +let%expect_test "implicit_equals" = + let source = "(fun i -> j x) (fun x -> x x)" in + let match_template = "fun :[[a]] -> :[[a]] :[[a]]" in + + run_all_matches (module Alpha.Generic) source match_template; + [%expect_exact {|{"uri":null,"matches":[{"range":{"start":{"offset":16,"line":1,"column":17},"end":{"offset":28,"line":1,"column":29}},"environment":[{"variable":"a","value":"x","range":{"start":{"offset":20,"line":1,"column":21},"end":{"offset":21,"line":1,"column":22}}},{"variable":"a_equal_!@#$000000000003","value":"x","range":{"start":{"offset":25,"line":1,"column":26},"end":{"offset":26,"line":1,"column":27}}},{"variable":"a_equal_!@#$000000000004","value":"x","range":{"start":{"offset":27,"line":1,"column":28},"end":{"offset":28,"line":1,"column":29}}}],"matched":"fun x -> x x"}]} +|}]; + run_all_matches (module Omega.Generic) source match_template; + [%expect_exact {|{"uri":null,"matches":[{"range":{"start":{"offset":16,"line":1,"column":17},"end":{"offset":28,"line":1,"column":29}},"environment":[{"variable":"a","value":"x","range":{"start":{"offset":20,"line":1,"column":21},"end":{"offset":21,"line":1,"column":22}}},{"variable":"a_equal_!@#$000000000005","value":"x","range":{"start":{"offset":25,"line":1,"column":26},"end":{"offset":26,"line":1,"column":27}}},{"variable":"a_equal_!@#$000000000006","value":"x","range":{"start":{"offset":27,"line":1,"column":28},"end":{"offset":28,"line":1,"column":29}}}],"matched":"fun x -> x x"}]} +|}] diff --git a/test/common/test_user_defined_language.ml b/test/common/test_user_defined_language.ml index 3ac0e2b..e53336c 100644 --- a/test/common/test_user_defined_language.ml +++ b/test/common/test_user_defined_language.ml @@ -18,7 +18,7 @@ let run (module E : Engine.S) user_lang source match_template rewrite_template = let%expect_test "user_defined_language" = let user_lang = - Syntax. + Language.Syntax. { user_defined_delimiters = [("case", "esac")] ; escapable_string_literals = None ; raw_string_literals = [] @@ -97,7 +97,7 @@ let%expect_test "user_defined_language_from_json" = in let user_lang = Yojson.Safe.from_string json - |> Matchers.Syntax.of_yojson + |> Matchers.Language.Syntax.of_yojson |> Result.ok_or_failwith in let source = "" in @@ -126,7 +126,7 @@ let%expect_test "user_defined_language_from_json_optional_escapable" = in let user_lang = Yojson.Safe.from_string json - |> Matchers.Syntax.of_yojson + |> Matchers.Language.Syntax.of_yojson |> Result.ok_or_failwith in let source = "" in diff --git a/test/example/metasyntax/test.json b/test/example/metasyntax/test.json new file mode 100644 index 0000000..9a5ba4f --- /dev/null +++ b/test/example/metasyntax/test.json @@ -0,0 +1,8 @@ +{ + "syntax": [ + [ "Hole", [ "Everything" ], [ "Delimited", "$", null ] ], + [ "Hole", [ "Alphanum" ], [ "Delimited", "?", null ] ] + ], + "identifier": + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_" +}