mirror of
https://github.com/comby-tools/comby.git
synced 2024-10-26 13:19:23 +03:00
regex holes refactor
This commit is contained in:
parent
b1e29ce339
commit
9d839d1a1b
@ -3,4 +3,4 @@
|
||||
(public_name comby-kernel.matchers)
|
||||
(instrumentation (backend bisect_ppx))
|
||||
(preprocess (pps ppx_here ppx_sexp_conv ppx_sexp_message ppx_deriving_yojson))
|
||||
(libraries comby-kernel.parsers comby-kernel.match angstrom core_kernel mparser mparser-pcre yojson ppx_deriving_yojson))
|
||||
(libraries comby-kernel.parsers comby-kernel.match angstrom core_kernel mparser mparser-pcre re yojson ppx_deriving_yojson))
|
||||
|
@ -27,6 +27,9 @@ let current_environment_ref : Match.Environment.t ref = ref (Match.Environment.c
|
||||
let (|>>) p f =
|
||||
p >>= fun x -> return (f x)
|
||||
|
||||
let ignore p =
|
||||
p *> return ()
|
||||
|
||||
let debug =
|
||||
match Sys.getenv "DEBUG_COMBY" with
|
||||
| exception Not_found -> false
|
||||
@ -73,7 +76,7 @@ let substitute template env =
|
||||
let record_match_context pos_before pos_after =
|
||||
let open Match.Location in
|
||||
if debug then Format.printf "match context start pos: %d@." pos_before;
|
||||
if debug then Format.printf "match context end pos %d@." pos_after;
|
||||
if debug then Format.printf "match context end pos (this is wrong) %d@." pos_after;
|
||||
let extract_matched_text source { offset = match_start; _ } { offset = match_end; _ } =
|
||||
String.slice source match_start match_end
|
||||
in
|
||||
@ -351,7 +354,55 @@ module Make (Language : Language.S) (Unimplemented : Metasyntax.S) = struct
|
||||
| Ok (Hole { sort; identifier; dimension; _ }, user_state) ->
|
||||
begin
|
||||
match sort with
|
||||
| Regex -> failwith "Not supported (seq chain)"
|
||||
| Regex ->
|
||||
let identifier, pattern = String.lsplit2_exn identifier ~on:'~' in
|
||||
let identifier = if String.(identifier = "") then "_" else identifier in
|
||||
if debug then Format.printf "Regex: Id: %s Pat: %s@." identifier pattern;
|
||||
let compiled_regexp = Regexp.PCRE.make_regexp pattern in
|
||||
let regexp_parser = Regexp.PCRE.regexp compiled_regexp in
|
||||
let base_parser = [ regexp_parser; end_of_input >>= fun () -> return "" ] in (* the eof matters here for that one tricky test case *)
|
||||
let base_parser =
|
||||
(* adds begin line parser if the pattern has ^ anchor *)
|
||||
if String.is_prefix pattern ~prefix:"^" then
|
||||
let p =
|
||||
Regexp.PCRE.make_regexp (String.drop_prefix pattern 1) |> Regexp.PCRE.regexp
|
||||
in
|
||||
(char '\n' >>= fun _ -> p)::base_parser
|
||||
else
|
||||
base_parser
|
||||
in
|
||||
let base_parser =
|
||||
if String.is_suffix pattern ~suffix:"$" then
|
||||
let p = Regexp.PCRE.make_regexp (String.drop_prefix pattern 1) |> Regexp.PCRE.regexp in
|
||||
(p <* (ignore @@ char '\n' <|> end_of_input))::base_parser
|
||||
else
|
||||
base_parser
|
||||
in
|
||||
pos >>= fun offset ->
|
||||
if debug then Format.printf "(X)@.";
|
||||
choice base_parser
|
||||
>>= fun value ->
|
||||
if debug then Format.printf "Regex match @@ %d value %s@." offset value;
|
||||
let offset =
|
||||
if String.length value = 0 then
|
||||
offset (*offset + 1 this may not matter, if we correct for the whole match conext *)
|
||||
else
|
||||
offset
|
||||
in
|
||||
(if String.length value = 0 then
|
||||
(*advance 1*)
|
||||
advance 0
|
||||
else
|
||||
advance @@ String.length value) >>= fun () ->
|
||||
if debug then Format.printf "(Y)@.";
|
||||
acc >>= fun _ ->
|
||||
let m =
|
||||
{ offset
|
||||
; identifier
|
||||
; text = value
|
||||
}
|
||||
in
|
||||
r user_state (Match m)
|
||||
| Alphanum ->
|
||||
pos >>= fun offset ->
|
||||
many1 (generate_single_hole_parser ())
|
||||
@ -515,10 +566,10 @@ module Make (Language : Language.S) (Unimplemented : Metasyntax.S) = struct
|
||||
it in the template. So it should always match to
|
||||
end_of_input (not empty string) *)
|
||||
if !i = 0 then
|
||||
(if debug then Format.printf "hole until: match to the end of this level@.";
|
||||
(if debug then Format.printf "hole everything until: match to the end of this level@.";
|
||||
end_of_input)
|
||||
else
|
||||
(if debug then Format.printf "hole until: append suffix@.";
|
||||
(if debug then Format.printf "hole everything until: append suffix@.";
|
||||
skip_unit acc)
|
||||
in
|
||||
let first_pos = ref (-1) in
|
||||
@ -567,6 +618,7 @@ module Make (Language : Language.S) (Unimplemented : Metasyntax.S) = struct
|
||||
; text
|
||||
}
|
||||
in
|
||||
if debug then Format.printf "Recording!@.";
|
||||
r user_state (Match m)
|
||||
end
|
||||
| Ok (_, _user_state) -> failwith "unreachable: _signal_hole parsed but not handled by Hole variant"
|
||||
@ -618,6 +670,23 @@ module Make (Language : Language.S) (Unimplemented : Metasyntax.S) = struct
|
||||
*> identifier_parser ()
|
||||
<* string "]"
|
||||
|
||||
let regex_expression () =
|
||||
fix (fun expr ->
|
||||
choice
|
||||
[ lift (fun x -> Format.sprintf "[%s]" @@ String.concat x) (char '[' *> many1 expr <* char ']')
|
||||
; lift (fun c -> Format.sprintf {|\%c|} c) (char '\\' *> any_char)
|
||||
; lift Char.to_string (not_char ']')
|
||||
])
|
||||
|
||||
let regex_body () =
|
||||
lift2
|
||||
(fun v e -> Format.sprintf "%s~%s" v (String.concat e))
|
||||
(identifier_parser ())
|
||||
(char '~' *> many1 (regex_expression ()))
|
||||
|
||||
let regex_hole_parser () =
|
||||
string ":[" *> regex_body () <* string "]"
|
||||
|
||||
let hole_parser sort dimension : (production * 'a) t t =
|
||||
let open Hole in
|
||||
let hole_parser =
|
||||
@ -628,7 +697,7 @@ module Make (Language : Language.S) (Unimplemented : Metasyntax.S) = struct
|
||||
| Line -> line_hole_parser ()
|
||||
| Non_space -> non_space_hole_parser ()
|
||||
| Expression -> expression_hole_parser ()
|
||||
| Regex -> single_hole_parser ()
|
||||
| Regex -> regex_hole_parser ()
|
||||
in
|
||||
let skip_signal hole = skip_unit (string "_signal_hole") |>> fun () -> (Hole hole, acc) in
|
||||
hole_parser |>> fun identifier -> skip_signal { sort; identifier; dimension; optional = false; at_depth = None }
|
||||
@ -672,7 +741,9 @@ module Make (Language : Language.S) (Unimplemented : Metasyntax.S) = struct
|
||||
|
||||
let general_parser_generator : (production * 'a) t t =
|
||||
let spaces : (production * 'a) t t =
|
||||
many1 (comment_parser <|> spaces1) |>> fun result -> generate_spaces_parser (String.concat result)
|
||||
lift
|
||||
(fun result -> generate_spaces_parser (String.concat result))
|
||||
(many1 (comment_parser <|> spaces1))
|
||||
in
|
||||
let other =
|
||||
(many1 (Parser.Deprecate.any_char_except ~reserved:Deprecate.reserved) |>> String.of_char_list)
|
||||
@ -740,38 +811,31 @@ module Make (Language : Language.S) (Unimplemented : Metasyntax.S) = struct
|
||||
; any_char |>> Char.to_string
|
||||
]
|
||||
in
|
||||
(* many1 may be appropriate *)
|
||||
let matches =
|
||||
many @@
|
||||
many_till (prefix >>= fun s -> r acc (String s))
|
||||
begin
|
||||
at_end_of_input >>= fun at_end ->
|
||||
if debug then Format.printf "We are at the end? %b.@." at_end;
|
||||
if at_end then fail "end"
|
||||
else
|
||||
(* We may have found a match *)
|
||||
let match_one =
|
||||
pos >>= fun start_pos ->
|
||||
let matched =
|
||||
matcher >>= fun production ->
|
||||
if debug then Format.printf "Full match context result@.";
|
||||
current_environment_ref := Match.Environment.create ();
|
||||
consumed matcher >>= fun value ->
|
||||
if debug then Format.printf "Full match context result: %s@." value;
|
||||
pos >>= fun end_pos ->
|
||||
record_match_context start_pos end_pos;
|
||||
let start_pos =
|
||||
if String.length value = 0 then
|
||||
start_pos (*offset + 1 this may not matter, if we correct for the whole match conext *)
|
||||
else
|
||||
start_pos
|
||||
in
|
||||
(if String.length value = 0 then
|
||||
advance 1
|
||||
else
|
||||
return ()) >>= fun () ->
|
||||
if debug then Format.printf "Calculated end_pos %d@." end_pos;
|
||||
record_match_context start_pos (end_pos);
|
||||
current_environment_ref := Match.Environment.create ();
|
||||
return production
|
||||
in
|
||||
let no_match =
|
||||
(* Reset any partial binds of holes in environment. *)
|
||||
if debug then Format.printf "Failed to match and not at end.@.";
|
||||
current_environment_ref := Match.Environment.create ();
|
||||
(* cannot return: we must try some other parser or else we'll
|
||||
infini loop! We can't advance because we haven't
|
||||
successfully parsed the character at the current position.
|
||||
So: fail and try another parser in the choice. *)
|
||||
fail "no match, try something else"
|
||||
in
|
||||
choice [ matched; no_match ]
|
||||
end
|
||||
return (Unit, "")
|
||||
in
|
||||
(* many1 may be appropriate *)
|
||||
let prefix = (prefix >>= fun s -> r acc (String s)) in
|
||||
let first_match_attempt = choice [match_one; prefix] in (* consumes a character in prefix if no match *)
|
||||
let matches = many_till first_match_attempt end_of_input in
|
||||
matches >>= fun _result ->
|
||||
r acc Unit
|
||||
|
||||
@ -908,7 +972,6 @@ module Make (Language : Language.S) (Unimplemented : Metasyntax.S) = struct
|
||||
(* Don't reverse the list for non-nested matches--it matters for rewriting. *)
|
||||
aux_all ?configuration ~nested ~template ~source:original_source ()
|
||||
|
||||
|
||||
let first ?configuration ?shift:_ template source : Match.t Or_error.t =
|
||||
configuration_ref := Option.value configuration ~default:!configuration_ref;
|
||||
matches_ref := [];
|
||||
|
131
lib/kernel/matchers/regexp.ml
Normal file
131
lib/kernel/matchers/regexp.ml
Normal file
@ -0,0 +1,131 @@
|
||||
open Angstrom
|
||||
|
||||
let debug =
|
||||
match Sys.getenv "DEBUG_COMBY" with
|
||||
| exception Not_found -> false
|
||||
| _ -> true
|
||||
|
||||
module type Regexp_engine_intf = sig
|
||||
type t
|
||||
type substrings
|
||||
|
||||
val make: string -> t
|
||||
|
||||
val get_substring: substrings -> int -> string option
|
||||
|
||||
val get_all_substrings: substrings -> string array
|
||||
|
||||
val exec: rex:t -> pos:int -> Bytes.t -> substrings option
|
||||
end
|
||||
|
||||
type t =
|
||||
{ buffer_pos : int
|
||||
; buffer : bytes
|
||||
}
|
||||
|
||||
(* I think I should just implement the analog of string_ for regex with some bounded buffer size. *)
|
||||
|
||||
module Make (Regexp: Regexp_engine_intf) = struct
|
||||
(* https://sourcegraph.com/github.com/comby-tools/mparser/-/blob/src/mParser_Char_Stream.ml#L231:8 *)
|
||||
let match_regexp s pos rex =
|
||||
Regexp.exec ~rex ~pos:(pos - s.buffer_pos) s.buffer
|
||||
|
||||
let make_regexp pat =
|
||||
Regexp.make pat
|
||||
|
||||
(* TODO: tests and blit thing below *)
|
||||
|
||||
(* FIXME: size. about advance => want to use internal unsafe_apply_opt
|
||||
actually. cf. string_ in angstrom.ml. instead, trying "do peek, then
|
||||
advance/commit." *)
|
||||
let regexp rex : string Angstrom.t =
|
||||
(* Why do Unsafe if I can just do peek_string? => So I don't allocate on copy of buffer. *)
|
||||
(* But it looks like we can't avoid allocation in converting bigstringaf to bytes *)
|
||||
Unsafe.peek 1 (fun buffer ~off ~len:_ -> Bigstringaf.length buffer - off) >>= fun n ->
|
||||
Unsafe.peek n (fun buffer ~off ~len ->
|
||||
(* This still does a copy :( *)
|
||||
let bytes = Bytes.create len in
|
||||
Bigstringaf.unsafe_blit_to_bytes buffer ~src_off:off bytes ~dst_off:0 ~len;
|
||||
if debug then Format.printf "Matching regex against string: %S@." @@ Bytes.to_string bytes;
|
||||
match Regexp.exec ~rex ~pos:0 bytes with
|
||||
| None ->
|
||||
if debug then Format.printf "None (1)@.";
|
||||
None
|
||||
| Some substrings ->
|
||||
match Regexp.get_substring substrings 0 with
|
||||
| None ->
|
||||
if debug then Format.printf "None (2)@.";
|
||||
None
|
||||
| Some result ->
|
||||
if debug then Format.printf "Matchy Matchy (3)@.";
|
||||
Some (result, String.length result))
|
||||
>>= function
|
||||
| Some (result, _n) ->
|
||||
(* if empty string matches, this hole like for optionals (x?), advance 1. *)
|
||||
(* we want to advance one so parsing can continue, but if we advance 1 here we will think
|
||||
that the match context is at least length 1 and not 0 if this hole is the only thing
|
||||
defining the match context *)
|
||||
(* let n = if n > 0 then n else 1 in
|
||||
advance n >>= fun () -> *)
|
||||
if debug then Format.printf "Result indeed: %s len %d@." result _n;
|
||||
return result
|
||||
| None ->
|
||||
fail "No match"
|
||||
end
|
||||
|
||||
module PCRE = struct
|
||||
module Engine : Regexp_engine_intf = struct
|
||||
type t = Pcre.regexp
|
||||
type substrings = Pcre.substrings
|
||||
|
||||
let compile_flags =
|
||||
Pcre.cflags [ `ANCHORED ]
|
||||
|
||||
let make pattern =
|
||||
Pcre.regexp ~iflags:compile_flags pattern
|
||||
|
||||
let get_substring s idx =
|
||||
match Pcre.get_substring s idx with
|
||||
| result -> Some result
|
||||
| exception Not_found
|
||||
| exception Invalid_argument _ -> None
|
||||
|
||||
let get_all_substrings s =
|
||||
Pcre.get_substrings s
|
||||
|
||||
let exec ~rex ~pos b =
|
||||
match Pcre.exec ~pos ~rex (Bytes.unsafe_to_string b) with
|
||||
| result -> Some result
|
||||
| exception Not_found -> None
|
||||
end
|
||||
|
||||
include Make(Engine)
|
||||
end
|
||||
|
||||
module RE = struct
|
||||
module Engine : Regexp_engine_intf = struct
|
||||
type t = Re.re
|
||||
type substrings = Re.substrings
|
||||
|
||||
let compile_flags =
|
||||
[ `Anchored ]
|
||||
|
||||
let make pattern =
|
||||
Re.Perl.(compile (re ~opts:compile_flags pattern))
|
||||
|
||||
let get_substring s idx =
|
||||
match Re.get s idx with
|
||||
| result -> Some result
|
||||
| exception Not_found -> None
|
||||
|
||||
let get_all_substrings s =
|
||||
Re.get_all s
|
||||
|
||||
let exec ~rex ~pos b =
|
||||
match Re.exec ~pos rex (Bytes.unsafe_to_string b) with
|
||||
| result -> Some result
|
||||
| exception Not_found -> None
|
||||
end
|
||||
|
||||
include Make(Engine)
|
||||
end
|
37
lib/kernel/matchers/regexp.mli
Normal file
37
lib/kernel/matchers/regexp.mli
Normal file
@ -0,0 +1,37 @@
|
||||
module type Regexp_engine_intf = sig
|
||||
type t
|
||||
type substrings
|
||||
|
||||
val make: string -> t
|
||||
|
||||
val get_substring: substrings -> int -> string option
|
||||
|
||||
val get_all_substrings: substrings -> string array
|
||||
|
||||
val exec: rex:t -> pos:int -> Bytes.t -> substrings option
|
||||
end
|
||||
|
||||
(** Represents character stream right now.
|
||||
Compare char stream interface on t and match_regexp descriptions
|
||||
in https://sourcegraph.com/github.com/comby-tools/mparser/-/blob/src/mParser_Char_Stream.mli#L102:8
|
||||
*)
|
||||
type t
|
||||
|
||||
module Make (Regexp : Regexp_engine_intf): sig
|
||||
(* do not use this, use regexp. *)
|
||||
val match_regexp: t -> int -> Regexp.t -> Regexp.substrings option
|
||||
|
||||
val make_regexp : string -> Regexp.t
|
||||
|
||||
val regexp : Regexp.t -> string Angstrom.t
|
||||
end
|
||||
|
||||
module PCRE : sig
|
||||
module Engine : Regexp_engine_intf
|
||||
include module type of Make (Engine)
|
||||
end
|
||||
|
||||
module RE : sig
|
||||
module Engine : Regexp_engine_intf
|
||||
include module type of Make (Engine)
|
||||
end
|
@ -5,8 +5,7 @@
|
||||
test_custom_metasyntax
|
||||
test_optional_holes
|
||||
test_special_matcher_cases
|
||||
test_substring_disabled
|
||||
test_regex_holes)
|
||||
test_substring_disabled)
|
||||
(inline_tests)
|
||||
(preprocess (pps ppx_expect ppx_sexp_message ppx_deriving_yojson))
|
||||
(libraries
|
||||
|
@ -2,6 +2,7 @@
|
||||
(name common_test_integration)
|
||||
(package comby)
|
||||
(modules
|
||||
; test_server ; disabled, factor out.
|
||||
test_helpers
|
||||
test_cli_helper
|
||||
test_script
|
||||
@ -10,7 +11,6 @@
|
||||
test_omega
|
||||
test_cli
|
||||
test_cli_list
|
||||
; test_server
|
||||
test_statistics
|
||||
test_offset_conversion
|
||||
test_parse_rule
|
||||
@ -65,6 +65,9 @@
|
||||
|
||||
test_nested_matches_alpha
|
||||
test_nested_matches_omega
|
||||
|
||||
test_regex_holes_alpha
|
||||
test_regex_holes_omega
|
||||
)
|
||||
(inline_tests)
|
||||
(preprocess (pps ppx_expect ppx_sexp_message ppx_deriving_yojson))
|
||||
|
296
test/common/test_regex_holes_omega.ml
Normal file
296
test/common/test_regex_holes_omega.ml
Normal file
@ -0,0 +1,296 @@
|
||||
open Core
|
||||
|
||||
open Rewriter
|
||||
|
||||
open Matchers.Omega
|
||||
|
||||
let configuration = Matchers.Configuration.create ~match_kind:Fuzzy ()
|
||||
|
||||
let run ?(configuration = configuration) (module M : Matchers.Matcher.S) source match_template ?rule rewrite_template =
|
||||
let rule =
|
||||
match rule with
|
||||
| Some rule -> Language.Rule.create rule |> Or_error.ok_exn
|
||||
| None -> Language.Rule.create "where true" |> Or_error.ok_exn
|
||||
in
|
||||
M.all ~configuration ~template:match_template ~source ()
|
||||
|> List.filter ~f:(fun { Match.environment; _ } -> Language.Rule.Alpha.(sat @@ apply rule environment))
|
||||
|> function
|
||||
| [] -> print_string "No matches."
|
||||
| results ->
|
||||
Option.value_exn (Rewrite.all ~source ~rewrite_template results)
|
||||
|> (fun { rewritten_source; _ } -> rewritten_source)
|
||||
|> print_string
|
||||
|
||||
let%expect_test "regex_holes_simple" =
|
||||
let source = {|foo|} in
|
||||
let match_template = {|:[x~\w+]|} in
|
||||
let rewrite_template = {|(:[x])|} in
|
||||
|
||||
run (module Generic) source match_template rewrite_template;
|
||||
[%expect_exact {|(foo)|}]
|
||||
|
||||
let%expect_test "regex_holes_simple_posix" =
|
||||
let source = {|foo|} in
|
||||
let match_template = {|:[x~[[:alpha:]]]|} in
|
||||
let rewrite_template = {|(:[x])|} in
|
||||
|
||||
run (module Generic) source match_template rewrite_template;
|
||||
[%expect_exact {|(f)(o)(o)|}]
|
||||
|
||||
let%expect_test "regex_holes_substring" =
|
||||
let source = {|foo()|} in
|
||||
let match_template = {|:[x~o\w]()|} in
|
||||
let rewrite_template = {|(:[x])|} in
|
||||
|
||||
run (module Generic) source match_template rewrite_template;
|
||||
[%expect_exact {|f(oo)|}]
|
||||
|
||||
let%expect_test "regex_holes_empty_string_terminates" =
|
||||
let source = {|foo()|} in
|
||||
let match_template = {|:[x~|]|} in
|
||||
let rewrite_template = {|(:[x])|} in
|
||||
|
||||
run (module Generic) source match_template rewrite_template;
|
||||
[%expect_exact {|()f()o()o()(())|}]
|
||||
|
||||
let%expect_test "regex_holes_repetition_takes_precedence" =
|
||||
let source = {|foobar()|} in
|
||||
(* this will _not_ match because bar is consumed by \w before we look ahead *)
|
||||
let match_template = {|:[x~\w+]bar()|} in
|
||||
let rewrite_template = {|(:[x])|} in
|
||||
|
||||
run (module Generic) source match_template rewrite_template;
|
||||
[%expect_exact {|No matches.|}]
|
||||
|
||||
let%expect_test "regex_holes_negated_match" =
|
||||
let source = {|(literally_anyting_except_close_paren?!@#$%^&*[])|} in
|
||||
let match_template = {|(:[x~[^)]+])|} in
|
||||
let rewrite_template = {|(:[x])|} in
|
||||
|
||||
run (module Generic) source match_template rewrite_template;
|
||||
[%expect_exact {|(literally_anyting_except_close_paren?!@#$%^&*[])|}];
|
||||
|
||||
let source = {|(arg1, arg2, arg3)|} in
|
||||
let match_template = {|:[x~[^,() ]+]|} in
|
||||
let rewrite_template = {|(:[x])|} in
|
||||
|
||||
run (module Generic) source match_template rewrite_template;
|
||||
[%expect_exact {|((arg1), (arg2), (arg3))|}]
|
||||
|
||||
let%expect_test "regex_holes_dot_star_ok_and_this_is_for_newline" =
|
||||
let source = "foo()\nbar()" in
|
||||
let match_template = {|:[x~.*]|} in
|
||||
let rewrite_template = {|(:[x])|} in
|
||||
|
||||
run (module Generic) source match_template rewrite_template;
|
||||
[%expect_exact {|(foo())()
|
||||
(bar())|}]
|
||||
|
||||
let%expect_test "regex_holes_optional" =
|
||||
let source = "nonovember no november no vember" in
|
||||
let match_template = {|:[x~no(vember)?]|} in
|
||||
let rewrite_template = {|(:[x])|} in
|
||||
|
||||
run (module Generic) source match_template rewrite_template;
|
||||
[%expect_exact {|(no)(november) (no) (november) (no) vember|}]
|
||||
|
||||
let%expect_test "regex_holes_optional_spaces" =
|
||||
let source = "nonovember no november no vember" in
|
||||
let match_template = {|no :[x~(vember)?]|} in
|
||||
let rewrite_template = {|(:[x])|} in
|
||||
|
||||
run (module Generic) source match_template rewrite_template;
|
||||
[%expect_exact {|nonovember ()november (vember)|}]
|
||||
|
||||
let%expect_test "regex_holes_optional_doesnt_work_outside_regex" =
|
||||
let source = "no" in
|
||||
let match_template = {|no:[x~(vember)?]|} in
|
||||
let rewrite_template = {|(:[x])|} in
|
||||
|
||||
run (module Generic) source match_template rewrite_template;
|
||||
[%expect_exact {|()|}];
|
||||
|
||||
let source = "foo bar foobar" in
|
||||
let match_template = {|:[x~\s*?]|} in
|
||||
let rewrite_template = {|(:[x])|} in
|
||||
|
||||
run (module Generic) source match_template rewrite_template;
|
||||
[%expect_exact {|()f()o()o() ()b()a()r() ()f()o()o()b()a()r|}];
|
||||
|
||||
let source = "foo bar foobar" in
|
||||
let match_template = {|:[x~\s*]|} in
|
||||
let rewrite_template = {|(:[x])|} in
|
||||
|
||||
run (module Generic) source match_template rewrite_template;
|
||||
[%expect_exact {|()f()o()o( )()b()a()r( )()f()o()o()b()a()r|}]
|
||||
|
||||
|
||||
let%expect_test "regex_holes_optional_strip_no_from_november_outside_regex" =
|
||||
let source = "nonovember no november no vember" in
|
||||
let match_template = {|no:[x~(vember)?]|} in
|
||||
let rewrite_template = {|(:[x])|} in
|
||||
|
||||
run (module Generic) source match_template rewrite_template;
|
||||
[%expect_exact {|()(vember) () (vember) () vember|}]
|
||||
|
||||
let%expect_test "regex_holes_optional_strip_no_from_november_inside_regex" =
|
||||
let source = "nonovember no november no vember" in
|
||||
let match_template = {|:[x~no(vember)?]|} in
|
||||
let rewrite_template = {|(:[x])|} in
|
||||
|
||||
run (module Generic) source match_template rewrite_template;
|
||||
[%expect_exact {|(no)(november) (no) (november) (no) vember|}]
|
||||
|
||||
let%expect_test "leading_spaces_beginning_line_anchor" =
|
||||
let source = {|
|
||||
a
|
||||
b
|
||||
c
|
||||
|}
|
||||
in
|
||||
let match_template = {|:[x~^(\s+)]|} in
|
||||
let rewrite_template = {|(:[x])|} in
|
||||
|
||||
run (module Generic) source match_template rewrite_template;
|
||||
[%expect_exact {|( )a( )b( )c(
|
||||
)|}]
|
||||
|
||||
let%expect_test "spaces_star" =
|
||||
let source = {|
|
||||
a
|
||||
b
|
||||
c
|
||||
d
|
||||
|}
|
||||
in
|
||||
let match_template = {|:[x~\s*]|} in
|
||||
let rewrite_template = {|(:[x])|} in
|
||||
|
||||
run (module Generic) source match_template rewrite_template;
|
||||
(* The <spaces><empty space>chars is how this behaves on https://regexr.com/59ft0 as well, see replace *)
|
||||
[%expect_exact {|(
|
||||
)()a(
|
||||
)()b(
|
||||
)()c(
|
||||
)()d(
|
||||
)|}]
|
||||
|
||||
(*
|
||||
let%expect_test "end_line_anchor" =
|
||||
let source = {|
|
||||
aaa bbb
|
||||
aaa bbb ccc
|
||||
ccc ddd
|
||||
|}
|
||||
in
|
||||
let match_template = {|:[x~\w+ bbb$]|} in
|
||||
let rewrite_template = {|(:[x])|} in
|
||||
|
||||
run (module Generic) source match_template rewrite_template;
|
||||
[%expect_exact {|
|
||||
(aaa bbb)aaa bbb ccc
|
||||
ccc ddd
|
||||
|}]
|
||||
*)
|
||||
|
||||
let%expect_test "word_boundaries" =
|
||||
let source = {|
|
||||
foo(bar, baz(),
|
||||
|
||||
|
||||
qux.derp)
|
||||
|}
|
||||
in
|
||||
let match_template = {|:[x~\b\w+\b]|} in
|
||||
let rewrite_template = {|(>:[x]<)|} in
|
||||
|
||||
run (module Generic) source match_template rewrite_template;
|
||||
[%expect_exact {|
|
||||
(>foo<)((>bar<), (>baz<)(),
|
||||
|
||||
|
||||
(>qux<).(>derp<))
|
||||
|}]
|
||||
|
||||
(* I previously assumed [^ ] would not match newlines, but it does, and is the
|
||||
same as regexr https://regexr.com/59fst. To not match newlines, see the next
|
||||
test with [^\s] *)
|
||||
let%expect_test "do_not_match_space" =
|
||||
let source = {|
|
||||
foo(bar, baz(),
|
||||
|
||||
|
||||
qux.derp)
|
||||
|}
|
||||
in
|
||||
let match_template = {|:[x~[^, ]+]|} in
|
||||
let rewrite_template = {|(>:[x]<)|} in
|
||||
|
||||
run (module Generic) source match_template rewrite_template;
|
||||
[%expect_exact {|(>
|
||||
foo(bar<), (>baz()<),(>
|
||||
|
||||
|
||||
<) (>qux.derp)
|
||||
<)|}]
|
||||
|
||||
let%expect_test "do_not_match_whitespace" =
|
||||
let source = {|
|
||||
foo(bar, baz(),
|
||||
|
||||
|
||||
qux.derp)
|
||||
|}
|
||||
in
|
||||
let match_template = {|:[x~[^,\s]+]|} in
|
||||
let rewrite_template = {|(>:[x]<)|} in
|
||||
|
||||
run (module Generic) source match_template rewrite_template;
|
||||
[%expect_exact {|
|
||||
(>foo(bar<), (>baz()<),
|
||||
|
||||
|
||||
(>qux.derp)<)
|
||||
|}]
|
||||
|
||||
let%expect_test "eof_anchor" =
|
||||
let source = {|
|
||||
setScore(5)
|
||||
setScore(6)
|
||||
setScore(6.5)
|
||||
setScore("")
|
||||
setScore("hi")
|
||||
setScore("hi" + "there")
|
||||
setScore('ho')
|
||||
setScore(x)
|
||||
setScore(null)
|
||||
setScore(4/3.0)
|
||||
setScore(4.0/3.0)
|
||||
setScore(4/3)
|
||||
|}
|
||||
in
|
||||
let match_template = {|setScore(:[1])|} in
|
||||
let rule = {|
|
||||
where match :[1] {
|
||||
| ":[~^\\d+$]" -> false
|
||||
| ":[_]" -> true
|
||||
}
|
||||
|}
|
||||
in
|
||||
let rewrite_template = "setScore( /*CHECK ME*/ :[1])" in
|
||||
|
||||
run (module Generic) source match_template ~rule rewrite_template;
|
||||
[%expect_exact {|
|
||||
setScore(5)
|
||||
setScore(6)
|
||||
setScore( /*CHECK ME*/ 6.5)
|
||||
setScore( /*CHECK ME*/ "")
|
||||
setScore( /*CHECK ME*/ "hi")
|
||||
setScore( /*CHECK ME*/ "hi" + "there")
|
||||
setScore( /*CHECK ME*/ 'ho')
|
||||
setScore( /*CHECK ME*/ x)
|
||||
setScore( /*CHECK ME*/ null)
|
||||
setScore( /*CHECK ME*/ 4/3.0)
|
||||
setScore( /*CHECK ME*/ 4.0/3.0)
|
||||
setScore( /*CHECK ME*/ 4/3)
|
||||
|}]
|
Loading…
Reference in New Issue
Block a user