add support for regex holes (#198)

This commit is contained in:
Rijnard van Tonder 2020-08-08 02:01:06 -07:00 committed by GitHub
parent 872ca91ba5
commit a8f6cb7694
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 329 additions and 4 deletions

View File

@ -1,12 +1,16 @@
open Core
open MParser
open MParser_PCRE
open Configuration
open Match
open Range
open Location
open Types
(* Regular-expression parser support: MakeRegexp applied to Regexp, both names
   being brought into scope by the opens above. *)
module R = MakeRegexp(Regexp)
(* Mutable cell initialised with a configuration created from defaults. *)
let configuration_ref = ref (Configuration.create ())
(* Fixed to false here. NOTE(review): the name suggests it relaxes how holes
   match next to delimiters -- confirm against the use sites. *)
let weaken_delimiter_hole_matching = false
@ -237,6 +241,25 @@ module Make (Syntax : Syntax.S) (Info : Info.S) = struct
(* Parser for the double-bracket hole syntax: whatever [hole_body ()] accepts,
   wrapped between ":[[" and "]]". Both delimiters are consumed and dropped;
   the value produced is the result of [hole_body ()]. *)
let alphanum_hole_parser () =
  let opening = string ":[[" in
  let closing = string "]]" in
  opening >> hole_body () << closing
(* Parses the inside of a regex hole, written identifier~pattern, and yields
   the pair (false, "identifier~pattern").

   The pattern is a non-empty sequence of fragments, each returned as the text
   it was parsed from:
   - a bracketed character class, whose contents are themselves fragments (so
     classes may nest, as in [[:alpha:]]);
   - a backslash followed by any single character;
   - any single character other than a closing bracket.

   When [debug] is set, the accepted text is printed. *)
let regex_body () =
  let rec fragment s =
    let char_class =
      char '[' >> many1 fragment << char ']'
      |>> fun parts -> Format.sprintf "[%s]" (String.concat parts)
    in
    let escaped = char '\\' >> any_char |>> fun c -> Format.sprintf "\\%c" c in
    let plain = is_not (char ']') |>> Char.to_string in
    choice [ char_class; escaped; plain ] s
  in
  let named_pattern =
    identifier () >>= fun name ->
    char '~' >> many1 fragment >>= fun parts ->
    return (Format.sprintf "%s~%s" name (String.concat parts))
  in
  named_pattern >>= fun identifier ->
  if debug then Format.printf "Regex accepts %s@." identifier;
  return (false, identifier)
(* Parser for a regex hole: the text accepted by [regex_body ()] wrapped
   between ":[" and "]". The delimiters are consumed and dropped; the value
   produced is the result of [regex_body ()]. *)
let regex_hole_parser () =
  let opening = string ":[" in
  let closing = string "]" in
  opening >> regex_body () << closing
let reserved_holes () =
let alphanum = alphanum_hole_parser () |>> snd in
let expression = expression_hole_parser () |>> snd in
@ -244,12 +267,14 @@ module Make (Syntax : Syntax.S) (Info : Info.S) = struct
let non_space = non_space_hole_parser () |>> snd in
let blank = blank_hole_parser () |>> snd in
let line = line_hole_parser () |>> snd in
let regex = regex_hole_parser () |>> snd in
[ non_space
; line
; blank
; alphanum
; expression
; everything
; regex
]
let reserved_delimiters () =
@ -322,6 +347,12 @@ module Make (Syntax : Syntax.S) (Info : Info.S) = struct
get_pos >>= fun (pre_index, pre_line, pre_column) ->
p >>= fun matched ->
get_pos >>= fun (post_index, post_line, post_column) ->
let post_index, post_line, post_column =
if String.(concat matched = "") then
pre_index, pre_line, pre_column
else
post_index, post_line, post_column
in
update_user_state
(fun ({ Match.environment; _ } as result) ->
if debug then begin
@ -593,6 +624,40 @@ module Make (Syntax : Syntax.S) (Info : Info.S) = struct
| Success Hole { sort; identifier; optional; dimension } ->
begin
match sort with
| Regex ->
let identifier, pattern = String.lsplit2_exn identifier ~on:'~' in
if debug then Format.printf "Regex: Id: %s Pat: %s@." identifier pattern;
let compiled_regexp = R.make_regexp pattern in
let regexp_parser = R.regexp compiled_regexp in
let base_parser = [ regexp_parser ] in
let base_parser =
(* adds begin line parser if the pattern has ^ anchor *)
if String.is_prefix pattern ~prefix:"^" then
let p =
R.make_regexp (String.drop_prefix pattern 1)
|> R.regexp
in
(char '\n' >>= fun _ -> p)::base_parser
else
base_parser
in
let base_parser =
(* adds end line parser if the pattern has $ anchor *)
if String.is_suffix pattern ~suffix:"$" then
let p =
R.make_regexp (String.drop_suffix pattern 1)
|> R.regexp
in
(p << char '\n')::base_parser
else
base_parser
in
let hole_semantics =
choice base_parser >>= fun result ->
if debug then Format.printf "Regex success: %s@." result;
return [result]
in
(record_matches identifier hole_semantics)::acc
| Alphanum ->
let allowed = choice [alphanum; char '_'] |>> String.of_char in
let hole_semantics = many1 allowed in
@ -773,6 +838,7 @@ module Make (Syntax : Syntax.S) (Info : Info.S) = struct
| Line -> line_hole_parser ()
| Blank -> blank_hole_parser ()
| Alphanum -> alphanum_hole_parser ()
| Regex -> regex_hole_parser ()
in
let skip_signal hole = skip (string "_signal_hole") |>> fun () -> Hole hole in
hole_parser |>> fun (optional, identifier) -> skip_signal { sort; identifier; dimension; optional }
@ -981,7 +1047,13 @@ module Make (Syntax : Syntax.S) (Info : Info.S) = struct
match first' shift p original_source with
| Ok ({range = { match_start; match_end; _ }; _} as result) ->
let shift = match_end.offset in
let matched = extract_matched_text original_source match_start match_end in
let shift, matched =
if match_start.offset = match_end.offset then
match_start.offset + 1, "" (* advance one if the matched content is the empty string *)
else
shift, extract_matched_text original_source match_start match_end
in
if debug then Format.printf "Extracted matched: %s" matched;
let result = { result with matched } in
if shift >= String.length original_source then
result :: acc

View File

@ -336,6 +336,7 @@ module Make (Syntax : Syntax.S) (Info : Info.S) = struct
| Ok (Hole { sort; identifier; dimension; _ }, user_state) ->
begin
match sort with
| Regex -> failwith "Not supported (seq chain)"
| Alphanum ->
pos >>= fun offset ->
many1 (generate_single_hole_parser ())
@ -608,6 +609,7 @@ module Make (Syntax : Syntax.S) (Info : Info.S) = struct
| Line -> line_hole_parser ()
| Non_space -> non_space_hole_parser ()
| Expression -> expression_hole_parser ()
| Regex -> single_hole_parser ()
in
let skip_signal hole = skip_unit (string "_signal_hole") |>> fun () -> (Hole hole, acc) in
hole_parser |>> fun identifier -> skip_signal { sort; identifier; dimension; optional = false }

View File

@ -66,6 +66,7 @@ module Hole = struct
| Non_space
| Line
| Blank
| Regex
type t =
{ sort : sort
@ -81,6 +82,7 @@ module Hole = struct
; Non_space
; Line
; Blank
; Regex
]
end

View File

@ -9,8 +9,7 @@ let debug =
let substitute_match_contexts (matches: Match.t list) source replacements =
if debug then
Format.printf "Matches: %d | Replacements: %d@." (List.length matches) (List.length replacements);
if debug then Format.printf "Matches: %d | Replacements: %d@." (List.length matches) (List.length replacements);
let rewrite_template, environment =
List.fold2_exn
matches replacements

View File

@ -3,7 +3,8 @@
(modules
test_optional_holes
test_special_matcher_cases
test_substring_disabled)
test_substring_disabled
test_regex_holes)
(inline_tests)
(preprocess (pps ppx_expect ppx_sexp_message ppx_deriving_yojson ppx_deriving_yojson.runtime))
(libraries

View File

@ -0,0 +1,249 @@
open Core
open Matchers
open Rewriter
open Matchers.Alpha
(* Default configuration used by [run] below when the caller passes none:
   created with match_kind set to Fuzzy, everything else left to
   Configuration.create. *)
let configuration = Configuration.create ~match_kind:Fuzzy ()
(* Test driver: finds all matches of [match_template] in [source] using matcher
   [M], then prints either the source rewritten with [rewrite_template] or the
   text "No matches." when nothing matched. Raises (through Option.value_exn)
   if Rewrite.all returns None. *)
let run ?(configuration = configuration) (module M : Matchers.Matcher) source match_template rewrite_template =
  match M.all ~configuration ~template:match_template ~source with
  | [] -> print_string "No matches."
  | matches ->
    (match Option.value_exn (Rewrite.all ~source ~rewrite_template matches) with
     | { rewritten_source; _ } -> print_string rewritten_source)
(* A \w+ regex hole captures the whole word. *)
let%expect_test "regex_holes_simple" =
  let input = {|foo|} in
  let template = {|:[x~\w+]|} in
  let rewrite = {|(:[x])|} in
  run (module Generic) input template rewrite;
  [%expect_exact {|(foo)|}]
(* A POSIX class nested in a character class is accepted by the hole syntax;
   without a quantifier each letter is matched on its own. *)
let%expect_test "regex_holes_simple_posix" =
  let input = {|foo|} in
  let template = {|:[x~[[:alpha:]]]|} in
  let rewrite = {|(:[x])|} in
  run (module Generic) input template rewrite;
  [%expect_exact {|(f)(o)(o)|}]
(* The regex hole may start matching in the middle of a word; the literal
   parentheses that follow it in the template are consumed by the match. *)
let%expect_test "regex_holes_substring" =
  let input = {|foo()|} in
  let template = {|:[x~o\w]()|} in
  let rewrite = {|(:[x])|} in
  run (module Generic) input template rewrite;
  [%expect_exact {|f(oo)|}]
(* A pattern that only ever matches the empty string must still terminate:
   an empty match is reported at each position of the input. *)
let%expect_test "regex_holes_empty_string_terminates" =
  let input = {|foo()|} in
  let template = {|:[x~|]|} in
  let rewrite = {|(:[x])|} in
  run (module Generic) input template rewrite;
  [%expect_exact {|()f()o()o()(())|}]
(* The regex inside a hole runs to completion before the rest of the template
   is considered: \w+ swallows bar too, so the literal bar() that follows can
   no longer match and the template as a whole fails. *)
let%expect_test "regex_holes_repetition_takes_precedence" =
  let input = {|foobar()|} in
  let template = {|:[x~\w+]bar()|} in
  let rewrite = {|(:[x])|} in
  run (module Generic) input template rewrite;
  [%expect_exact {|No matches.|}]
(* Negated character classes inside a regex hole: first everything up to a
   closing parenthesis, then runs free of commas, parentheses and spaces. *)
let%expect_test "regex_holes_negated_match" =
  let rewrite = {|(:[x])|} in
  let check input template = run (module Generic) input template rewrite in
  check {|(literally_anyting_except_close_paren?!@#$%^&*[])|} {|(:[x~[^)]+])|};
  [%expect_exact {|(literally_anyting_except_close_paren?!@#$%^&*[])|}];
  check {|(arg1, arg2, arg3)|} {|:[x~[^,() ]+]|};
  [%expect_exact {|((arg1), (arg2), (arg3))|}]
(* Dot-star stops at the newline: each line is matched separately, with an
   extra empty match reported at the end of the first line. *)
let%expect_test "regex_holes_dot_star_ok_and_this_is_for_newline" =
  let input = "foo()\nbar()" in
  let template = {|:[x~.*]|} in
  let rewrite = {|(:[x])|} in
  run (module Generic) input template rewrite;
  [%expect_exact {|(foo())()
(bar())|}]
(* An optional group inside the regex: both no and november are matched, a
   bare vember is not. *)
let%expect_test "regex_holes_optional" =
  let input = "nonovember no november no vember" in
  let template = {|:[x~no(vember)?]|} in
  let rewrite = {|(:[x])|} in
  run (module Generic) input template rewrite;
  [%expect_exact {|(no)(november) (no) (november) (no) vember|}]
(* Literal template text followed by a space and then an optional-group hole:
   the hole may bind to the empty string or to vember. *)
let%expect_test "regex_holes_optional_spaces" =
  let input = "nonovember no november no vember" in
  let template = {|no :[x~(vember)?]|} in
  let rewrite = {|(:[x])|} in
  run (module Generic) input template rewrite;
  [%expect_exact {|nonovember ()november (vember)|}]
(* Note: known limitation: in the first case of the test below, a regex hole of
the form (optional)? is _not_ allowed to match the empty string in order to
satisfy the template, so nothing matches. The underlying cause is subtle. *)
(* Three cases around patterns that can match the empty string: an optional
   group right after literal text on an input with nothing left to consume
   (no match), then a lazy and a greedy whitespace star, which both report
   empty matches between characters. *)
let%expect_test "regex_holes_optional_doesnt_work_outside_regex" =
  let rewrite = {|(:[x])|} in
  let check input template = run (module Generic) input template rewrite in
  check "no" {|no:[x~(vember)?]|};
  [%expect_exact {|No matches.|}];
  check "foo bar foobar" {|:[x~\s*?]|};
  [%expect_exact {|()f()o()o() ()b()a()r() ()f()o()o()b()a()r|}];
  check "foo bar foobar" {|:[x~\s*]|};
  [%expect_exact {|()f()o()o( )()b()a()r( )()f()o()o()b()a()r|}]
(* With no kept as literal template text outside the hole, only the optional
   vember part is bound to x, so the rewrite drops the leading no. *)
let%expect_test "regex_holes_optional_strip_no_from_november_outside_regex" =
  let input = "nonovember no november no vember" in
  let template = {|no:[x~(vember)?]|} in
  let rewrite = {|(:[x])|} in
  run (module Generic) input template rewrite;
  [%expect_exact {|()(vember) () (vember) () vember|}]
(* With no placed inside the regex, the hole binds the whole of no or
   november, so the rewrite keeps the leading no. *)
let%expect_test "regex_holes_optional_strip_no_from_november_inside_regex" =
  let input = "nonovember no november no vember" in
  let template = {|:[x~no(vember)?]|} in
  let rewrite = {|(:[x])|} in
  run (module Generic) input template rewrite;
  [%expect_exact {|(no)(november) (no) (november) (no) vember|}]
(* A pattern that starts with the beginning-of-line anchor, applied to
   whitespace on a multi-line input. *)
let%expect_test "leading_spaces_beginning_line_anchor" =
  let input = {|
a
b
c
|}
  in
  let template = {|:[x~^(\s+)]|} in
  let rewrite = {|(:[x])|} in
  run (module Generic) input template rewrite;
  [%expect_exact {|( )a( )b( )c
|}]
(* Whitespace star over a multi-line input. Each whitespace run is followed by
   an empty match before the next character; the replace view on
   https://regexr.com/59ft0 behaves the same way. *)
let%expect_test "spaces_star" =
  let input = {|
a
b
c
d
|}
  in
  let template = {|:[x~\s*]|} in
  let rewrite = {|(:[x])|} in
  run (module Generic) input template rewrite;
  [%expect_exact {|(
)()a(
)()b(
)()c(
)()d(
)|}]
(* A pattern ending in the end-of-line anchor only matches where bbb is the
   last thing on its line. *)
let%expect_test "end_line_anchor" =
  let input = {|
aaa bbb
aaa bbb ccc
ccc ddd
|}
  in
  let template = {|:[x~\w+ bbb$]|} in
  let rewrite = {|(:[x])|} in
  run (module Generic) input template rewrite;
  [%expect_exact {|
(aaa bbb)aaa bbb ccc
ccc ddd
|}]
(* Word-boundary assertions inside a regex hole: every identifier is wrapped
   individually. *)
let%expect_test "word_boundaries" =
  let input = {|
foo(bar, baz(),
qux.derp)
|}
  in
  let template = {|:[x~\b\w+\b]|} in
  let rewrite = {|(>:[x]<)|} in
  run (module Generic) input template rewrite;
  [%expect_exact {|
(>foo<)((>bar<), (>baz<)(),
(>qux<).(>derp<))
|}]
(* I previously assumed [^ ] would not match newlines, but it does; this is the
same behavior as regexr (https://regexr.com/59fst). To avoid matching newlines,
see the next test, which uses [^\s]. *)
(* A negated class that excludes only commas and literal spaces still matches
   newlines, so matches run across line breaks. *)
let%expect_test "do_not_match_space" =
  let input = {|
foo(bar, baz(),
qux.derp)
|}
  in
  let template = {|:[x~[^, ]+]|} in
  let rewrite = {|(>:[x]<)|} in
  run (module Generic) input template rewrite;
  [%expect_exact {|(>
foo(bar<), (>baz()<),(>
<) (>qux.derp)
<)|}]
(* Excluding \s instead of a literal space keeps matches from spanning
   newlines. *)
let%expect_test "do_not_match_whitespace" =
  let input = {|
foo(bar, baz(),
qux.derp)
|}
  in
  let template = {|:[x~[^,\s]+]|} in
  let rewrite = {|(>:[x]<)|} in
  run (module Generic) input template rewrite;
  [%expect_exact {|
(>foo(bar<), (>baz()<),
(>qux.derp)<)
|}]