diff --git a/lib/kernel/matchers/omega.ml b/lib/kernel/matchers/omega.ml index b53f658..a23bdab 100644 --- a/lib/kernel/matchers/omega.ml +++ b/lib/kernel/matchers/omega.ml @@ -127,7 +127,7 @@ module Make (Language : Language.S) (Unimplemented : Metasyntax.S) = struct let acc = f acc production in match production with | String s -> - if debug then Format.printf "Matched String: %S@." s; + if debug then Format.printf "Saw String: %S@." s; return (Unit, acc) | Match { offset = pos_begin; identifier; text = content } -> if debug then Format.printf "Match: %S @@ %d for %s@." content pos_begin identifier; @@ -358,43 +358,42 @@ module Make (Language : Language.S) (Unimplemented : Metasyntax.S) = struct let identifier, pattern = String.lsplit2_exn identifier ~on:'~' in let identifier = if String.(identifier = "") then "_" else identifier in if debug then Format.printf "Regex: Id: %s Pat: %s@." identifier pattern; + let pattern, prefix = + if String.is_prefix pattern ~prefix:"^" then + (* FIXME: match beginning of input too *) + String.drop_prefix pattern 1, + Some ( + (char '\n' *> return "") + <|> + (pos >>= fun p -> if p = 0 then return "" else fail "") + ) + else + pattern, None + in + let pattern, suffix = + if String.is_suffix pattern ~suffix:"$" then + String.drop_suffix pattern 1, Some (char '\n' *> return "" <|> end_of_input *> return "") + else + pattern, None + in let compiled_regexp = Regexp.PCRE.make_regexp pattern in let regexp_parser = Regexp.PCRE.regexp compiled_regexp in - let base_parser = [ regexp_parser; end_of_input >>= fun () -> return "" ] in (* the eof matters here for that one tricky test case *) - let base_parser = - (* adds begin line parser if the pattern has ^ anchor *) - if String.is_prefix pattern ~prefix:"^" then - let p = - Regexp.PCRE.make_regexp (String.drop_prefix pattern 1) |> Regexp.PCRE.regexp - in - (char '\n' >>= fun _ -> p)::base_parser - else - base_parser + let regexp_parser = + match prefix, suffix with + | Some prefix, None -> prefix *> regexp_parser + | None, Some suffix -> regexp_parser <* suffix + | Some prefix, Some suffix -> prefix *> regexp_parser <* suffix + | None, None -> regexp_parser in + (* the eof matters here for that one tricky test case *) let base_parser = - if String.is_suffix pattern ~suffix:"$" then - let p = Regexp.PCRE.make_regexp (String.drop_prefix pattern 1) |> Regexp.PCRE.regexp in - (p <* (ignore @@ char '\n' <|> end_of_input))::base_parser - else - base_parser + [ regexp_parser + ; end_of_input >>= fun () -> return "" + ] in pos >>= fun offset -> - if debug then Format.printf "(X)@."; - choice base_parser - >>= fun value -> + choice base_parser >>= fun value -> if debug then Format.printf "Regex match @@ %d value %s@." offset value; - let offset = - if String.length value = 0 then - offset (*offset + 1 this may not matter, if we correct for the whole match conext *) - else - offset - in - (if String.length value = 0 then - (*advance 1*) - advance 0 - else - advance @@ String.length value) >>= fun () -> - if debug then Format.printf "(Y)@."; acc >>= fun _ -> let m = { offset diff --git a/lib/kernel/matchers/regexp.ml b/lib/kernel/matchers/regexp.ml index f6a2538..69e9834 100644 --- a/lib/kernel/matchers/regexp.ml +++ b/lib/kernel/matchers/regexp.ml @@ -60,14 +60,15 @@ module Make (Regexp: Regexp_engine_intf) = struct if debug then Format.printf "Matchy Matchy (3)@."; Some (result, String.length result)) >>= function - | Some (result, _n) -> + | Some (result, n) -> (* if empty string matches, this hole like for optionals (x?), advance 1. *) (* we want to advance one so parsing can continue, but if we advance 1 here we will think that the match context is at least length 1 and not 0 if this hole is the only thing defining the match context *) (* let n = if n > 0 then n else 1 in advance n >>= fun () -> *) - if debug then Format.printf "Result indeed: %s len %d@." result _n; + if debug then Format.printf "Result indeed: %S len %d@." result n; + advance n >>= fun () -> return result | None -> fail "No match" diff --git a/test/common/test_regex_holes_omega.ml b/test/common/test_regex_holes_omega.ml index dcfd128..e2ac5ba 100644 --- a/test/common/test_regex_holes_omega.ml +++ b/test/common/test_regex_holes_omega.ml @@ -134,8 +134,8 @@ let%expect_test "leading_spaces_beginning_line_anchor" = let rewrite_template = {|(:[x])|} in run (module Generic) source match_template rewrite_template; - [%expect_exact {|( )a( )b( )c( -)|}] + [%expect_exact {|( )a( )b( )c +|}] let%expect_test "spaces_star" = let source = {| @@ -157,7 +157,6 @@ let%expect_test "spaces_star" = )()d( )|}] -(* let%expect_test "end_line_anchor" = let source = {| aaa bbb @@ -173,7 +172,16 @@ ccc ddd (aaa bbb)aaa bbb ccc ccc ddd |}] -*) + +let%expect_test "start_of_input_and_exact" = + let source = {|aaa bbb|} + in + let match_template = {|:[x~^\w+ bbb$]|} in + let rewrite_template = {|(:[x])|} in + + run (module Generic) source match_template rewrite_template; + [%expect_exact {|(aaa bbb)|}] + let%expect_test "word_boundaries" = let source = {|