mirror of
https://github.com/comby-tools/comby.git
synced 2024-09-11 13:25:36 +03:00
fix regex anchors for alternative engine
This commit is contained in:
parent
432f416866
commit
33b41114d5
@ -127,7 +127,7 @@ module Make (Language : Language.S) (Unimplemented : Metasyntax.S) = struct
|
||||
let acc = f acc production in
|
||||
match production with
|
||||
| String s ->
|
||||
if debug then Format.printf "Matched String: %S@." s;
|
||||
if debug then Format.printf "Saw String: %S@." s;
|
||||
return (Unit, acc)
|
||||
| Match { offset = pos_begin; identifier; text = content } ->
|
||||
if debug then Format.printf "Match: %S @@ %d for %s@." content pos_begin identifier;
|
||||
@ -358,43 +358,42 @@ module Make (Language : Language.S) (Unimplemented : Metasyntax.S) = struct
|
||||
let identifier, pattern = String.lsplit2_exn identifier ~on:'~' in
|
||||
let identifier = if String.(identifier = "") then "_" else identifier in
|
||||
if debug then Format.printf "Regex: Id: %s Pat: %s@." identifier pattern;
|
||||
let pattern, prefix =
|
||||
if String.is_prefix pattern ~prefix:"^" then
|
||||
(* FIXME: match beginning of input too *)
|
||||
String.drop_prefix pattern 1,
|
||||
Some (
|
||||
(char '\n' *> return "")
|
||||
<|>
|
||||
(pos >>= fun p -> if p = 0 then return "" else fail "")
|
||||
)
|
||||
else
|
||||
pattern, None
|
||||
in
|
||||
let pattern, suffix =
|
||||
if String.is_suffix pattern ~suffix:"$" then
|
||||
String.drop_suffix pattern 1, Some (char '\n' *> return "" <|> end_of_input *> return "")
|
||||
else
|
||||
pattern, None
|
||||
in
|
||||
let compiled_regexp = Regexp.PCRE.make_regexp pattern in
|
||||
let regexp_parser = Regexp.PCRE.regexp compiled_regexp in
|
||||
let base_parser = [ regexp_parser; end_of_input >>= fun () -> return "" ] in (* the eof matters here for that one tricky test case *)
|
||||
let base_parser =
|
||||
(* adds begin line parser if the pattern has ^ anchor *)
|
||||
if String.is_prefix pattern ~prefix:"^" then
|
||||
let p =
|
||||
Regexp.PCRE.make_regexp (String.drop_prefix pattern 1) |> Regexp.PCRE.regexp
|
||||
in
|
||||
(char '\n' >>= fun _ -> p)::base_parser
|
||||
else
|
||||
base_parser
|
||||
let regexp_parser =
|
||||
match prefix, suffix with
|
||||
| Some prefix, None -> prefix *> regexp_parser
|
||||
| None, Some suffix -> regexp_parser <* suffix
|
||||
| Some prefix, Some suffix -> prefix *> regexp_parser <* suffix
|
||||
| None, None -> regexp_parser
|
||||
in
|
||||
(* the eof matters here for that one tricky test case *)
|
||||
let base_parser =
|
||||
if String.is_suffix pattern ~suffix:"$" then
|
||||
let p = Regexp.PCRE.make_regexp (String.drop_prefix pattern 1) |> Regexp.PCRE.regexp in
|
||||
(p <* (ignore @@ char '\n' <|> end_of_input))::base_parser
|
||||
else
|
||||
base_parser
|
||||
[ regexp_parser
|
||||
; end_of_input >>= fun () -> return ""
|
||||
]
|
||||
in
|
||||
pos >>= fun offset ->
|
||||
if debug then Format.printf "(X)@.";
|
||||
choice base_parser
|
||||
>>= fun value ->
|
||||
choice base_parser >>= fun value ->
|
||||
if debug then Format.printf "Regex match @@ %d value %s@." offset value;
|
||||
let offset =
|
||||
if String.length value = 0 then
|
||||
offset (*offset + 1 this may not matter, if we correct for the whole match context *)
|
||||
else
|
||||
offset
|
||||
in
|
||||
(if String.length value = 0 then
|
||||
(*advance 1*)
|
||||
advance 0
|
||||
else
|
||||
advance @@ String.length value) >>= fun () ->
|
||||
if debug then Format.printf "(Y)@.";
|
||||
acc >>= fun _ ->
|
||||
let m =
|
||||
{ offset
|
||||
|
@ -60,14 +60,15 @@ module Make (Regexp: Regexp_engine_intf) = struct
|
||||
if debug then Format.printf "Matchy Matchy (3)@.";
|
||||
Some (result, String.length result))
|
||||
>>= function
|
||||
| Some (result, _n) ->
|
||||
| Some (result, n) ->
|
||||
(* if the empty string matches this hole, as it can for optionals (x?), advance 1. *)
|
||||
(* we want to advance one so parsing can continue, but if we advance 1 here we will think
|
||||
that the match context is at least length 1 and not 0 if this hole is the only thing
|
||||
defining the match context *)
|
||||
(* let n = if n > 0 then n else 1 in
|
||||
advance n >>= fun () -> *)
|
||||
if debug then Format.printf "Result indeed: %s len %d@." result _n;
|
||||
if debug then Format.printf "Result indeed: %S len %d@." result n;
|
||||
advance n >>= fun () ->
|
||||
return result
|
||||
| None ->
|
||||
fail "No match"
|
||||
|
@ -134,8 +134,8 @@ let%expect_test "leading_spaces_beginning_line_anchor" =
|
||||
let rewrite_template = {|(:[x])|} in
|
||||
|
||||
run (module Generic) source match_template rewrite_template;
|
||||
[%expect_exact {|( )a( )b( )c(
|
||||
)|}]
|
||||
[%expect_exact {|( )a( )b( )c
|
||||
|}]
|
||||
|
||||
let%expect_test "spaces_star" =
|
||||
let source = {|
|
||||
@ -157,7 +157,6 @@ let%expect_test "spaces_star" =
|
||||
)()d(
|
||||
)|}]
|
||||
|
||||
(*
|
||||
let%expect_test "end_line_anchor" =
|
||||
let source = {|
|
||||
aaa bbb
|
||||
@ -173,7 +172,16 @@ ccc ddd
|
||||
(aaa bbb)aaa bbb ccc
|
||||
ccc ddd
|
||||
|}]
|
||||
*)
|
||||
|
||||
let%expect_test "start_of_input_and_exact" =
|
||||
let source = {|aaa bbb|}
|
||||
in
|
||||
let match_template = {|:[x~^\w+ bbb$]|} in
|
||||
let rewrite_template = {|(:[x])|} in
|
||||
|
||||
run (module Generic) source match_template rewrite_template;
|
||||
[%expect_exact {|(aaa bbb)|}]
|
||||
|
||||
|
||||
let%expect_test "word_boundaries" =
|
||||
let source = {|
|
||||
|
Loading…
Reference in New Issue
Block a user