mirror of
https://github.com/comby-tools/comby.git
synced 2024-10-26 21:34:19 +03:00
1001 lines
38 KiB
OCaml
1001 lines
38 KiB
OCaml
open Core
|
|
open MParser
|
|
|
|
open Configuration
|
|
open Match
|
|
open Range
|
|
open Location
|
|
open Types
|
|
|
|
let configuration_ref = ref (Configuration.create ())
|
|
let weaken_delimiter_hole_matching = false
|
|
|
|
let debug =
|
|
Sys.getenv "DEBUG_COMBY"
|
|
|> Option.is_some
|
|
|
|
let debug_hole =
|
|
Sys.getenv "DEBUG_COMBY_HOLE"
|
|
|> Option.is_some
|
|
|
|
let debug_position =
|
|
Sys.getenv "DEBUG_COMBY_POS"
|
|
|> Option.is_some
|
|
|
|
let f _ = return Unit
|
|
|
|
let extract_matched_text source { offset = match_start; _ } { offset = match_end; _ } =
|
|
String.slice source match_start match_end
|
|
|
|
let is_not p s =
|
|
if is_ok (p s) then
|
|
Empty_failed (unknown_error s)
|
|
else
|
|
match read_char s with
|
|
| Some c -> Consumed_ok (c, advance_state s 1, No_error)
|
|
| None -> Empty_failed (unknown_error s)
|
|
|
|
type 'a literal_parser_callback = contents:string -> left_delimiter:string -> right_delimiter:string -> 'a
|
|
type 'a nested_delimiter_callback = left_delimiter:string -> right_delimiter:string -> 'a
|
|
|
|
module Make (Syntax : Syntax.S) (Info : Info.S) = struct
|
|
include Info
|
|
|
|
let escapable_string_literal_parser (f : 'a literal_parser_callback) =
|
|
(match Syntax.escapable_string_literals with
|
|
| None -> []
|
|
| Some { delimiters; escape_character } ->
|
|
List.map delimiters ~f:(fun delimiter ->
|
|
let module M =
|
|
Parsers.String_literals.Alpha.Escapable.Make(struct
|
|
let delimiter = delimiter
|
|
let escape = escape_character
|
|
end)
|
|
in
|
|
M.base_string_literal >>= fun contents ->
|
|
return (f ~contents ~left_delimiter:delimiter ~right_delimiter:delimiter)))
|
|
|> choice
|
|
|
|
let raw_string_literal_parser (f : 'a literal_parser_callback) =
|
|
List.map Syntax.raw_string_literals ~f:(fun (left_delimiter, right_delimiter) ->
|
|
let module M =
|
|
Parsers.String_literals.Alpha.Raw.Make(struct
|
|
let left_delimiter = left_delimiter
|
|
let right_delimiter = right_delimiter
|
|
end)
|
|
in
|
|
M.base_string_literal >>= fun contents ->
|
|
return (f ~contents ~left_delimiter ~right_delimiter))
|
|
|> choice
|
|
|
|
let comment_parser =
|
|
match Syntax.comments with
|
|
| [] -> MParser.zero
|
|
| syntax ->
|
|
List.map syntax ~f:(function
|
|
| Multiline (left, right) ->
|
|
let module M =
|
|
Parsers.Comments.Alpha.Multiline.Make(struct
|
|
let left = left
|
|
let right = right
|
|
end)
|
|
in
|
|
M.comment
|
|
| Nested_multiline (left, right) ->
|
|
let module M =
|
|
Parsers.Comments.Alpha.Nested_multiline.Make(struct
|
|
let left = left
|
|
let right = right
|
|
end)
|
|
in
|
|
M.comment
|
|
| Until_newline start ->
|
|
let module M =
|
|
Parsers.Comments.Alpha.Until_newline.Make(struct
|
|
let start = start
|
|
end)
|
|
in
|
|
M.comment)
|
|
|> choice
|
|
|
|
let escapable_literal_grammar ~right_delimiter =
|
|
match Syntax.escapable_string_literals with
|
|
| None -> zero
|
|
| Some { escape_character; _ } ->
|
|
(string (Format.sprintf "%c%s" escape_character right_delimiter))
|
|
<|>
|
|
(string (Format.sprintf "%c%c" escape_character escape_character))
|
|
<|> (is_not (string right_delimiter) |>> String.of_char)
|
|
|
|
let raw_literal_grammar ~right_delimiter =
|
|
is_not (string right_delimiter) |>> String.of_char
|
|
|
|
let generate_spaces_parser () =
|
|
(* At least a space followed by comments and spaces. *)
|
|
(spaces1
|
|
>> many comment_parser << spaces
|
|
>>= fun result -> f result)
|
|
<|>
|
|
(* This case not covered by tests, may not be needed. *)
|
|
(many1 comment_parser << spaces >>= fun result -> f result)
|
|
|
|
let sequence_chain (plist : ('c, Match.t) parser sexp_list) : ('c, Match.t) parser =
|
|
List.fold plist ~init:(return Unit) ~f:(>>)
|
|
|
|
let with_debug_matcher s tag =
|
|
if debug then
|
|
match tag with
|
|
| `Position tag ->
|
|
if debug_position then
|
|
let prev = prev_char s in
|
|
let curr = read_char s in
|
|
let next = next_char s in
|
|
let print_if = function
|
|
| Some s -> s
|
|
| None -> '?'
|
|
in
|
|
Format.printf "Position Tag: %s@." tag;
|
|
Format.printf "H_prev: %c H_curr: %c H_next: %c@."
|
|
(print_if prev)
|
|
(print_if curr)
|
|
(print_if next)
|
|
| `Delimited delimited ->
|
|
Format.printf "<d>%s</d>%!" delimited
|
|
| `Delimited_suffix suffix ->
|
|
Format.printf "<d_s>%s</d_s>%!" suffix
|
|
| `Checkpoint (label, s) ->
|
|
Format.printf "Point(%s):<d>%s</d>" label s
|
|
| _ -> assert false
|
|
|
|
let is_alphanum delim = Pcre.(pmatch ~rex:(regexp "^[[:alnum:]]+$") delim)
|
|
let whitespace : (id, Match.t) parser = many1 space |>> String.of_char_list
|
|
let not_alphanum = many1 (is_not alphanum) |>> String.of_char_list
|
|
let reserved_alphanum_delimiter_must_satisfy =
|
|
Syntax.user_defined_delimiters
|
|
|> List.filter_map ~f:(fun (from, until) ->
|
|
if not (is_alphanum from) then
|
|
Some [from; until]
|
|
else
|
|
None)
|
|
|> List.concat
|
|
|> List.map ~f:string
|
|
|> List.map ~f:attempt
|
|
|
|
let nested_delimiters_parser (f : 'a nested_delimiter_callback) =
|
|
(* All alphanum delimiter fixups happen in the generated parser, not here. *)
|
|
let between p from until s =
|
|
(string from >>= fun from ->
|
|
if debug then with_debug_matcher s (`Delimited from);
|
|
p >>= fun p_result ->
|
|
string until >>= fun until ->
|
|
if debug then with_debug_matcher s (`Delimited until);
|
|
return p_result)
|
|
s
|
|
in
|
|
Syntax.user_defined_delimiters
|
|
|> List.map ~f:(fun (left_delimiter, right_delimiter) ->
|
|
between
|
|
(f ~left_delimiter ~right_delimiter)
|
|
left_delimiter
|
|
right_delimiter
|
|
)
|
|
|> choice
|
|
(* Backtrack on failure, specifically for alphanum. *)
|
|
|> attempt
|
|
|
|
(** Generates a word that is NOT OK with having alphanum chars before or after
|
|
it. This disables substring parsing. *)
|
|
let generate_word chars : ('c, _) parser =
|
|
(fun s ->
|
|
let prev = prev_char s in
|
|
(match prev with
|
|
| Some prev when is_alphanum (Char.to_string prev) -> fail "unsat"
|
|
| _ ->
|
|
string (String.of_char_list chars)
|
|
>>= fun result ->
|
|
(* there has to be at least one "not alphanum" after this parser for it to succeed, or eof. *)
|
|
look_ahead (skip (many1 (is_not alphanum)) <|> eof) >>= fun _ ->
|
|
f result) s)
|
|
|
|
let generate_string_token_parser str : ('c, _) parser =
|
|
many comment_parser
|
|
>> string str
|
|
>> many comment_parser
|
|
>>= fun result -> f result
|
|
|
|
let is_optional () =
|
|
opt false (char '?' |>> fun _ -> true)
|
|
|
|
let identifier () =
|
|
(many (alphanum <|> char '_') |>> String.of_char_list)
|
|
|
|
let hole_body () =
|
|
is_optional () >>= fun optional ->
|
|
identifier () >>= fun identifier ->
|
|
return (optional, identifier)
|
|
|
|
let everything_hole_parser () =
|
|
string ":[" >> hole_body () << string "]"
|
|
|
|
let expression_hole_parser () =
|
|
string ":[" >> hole_body () << string ":e" << string "]"
|
|
|
|
let non_space_hole_parser () =
|
|
string ":[" >> hole_body () << string ".]"
|
|
|
|
let line_hole_parser () =
|
|
string ":[" >> hole_body () << string "\\n]"
|
|
|
|
let blank_hole_parser () =
|
|
string ":[" >>
|
|
is_optional () >>= fun optional ->
|
|
(many1 blank)
|
|
>> identifier () >>= fun identifier ->
|
|
string "]" >>
|
|
return (optional, identifier)
|
|
|
|
let alphanum_hole_parser () =
|
|
string ":[[" >> hole_body () << string "]]"
|
|
|
|
let reserved_holes () =
|
|
let alphanum = alphanum_hole_parser () |>> snd in
|
|
let expression = expression_hole_parser () |>> snd in
|
|
let everything = everything_hole_parser () |>> snd in
|
|
let non_space = non_space_hole_parser () |>> snd in
|
|
let blank = blank_hole_parser () |>> snd in
|
|
let line = line_hole_parser () |>> snd in
|
|
[ non_space
|
|
; line
|
|
; blank
|
|
; alphanum
|
|
; expression
|
|
; everything
|
|
]
|
|
|
|
let reserved_delimiters () =
|
|
let required_from_suffix = not_alphanum in
|
|
let required_until_suffix = not_alphanum in
|
|
let handle_alphanum_delimiters_reserved_trigger from until =
|
|
let from_parser =
|
|
fun s ->
|
|
(match prev_char s with
|
|
| Some prev when is_alphanum (Char.to_string prev) -> fail "unsat"
|
|
| _ ->
|
|
string from >>= fun from ->
|
|
look_ahead required_from_suffix >>= fun _ ->
|
|
return from) s
|
|
in
|
|
let until_parser s =
|
|
(string until >>= fun until ->
|
|
eof <|> look_ahead (skip required_until_suffix) >>= fun _ ->
|
|
(* if current char/next_char is alphanum, make unsat. *)
|
|
let prev = prev_char s in
|
|
if debug then with_debug_matcher s (`Position "reserved_delimiter_until");
|
|
match prev with
|
|
| Some prev when is_alphanum (Char.to_string prev) -> fail "unsat"
|
|
| _ -> return until
|
|
)
|
|
s
|
|
in
|
|
[from_parser; until_parser]
|
|
in
|
|
let reserved_delimiters =
|
|
List.concat_map Syntax.user_defined_delimiters ~f:(fun (from, until) ->
|
|
if is_alphanum from && is_alphanum until then
|
|
handle_alphanum_delimiters_reserved_trigger from until
|
|
else
|
|
[ string from; string until])
|
|
in
|
|
let reserved_escapable_strings =
|
|
match Syntax.escapable_string_literals with
|
|
| Some { delimiters; _ } ->
|
|
List.concat_map delimiters ~f:(fun delimiter -> [delimiter])
|
|
|> List.map ~f:string
|
|
| None -> []
|
|
in
|
|
let reserved_raw_strings =
|
|
List.concat_map Syntax.raw_string_literals ~f:(fun (from, until) -> [from; until])
|
|
|> List.map ~f:string
|
|
in
|
|
reserved_holes ()
|
|
@ reserved_delimiters
|
|
@ reserved_escapable_strings
|
|
@ reserved_raw_strings
|
|
|> List.map ~f:skip
|
|
(* Attempt the reserved: otherwise, if a parser partially succeeds,
|
|
it won't detect that single or greedy is reserved. *)
|
|
|> List.map ~f:attempt
|
|
|> choice
|
|
|
|
let reserved _s =
|
|
reserved_delimiters ()
|
|
<|> skip (space |>> Char.to_string)
|
|
|
|
let until_of_from from =
|
|
Syntax.user_defined_delimiters
|
|
|> List.find_map ~f:(fun (from', until) -> if String.equal from from' then Some until else None)
|
|
|> function
|
|
| Some until -> until
|
|
| None -> assert false
|
|
|
|
let record_matches identifier p : ('c, Match.t) parser =
|
|
get_pos >>= fun (pre_index, pre_line, pre_column) ->
|
|
p >>= fun matched ->
|
|
get_pos >>= fun (post_index, post_line, post_column) ->
|
|
update_user_state
|
|
(fun ({ Match.environment; _ } as result) ->
|
|
if debug then begin
|
|
Format.printf "Updating user state:@.";
|
|
Format.printf "%s |-> %s@." identifier (String.concat matched);
|
|
Format.printf "ID %s: %d:%d:%d - %d:%d:%d@."
|
|
identifier
|
|
pre_index pre_line pre_column
|
|
post_index post_line post_column;
|
|
end;
|
|
let pre_location : Location.t =
|
|
Location.
|
|
{ offset = pre_index
|
|
; line = pre_line
|
|
; column = pre_column
|
|
}
|
|
in
|
|
let post_location : Location.t =
|
|
Location.
|
|
{ offset = post_index
|
|
; line = post_line
|
|
; column = post_column
|
|
}
|
|
in
|
|
let range = { match_start = pre_location; match_end = post_location } in
|
|
let environment =
|
|
if Environment.exists environment identifier && String.(identifier <> "_") then
|
|
let fresh_hole_id =
|
|
Format.sprintf "%s_%s_equal" Uuid_unix.(Fn.compose Uuid.to_string create ()) identifier
|
|
in
|
|
Environment.add ~range environment fresh_hole_id (String.concat matched)
|
|
else
|
|
Environment.add ~range environment identifier (String.concat matched)
|
|
in
|
|
{ result with environment })
|
|
>>= fun () -> f matched
|
|
|
|
let alphanum_delimiter_must_satisfy =
|
|
many1 (is_not (skip (choice reserved_alphanum_delimiter_must_satisfy) <|> skip alphanum))
|
|
|>> String.of_char_list
|
|
|
|
let generate_delimited_hole_parser
|
|
?priority_left_delimiter:left_delimiter
|
|
?priority_right_delimiter:right_delimiter =
|
|
let delimiters =
|
|
if weaken_delimiter_hole_matching then
|
|
match left_delimiter, right_delimiter with
|
|
| Some left_delimiter, Some right_delimiter ->
|
|
[ (left_delimiter, right_delimiter) ]
|
|
| _ -> Syntax.user_defined_delimiters
|
|
else
|
|
Syntax.user_defined_delimiters
|
|
in
|
|
let reserved =
|
|
List.concat_map delimiters ~f:(fun (from, until) ->
|
|
[string from; string until]
|
|
)
|
|
|> List.map ~f:attempt
|
|
|> choice
|
|
in
|
|
(* A parser that understands the hole matching cut off points happen at
|
|
delimiters. *)
|
|
let rec nested_grammar s =
|
|
(comment_parser
|
|
<|> raw_string_literal_parser (fun ~contents ~left_delimiter:_ ~right_delimiter:_ -> contents)
|
|
<|> escapable_string_literal_parser (fun ~contents ~left_delimiter:_ ~right_delimiter:_ -> contents)
|
|
<|> (many1 space |>> String.of_char_list)
|
|
<|> (attempt @@ delims_over_holes)
|
|
(* Only consume if not reserved. If it is reserved, we want to trigger the 'many'
|
|
in (many nested_grammar) to continue. *)
|
|
<|> (is_not (reserved <|> (space |>> Char.to_string)) |>> String.of_char))
|
|
s
|
|
and delims_over_holes s =
|
|
let between_nested_delims p =
|
|
let capture_delimiter_result p ~from =
|
|
let until = until_of_from from in
|
|
between (string from) (string until) p
|
|
>>= fun result -> return (String.concat @@ [from] @ result @ [until])
|
|
in
|
|
delimiters
|
|
|> List.map ~f:(fun pair -> capture_delimiter_result p ~from:(fst pair))
|
|
|> choice
|
|
in
|
|
(between_nested_delims (many nested_grammar)) s
|
|
in
|
|
delims_over_holes
|
|
|
|
let generate_everything_hole_parser
|
|
?priority_left_delimiter:left_delimiter
|
|
?priority_right_delimiter:right_delimiter =
|
|
let with_debug_hole tag =
|
|
match tag with
|
|
| `Spaces chars ->
|
|
if debug_hole then Format.printf "<H_sp>";
|
|
return (String.of_char_list chars)
|
|
| `Delimited delimited ->
|
|
if debug_hole then Format.printf "<H_d>%s</H_d>" delimited;
|
|
return delimited
|
|
| `Character c ->
|
|
if debug_hole then Format.printf "<H_c>%c</H_c>" c;
|
|
return (String.of_char c)
|
|
| `Body body ->
|
|
let body = String.concat body in
|
|
if debug_hole then Format.printf "<H_body>%s</H_body>" body;
|
|
return body
|
|
| `Checkpoint (label, s) ->
|
|
if debug_hole then Format.printf "Point(%s):<d>%s</d>" label s;
|
|
return s
|
|
| _ -> assert false
|
|
in
|
|
let delimiters =
|
|
if weaken_delimiter_hole_matching then
|
|
match left_delimiter, right_delimiter with
|
|
| Some left_delimiter, Some right_delimiter ->
|
|
[ (left_delimiter, right_delimiter) ]
|
|
| _ -> Syntax.user_defined_delimiters
|
|
else
|
|
Syntax.user_defined_delimiters
|
|
in
|
|
let handle_alphanum_delimiters from until p =
|
|
(* mandatory_prefix: needs to be not alphanum AND not non-alphanum delim.
|
|
If it is a paren, we need to fail and get out of this alphanum block
|
|
parser (how did we end up in here? because we said that we'd accept
|
|
anything as prefix to 'def', including '(', and so '(' is not handled
|
|
as a delim.) *)
|
|
let mandatory_prefix = alphanum_delimiter_must_satisfy in
|
|
(* mandatory_suffix: be more strict with suffix of opening delimiter: don't
|
|
use 'any non-alphanum', but instead use whitespace. This since 'def;'
|
|
is undesirable, and 'def.foo' may be intentional. But 'end.' or 'end;'
|
|
probably still refer to a closing delim. *)
|
|
let mandatory_suffix = choice reserved_alphanum_delimiter_must_satisfy <|> whitespace in
|
|
let satisfy_opening_delimiter prev =
|
|
(match prev with
|
|
| Some prev when is_alphanum (Char.to_string prev) -> fail "unsat"
|
|
(* Try parse whitespace, and we want to capture its length, in case
|
|
this is a space between, like 'def def end end'. But in the case
|
|
where there's no space, it means we have just entered the beginning
|
|
of the hole which may start with the 'd' of 'def', but since we
|
|
already know that the previous char is not alphanum in this branch
|
|
(so it is a delimiter or whitespace) it is OK to continue: in this
|
|
case, return "". *)
|
|
| _ -> mandatory_prefix <|> return "")
|
|
>>= fun prefix ->
|
|
string from >>= fun open_delimiter ->
|
|
with_debug_hole (`Checkpoint ("open_delimiter_<pre>"^prefix^"</pre>_sat_for", open_delimiter)) >>= fun _ ->
|
|
(* Use look_ahead to ensure that there is, e.g., whitespace after this
|
|
possible delimiter, but without consuming input. Whitespace needs to
|
|
not be consumed so that we can detect subsequent delimiters. *)
|
|
look_ahead mandatory_suffix >>= fun suffix ->
|
|
with_debug_hole (`Checkpoint ("open_delimiter_<suf>"^suffix^"</suf>_sat_for", open_delimiter)) >>= fun _ ->
|
|
return (prefix, open_delimiter)
|
|
in
|
|
let satisfy_closing_delimiter =
|
|
string until >>= fun close_delimiter ->
|
|
look_ahead @@ mandatory_suffix >>= fun suffix ->
|
|
with_debug_hole (`Checkpoint ("close_delimiter_<suf>"^suffix^"</suf>_sat_for", close_delimiter)) >>= fun _ ->
|
|
return close_delimiter
|
|
in
|
|
(fun s ->
|
|
let prev = prev_char s in
|
|
(satisfy_opening_delimiter prev >>= fun (prefix, open_delimiter) ->
|
|
p >>= fun in_between ->
|
|
with_debug_hole (`Body in_between) >>= fun in_between ->
|
|
satisfy_closing_delimiter >>= fun close_delimiter ->
|
|
return
|
|
((prefix^open_delimiter)
|
|
^in_between
|
|
^close_delimiter)) s)
|
|
(* Use attempt so that, e.g., 'struct' is tried after 'begin' delimiters under choice. *)
|
|
|> attempt
|
|
in
|
|
let handle_alphanum_delimiters_reserved_trigger from until =
|
|
(* If it's alphanum, only consider it reserved if there is, say, whitespace after and so
|
|
handle alternatively. Otherwise, return empty to indicate 'this sequence of characters
|
|
is not reserved'. *)
|
|
let reserved =
|
|
Syntax.user_defined_delimiters
|
|
|> List.filter_map ~f:(fun (from, _) ->
|
|
if not (is_alphanum from) then
|
|
Some from
|
|
else
|
|
None)
|
|
|> List.map ~f:string
|
|
|> List.map ~f:attempt
|
|
in
|
|
let required_delimiter_terminal =
|
|
many1 (is_not (skip (choice reserved) <|> skip alphanum)) |>> String.of_char_list
|
|
in
|
|
List.map [from; until] ~f:(fun delimiter ->
|
|
(fun s ->
|
|
let prev = prev_char s in
|
|
(match prev with
|
|
| Some prev when is_alphanum (Char.to_string prev) ->
|
|
(* If prev is alphanum, this can't possibly be a reserved delimiter. Just continue. *)
|
|
fail "unsat"
|
|
| _ -> string delimiter >>= fun _ ->
|
|
look_ahead required_delimiter_terminal) s))
|
|
in
|
|
(* The cases for which we need to stop parsing just characters
|
|
and consider delimiters. *)
|
|
let reserved =
|
|
List.concat_map delimiters ~f:(fun (from, until) ->
|
|
if is_alphanum from then
|
|
handle_alphanum_delimiters_reserved_trigger from until
|
|
else
|
|
[string from; string until]
|
|
)
|
|
|> List.map ~f:attempt
|
|
|> choice
|
|
in
|
|
(* A parser that understands the hole matching cut off points happen at
|
|
delimiters. *)
|
|
let rec nested_grammar s =
|
|
(comment_parser
|
|
<|> raw_string_literal_parser (fun ~contents ~left_delimiter:_ ~right_delimiter:_ -> contents)
|
|
<|> escapable_string_literal_parser (fun ~contents ~left_delimiter:_ ~right_delimiter:_ -> contents)
|
|
<|> (many1 space >>= fun r -> with_debug_hole (`Spaces r))
|
|
<|> (attempt @@ delims_over_holes >>= fun r -> with_debug_hole (`Delimited r))
|
|
(* Only consume if not reserved. If it is reserved, we want to trigger the 'many'
|
|
in (many nested_grammar) to continue. *)
|
|
<|> (is_not (reserved <|> (space |>> Char.to_string)) >>= fun r -> with_debug_hole (`Character r)))
|
|
s
|
|
and delims_over_holes s =
|
|
let between_nested_delims p =
|
|
let capture_delimiter_result p ~from =
|
|
let until = until_of_from from in
|
|
if is_alphanum from then
|
|
handle_alphanum_delimiters from until p
|
|
else
|
|
between (string from) (string until) p
|
|
>>= fun result -> return (String.concat @@ [from] @ result @ [until])
|
|
in
|
|
delimiters
|
|
|> List.map ~f:(fun pair -> capture_delimiter_result p ~from:(fst pair))
|
|
|> choice
|
|
in
|
|
(between_nested_delims (many nested_grammar)) s
|
|
in
|
|
nested_grammar
|
|
|
|
let coalesce_whitespace prefix_parser suffix_parser =
|
|
let is_whitespace p =
|
|
match
|
|
parse_string p " " (Match.create ()),
|
|
(* suffix parser could be a hole. It needs to fail on
|
|
parsing something like "X" to be a whitespace parser *)
|
|
parse_string p "X" (Match.create ())
|
|
with
|
|
| Success _, Failed _ -> true
|
|
| _ -> false
|
|
in
|
|
let pre = is_whitespace prefix_parser in
|
|
if debug then Format.printf "Pre: %b@." pre;
|
|
let suf = is_whitespace suffix_parser in
|
|
if debug then Format.printf "Suf: %b@." suf;
|
|
pre && suf
|
|
|
|
let prefix_parser p_list i =
|
|
match List.nth (List.rev p_list) (i+1) with
|
|
| Some p -> p
|
|
| None ->
|
|
if debug then Format.printf "Prefix parser unsat@.";
|
|
fail "unsat"
|
|
|
|
let turn_holes_into_matchers_for_this_level ?left_delimiter ?right_delimiter p_list =
|
|
List.foldi (List.rev p_list) ~init:[] ~f:(fun i acc p ->
|
|
match parse_string p "_signal_hole" (Match.create ()) with
|
|
| Failed _ -> p::acc
|
|
| Success Hole { sort; identifier; optional; dimension } ->
|
|
begin
|
|
match sort with
|
|
| Alphanum ->
|
|
let allowed = choice [alphanum; char '_'] |>> String.of_char in
|
|
let hole_semantics = many1 allowed in
|
|
begin match optional with
|
|
| false -> (record_matches identifier hole_semantics)::acc
|
|
| true ->
|
|
if debug then Format.printf "Optional active@.";
|
|
match acc with
|
|
| [] ->
|
|
let hole_semantics = opt [] (attempt hole_semantics) in
|
|
(record_matches identifier hole_semantics)::acc
|
|
| (suffix::rest) as acc ->
|
|
let optional_succeeds_parser =
|
|
record_matches identifier hole_semantics
|
|
>> sequence_chain acc
|
|
in
|
|
let optional_fails_parser =
|
|
record_matches identifier (return []) >>= fun _ ->
|
|
if coalesce_whitespace (prefix_parser p_list i) suffix then
|
|
sequence_chain rest
|
|
else
|
|
sequence_chain acc
|
|
in
|
|
[(attempt optional_succeeds_parser)
|
|
<|> optional_fails_parser]
|
|
end
|
|
|
|
| Non_space ->
|
|
let allowed =
|
|
[skip space; reserved_delimiters ()]
|
|
|> choice
|
|
|> is_not
|
|
|>> Char.to_string
|
|
in
|
|
let rest =
|
|
match acc with
|
|
| [] -> eof >>= fun () -> f [""]
|
|
| _ -> sequence_chain acc
|
|
in
|
|
let hole_semantics = many1 (not_followed_by rest "" >> allowed) in
|
|
let hole_semantics =
|
|
if not optional then
|
|
hole_semantics
|
|
else
|
|
opt [] (attempt hole_semantics)
|
|
in
|
|
(record_matches identifier hole_semantics)::acc
|
|
|
|
| Line ->
|
|
let allowed =
|
|
many (is_not (char '\n'))
|
|
|>> fun x -> [(String.of_char_list x)^"\n"]
|
|
in
|
|
let hole_semantics = allowed << char '\n' in
|
|
let hole_semantics =
|
|
if not optional then
|
|
hole_semantics
|
|
else
|
|
opt [] (attempt hole_semantics)
|
|
in
|
|
(record_matches identifier hole_semantics)::acc
|
|
|
|
| Blank ->
|
|
let allowed = blank |>> String.of_char in
|
|
let hole_semantics = many1 allowed in
|
|
let hole_semantics =
|
|
if not optional then
|
|
hole_semantics
|
|
else
|
|
opt [] (attempt hole_semantics)
|
|
in
|
|
(record_matches identifier hole_semantics)::acc
|
|
|
|
| Expression ->
|
|
let non_space =
|
|
[skip space; reserved_delimiters ()]
|
|
|> choice
|
|
|> is_not
|
|
|>> Char.to_string
|
|
in
|
|
let delimited =
|
|
generate_delimited_hole_parser
|
|
?priority_left_delimiter:left_delimiter
|
|
?priority_right_delimiter:right_delimiter
|
|
in
|
|
let matcher = non_space <|> delimited in
|
|
let rest =
|
|
match acc with
|
|
| [] -> eof >>= fun () -> f [""]
|
|
| _ -> sequence_chain acc
|
|
in
|
|
let hole_semantics = many1 (not_followed_by rest "" >> matcher) in
|
|
(record_matches identifier hole_semantics)::acc
|
|
|
|
| Everything ->
|
|
let matcher =
|
|
match dimension with
|
|
| Code ->
|
|
generate_everything_hole_parser
|
|
?priority_left_delimiter:left_delimiter
|
|
?priority_right_delimiter:right_delimiter
|
|
| Escapable_string_literal ->
|
|
let right_delimiter = Option.value_exn right_delimiter in
|
|
escapable_literal_grammar ~right_delimiter
|
|
| Raw_string_literal ->
|
|
let right_delimiter = Option.value_exn right_delimiter in
|
|
raw_literal_grammar ~right_delimiter
|
|
| Comment -> failwith "Unimplemented"
|
|
in
|
|
match optional with
|
|
| false ->
|
|
let rest =
|
|
match acc with
|
|
| [] -> eof >>= fun () -> f [""]
|
|
| _ -> sequence_chain acc
|
|
in
|
|
(* Continue until rest, but don't consume rest. acc will
|
|
propagate the rest that needs to be consumed. *)
|
|
let hole_semantics = many (not_followed_by rest "" >> matcher) in
|
|
(record_matches identifier hole_semantics)::acc
|
|
| true ->
|
|
if debug then Format.printf "Optional active@.";
|
|
match acc with
|
|
| [] ->
|
|
let rest = eof >>= fun () -> f [""] in
|
|
let hole_semantics = many (not_followed_by rest "" >> matcher) in
|
|
(* Try match ordinary hole semantics, but if the parser
|
|
fails, just let it pass which leads to assigning "" to
|
|
identifier *)
|
|
let hole_semantics = opt [] (attempt hole_semantics) in
|
|
(record_matches identifier hole_semantics)::acc
|
|
| (suffix::rest) as acc ->
|
|
let after = sequence_chain acc in
|
|
let hole_semantics = many (not_followed_by after "" >> matcher) in
|
|
(* The logic goes: Try to match ordinary hole semantics, and
|
|
propagate acc if it succeeds. If ordinary semantics fail,
|
|
let it pass and coalesce whitespace with prefix/suffix if
|
|
needed and only propagate 'rest', since we remove the
|
|
suffix by coalescing. *)
|
|
let optional_succeeds_parser =
|
|
(* This parser can succeed but does not consume after.
|
|
After must still be consumed (acc must be propagated).
|
|
*)
|
|
record_matches identifier hole_semantics
|
|
>>= fun _ ->
|
|
if debug then Format.printf "Optional record succeeds.@.";
|
|
sequence_chain acc >>= fun r ->
|
|
if debug then Format.printf "Rest succeeds.@.";
|
|
return r
|
|
in
|
|
let optional_fails_parser =
|
|
if debug then Format.printf "Optional fail case@.";
|
|
(* The optional parser that kicks in if
|
|
optional_succeeds_parser fails. It does not consume
|
|
anything. *)
|
|
(record_matches identifier (return [])) >>= fun _ ->
|
|
(* Record matches succeeded for optional hole, empty
|
|
match. No going back now. Consume suffix if prefix and
|
|
suffix are whitespace and propagate rest. Otherwise,
|
|
propagate acc. *)
|
|
if coalesce_whitespace (prefix_parser p_list i) suffix then
|
|
sequence_chain rest
|
|
else
|
|
sequence_chain acc
|
|
in
|
|
[(attempt optional_succeeds_parser)
|
|
<|> optional_fails_parser]
|
|
end
|
|
| Success _ -> failwith "Hole expected")
|
|
|
|
let hole_parser sort dimension =
|
|
let open Hole in
|
|
let hole_parser =
|
|
match sort with
|
|
| Everything -> everything_hole_parser ()
|
|
| Expression -> expression_hole_parser ()
|
|
| Non_space -> non_space_hole_parser ()
|
|
| Line -> line_hole_parser ()
|
|
| Blank -> blank_hole_parser ()
|
|
| Alphanum -> alphanum_hole_parser ()
|
|
in
|
|
let skip_signal hole = skip (string "_signal_hole") |>> fun () -> Hole hole in
|
|
hole_parser |>> fun (optional, identifier) -> skip_signal { sort; identifier; dimension; optional }
|
|
|
|
let generate_hole_for_literal sort ~contents ~left_delimiter ~right_delimiter s =
|
|
let holes =
|
|
Hole.sorts ()
|
|
|> List.map ~f:(fun kind -> attempt (hole_parser kind sort))
|
|
in
|
|
let reserved_holes =
|
|
reserved_holes ()
|
|
|> List.map ~f:skip
|
|
|> List.map ~f:attempt
|
|
|> choice
|
|
in
|
|
|
|
let p =
|
|
many
|
|
(choice holes
|
|
<|> ((many1 (is_not (reserved_holes))
|
|
|>> String.of_char_list) |>> generate_string_token_parser))
|
|
in
|
|
match parse_string p contents "" with
|
|
| Success p ->
|
|
begin
|
|
match sort with
|
|
| Escapable_string_literal
|
|
| Raw_string_literal ->
|
|
(turn_holes_into_matchers_for_this_level ~left_delimiter ~right_delimiter p
|
|
|> sequence_chain) s
|
|
| _ -> assert false
|
|
end
|
|
| Failed (_msg, _) ->
|
|
failwith "literal parser did not succeed"
|
|
|
|
let rec generate_parsers s =
|
|
many (common s)
|
|
|
|
and common _s =
|
|
let holes =
|
|
Hole.sorts ()
|
|
|> List.map ~f:(fun kind -> attempt (hole_parser kind Code))
|
|
in
|
|
choice
|
|
[ choice holes
|
|
(* String literals are handled specially because match semantics change inside string delimiters. *)
|
|
; raw_string_literal_parser (generate_hole_for_literal Raw_string_literal)
|
|
; escapable_string_literal_parser (generate_hole_for_literal Escapable_string_literal)
|
|
(* Nested delimiters are handled specially for nestedness. *)
|
|
; nested_delimiters_parser generate_outer_delimiter_parsers
|
|
(* Whitespace is handled specially because we may change whether they are significant for matching. *)
|
|
; spaces1 |>> generate_spaces_parser
|
|
(* Optional: parse identifiers and disallow substring matching *)
|
|
; if !configuration_ref.disable_substring_matching then many1 (alphanum <|> char '_') |>> generate_word else zero
|
|
(* Everything else. *)
|
|
; (many1 (is_not (reserved _s)) >>= fun cl ->
|
|
if debug then Format.printf "<cl>%s</cl>" @@ String.of_char_list cl;
|
|
return @@ String.of_char_list cl)
|
|
|>> generate_string_token_parser
|
|
]
|
|
|
|
and generate_outer_delimiter_parsers ~left_delimiter ~right_delimiter s =
|
|
let before s =
|
|
begin
|
|
if debug_hole then with_debug_matcher s (`Position "generate_outer_delimiter");
|
|
let p =
|
|
if is_alphanum left_delimiter then
|
|
(* This logic is needed for cases where we say 'def :[1] end' in the template,
|
|
and don't match partially on, say, 'adef body endq' in the underlying generated
|
|
parser. *)
|
|
let prev = prev_char s in
|
|
match prev with
|
|
| Some prev when is_alphanum (Char.to_string prev) -> fail "unsat"
|
|
| _ -> string left_delimiter
|
|
else
|
|
string left_delimiter
|
|
in
|
|
p >>= fun _ -> f [left_delimiter]
|
|
end s
|
|
in
|
|
let after =
|
|
let p =
|
|
if is_alphanum right_delimiter then
|
|
(* This handles the case for something like 'def body endly'. *)
|
|
string right_delimiter >>= fun delim ->
|
|
look_ahead @@ (eof <|> skip not_alphanum) >>= fun _ ->
|
|
return delim
|
|
else
|
|
string right_delimiter
|
|
in
|
|
p >>= fun _ -> f [right_delimiter]
|
|
in
|
|
(generate_parsers s >>= fun p_list ->
|
|
(turn_holes_into_matchers_for_this_level
|
|
~left_delimiter
|
|
~right_delimiter
|
|
([before] @ p_list @ [after])
|
|
|> sequence_chain)
|
|
|> return
|
|
) s
|
|
|
|
let general_parser_generator s =
|
|
let outer_p =
|
|
generate_parsers s >>= fun p_list ->
|
|
(* EOF of template is here. *)
|
|
eof >> (* Result is unit so ignore. *)
|
|
(* Customize the inner parser. *)
|
|
let inner_p =
|
|
let matcher : ('a, Match.t) parser =
|
|
turn_holes_into_matchers_for_this_level p_list
|
|
|> sequence_chain
|
|
in
|
|
let matcher : ('a, Match.t) parser =
|
|
let with_positions (matcher : ('a, Match.t) parser) : ('a, Match.t) parser =
|
|
get_pos >>= fun (pre_offset, pre_line, pre_column) ->
|
|
matcher >>= fun _last_production ->
|
|
get_pos >>= fun (post_offset, post_line, post_column) ->
|
|
let match_start =
|
|
{ offset = pre_offset
|
|
; line = pre_line
|
|
; column = pre_column
|
|
} in
|
|
let match_end =
|
|
{ offset = post_offset
|
|
; line = post_line
|
|
; column = post_column
|
|
}
|
|
in
|
|
let range = { match_start; match_end } in
|
|
update_user_state (fun result -> { result with range })
|
|
>> return Unit
|
|
in
|
|
with_positions matcher
|
|
in
|
|
match !configuration_ref.match_kind with
|
|
| Exact -> matcher << eof
|
|
| Fuzzy ->
|
|
many
|
|
(not_followed_by matcher "" >>
|
|
(
|
|
(* Respect grammar but ignore contents up to a match. *)
|
|
skip comment_parser
|
|
<|> skip (raw_string_literal_parser (fun ~contents:_ ~left_delimiter:_ ~right_delimiter:_ -> ()))
|
|
<|> skip (escapable_string_literal_parser (fun ~contents:_ ~left_delimiter:_ ~right_delimiter:_ -> ()))
|
|
<|> skip any_char)
|
|
)
|
|
>> matcher
|
|
in
|
|
return inner_p
|
|
in
|
|
outer_p s
|
|
|
|
let to_template template : ('a, Match.t) MParser.t Or_error.t =
|
|
(* Use a match type for state so we can reuse parsers for inner and outer. *)
|
|
match parse_string general_parser_generator template (Match.create ()) with
|
|
| Success p -> Ok p
|
|
| Failed (msg, _) -> Or_error.error_string msg
|
|
|
|
(** shift: start the scan in the source at an offset *)
|
|
let first' shift p source : Match.t Or_error.t =
|
|
let set_start_pos p = fun s -> p (advance_state s shift) in
|
|
let p = set_start_pos p in
|
|
match parse_string' p source (Match.create ()) with
|
|
| Success (_, result) ->
|
|
if String.is_empty source then
|
|
(* If source is empty and p succeeds, it's the trivial case. We set
|
|
the result manually. *)
|
|
Ok {
|
|
result with
|
|
range =
|
|
{ match_start = { offset = 0; line = 1; column = 1 }
|
|
; match_end = { offset = 0; line = 1; column = 1 }
|
|
}
|
|
}
|
|
else
|
|
Ok result
|
|
| Failed (msg, _) -> Or_error.error_string msg
|
|
|
|
let first ?configuration ?shift template source =
|
|
let open Or_error in
|
|
configuration_ref := Option.value configuration ~default:!configuration_ref;
|
|
to_template template >>= fun p ->
|
|
let shift =
|
|
match shift with
|
|
| Some s -> s
|
|
| None -> 0
|
|
in
|
|
first' shift p source
|
|
|
|
let all ?configuration ~template ~source:original_source : Match.t list =
|
|
let open Or_error in
|
|
configuration_ref := Option.value configuration ~default:!configuration_ref;
|
|
let make_result = function
|
|
| Ok ok -> ok
|
|
| Error _ -> []
|
|
in
|
|
make_result @@ begin
|
|
to_template template >>= fun p ->
|
|
let p =
|
|
if String.is_empty template then
|
|
MParser.(eof >> return Unit)
|
|
else
|
|
p
|
|
in
|
|
let rec aux acc shift =
|
|
match first' shift p original_source with
|
|
| Ok ({range = { match_start; match_end; _ }; _} as result) ->
|
|
let shift = match_end.offset in
|
|
let matched = extract_matched_text original_source match_start match_end in
|
|
let result = { result with matched } in
|
|
if shift >= String.length original_source then
|
|
result :: acc
|
|
else
|
|
aux (result :: acc) shift
|
|
| Error _ -> acc
|
|
in
|
|
let matches = aux [] 0 |> List.rev in
|
|
(* TODO(RVT): reintroduce nested matches *)
|
|
let compute_nested_matches matches = matches in
|
|
let matches = compute_nested_matches matches in
|
|
return matches
|
|
end
|
|
|
|
let set_rewrite_template _ = () (* Unused in alpha matcher *)
|
|
end
|