mirror of
https://github.com/comby-tools/comby.git
synced 2024-08-16 16:50:37 +03:00
add support for a chunked match output format (#340)
This commit is contained in:
parent
1871a4b116
commit
1a48b2fb3d
@ -198,6 +198,7 @@ type output_options =
|
||||
; substitute_in_place : bool
|
||||
; count : bool
|
||||
; interactive_review : interactive_review option
|
||||
; chunk_matches : int option
|
||||
}
|
||||
|
||||
type anonymous_arguments =
|
||||
@ -227,8 +228,6 @@ type user_input_options =
|
||||
; omega : bool
|
||||
}
|
||||
|
||||
type number_of_workers = int
|
||||
|
||||
type compute_mode =
|
||||
[ `Sequential
|
||||
| `Hack_parallel of int
|
||||
@ -263,6 +262,7 @@ module Printer = struct
|
||||
type printable_result =
|
||||
| Matches of
|
||||
{ source_path : string option
|
||||
; source_content : string
|
||||
; matches : Match.t list
|
||||
}
|
||||
| Replacements of
|
||||
@ -279,6 +279,7 @@ module Printer = struct
|
||||
type match_only_kind =
|
||||
| Contents
|
||||
| Count
|
||||
| Chunk_matches of int option
|
||||
|
||||
type match_output =
|
||||
| Json_lines
|
||||
@ -286,13 +287,14 @@ module Printer = struct
|
||||
|
||||
val convert : output_options -> match_output
|
||||
|
||||
val print : match_output -> string option -> Match.t list -> unit
|
||||
val print : match_output -> string -> string option -> Match.t list -> unit
|
||||
|
||||
end = struct
|
||||
|
||||
type match_only_kind =
|
||||
| Contents
|
||||
| Count
|
||||
| Chunk_matches of int option
|
||||
|
||||
type match_output =
|
||||
| Json_lines
|
||||
@ -300,21 +302,26 @@ module Printer = struct
|
||||
|
||||
let convert output_options =
|
||||
match output_options with
|
||||
| { chunk_matches = (Some _ as n); _ } -> Match_only (Chunk_matches n)
|
||||
| { json_lines = true; _ } -> Json_lines
|
||||
| { count = true; _ } -> Match_only Count
|
||||
| _ -> Match_only Contents
|
||||
|
||||
let print (match_output : match_output) source_path matches =
|
||||
let ppf = Format.std_formatter in
|
||||
let pp =
|
||||
let print (match_output : match_output) source_content source_path matches =
|
||||
if List.length matches = 0 then
|
||||
()
|
||||
else
|
||||
let ppf = Format.std_formatter in
|
||||
match match_output with
|
||||
| Match_only Contents -> Match.pp
|
||||
| Match_only Count -> Match.pp_match_count
|
||||
| Json_lines -> Match.pp_json_lines
|
||||
in
|
||||
if List.length matches > 0 then
|
||||
Format.fprintf ppf "%a" pp (source_path, matches)
|
||||
|
||||
| Match_only Contents ->
|
||||
Format.fprintf ppf "%a" Match.pp (source_path, matches)
|
||||
| Match_only Count ->
|
||||
Format.fprintf ppf "%a" Match.pp_match_count (source_path, matches)
|
||||
| Json_lines ->
|
||||
Format.fprintf ppf "%a" Match.pp_json_lines (source_path, matches)
|
||||
| Match_only Chunk_matches threshold ->
|
||||
let chunk_matches = Match.to_chunks ?threshold source_content matches in
|
||||
Format.fprintf ppf "%a" Match.pp_chunk_matches (source_path, chunk_matches)
|
||||
end
|
||||
|
||||
module Rewrite : sig
|
||||
@ -474,6 +481,8 @@ let emit_errors { input_options; output_options; _ } =
|
||||
, "Directory specified with -d or -directory is not a directory."
|
||||
; output_options.json_only_diff && not output_options.json_lines
|
||||
, "-json-only-diff can only be supplied with -json-lines."
|
||||
; (Option.is_some output_options.chunk_matches) && Option.is_some input_options.zip_file
|
||||
, "chunk-matches output format is not supported for zip files."
|
||||
; Option.is_some output_options.interactive_review &&
|
||||
(not (String.equal input_options.target_directory (Sys.getcwd ())))
|
||||
, "Please remove the -d option and `cd` to the directory where you want to \
|
||||
@ -562,6 +571,9 @@ let emit_warnings { input_options; output_options; _ } =
|
||||
, "-count and -json-lines is specified. Ignoring -count."
|
||||
; input_options.stdin && input_options.tar
|
||||
, "-tar implies -stdin. Ignoring -stdin."
|
||||
; (Option.is_some output_options.chunk_matches) && (not (input_options.stdin || input_options.tar))
|
||||
, "printing chunk match format for output option that is NOT -stdin nor \
|
||||
-tar. This is very inefficient!"
|
||||
]
|
||||
in
|
||||
List.iter warn_on ~f:(function
|
||||
@ -807,10 +819,10 @@ let create
|
||||
let output_printer printable =
|
||||
let open Printer in
|
||||
match printable with
|
||||
| Matches { source_path; matches } ->
|
||||
| Matches { source_path; matches; source_content } ->
|
||||
Printer.Match.convert output_options
|
||||
|> fun match_output ->
|
||||
Printer.Match.print match_output source_path matches
|
||||
Printer.Match.print match_output source_content source_path matches
|
||||
| Replacements { source_path; replacements; result; source_content } ->
|
||||
Printer.Rewrite.convert output_options
|
||||
|> fun replacement_output ->
|
||||
|
@ -6,6 +6,7 @@ module Printer : sig
|
||||
type printable_result =
|
||||
| Matches of
|
||||
{ source_path : string option
|
||||
; source_content : string
|
||||
; matches : Match.t list
|
||||
}
|
||||
| Replacements of
|
||||
@ -33,6 +34,7 @@ type output_options =
|
||||
; substitute_in_place : bool
|
||||
; count : bool
|
||||
; interactive_review : interactive_review option
|
||||
; chunk_matches : int option
|
||||
}
|
||||
|
||||
type anonymous_arguments =
|
||||
|
@ -104,7 +104,16 @@ let output_result output_printer source_path source_content result =
|
||||
match result with
|
||||
| Nothing -> ()
|
||||
| Matches (matches, _) ->
|
||||
output_printer (Printer.Matches { source_path; matches })
|
||||
let source_content =
|
||||
match source_content with
|
||||
| String content -> content
|
||||
(* Content is only needed for chunk matches. It's kind of horrible to read
|
||||
in the file again to output this format--the only efficient case is
|
||||
when -stdin or -tar is used right now. We warn on it in command
|
||||
configuration. *)
|
||||
| Path path -> In_channel.read_all path
|
||||
in
|
||||
output_printer (Printer.Matches { source_path; source_content; matches })
|
||||
| Replacement (replacements, result, _) ->
|
||||
let source_content =
|
||||
match source_content with
|
||||
|
@ -112,6 +112,19 @@ module Match : sig
|
||||
search to perform the conversion quickly. *)
|
||||
val convert_offset : fast:bool -> source:string -> t -> t
|
||||
|
||||
type chunk_match =
|
||||
{ content : string
|
||||
; start : Location.t
|
||||
; ranges : Range.t list
|
||||
}
|
||||
|
||||
val chunk_match_to_yojson : chunk_match -> Yojson.Safe.json
|
||||
val chunk_match_of_yojson : Yojson.Safe.json -> (chunk_match, string) Result.t
|
||||
|
||||
val to_chunks : ?threshold:int -> string -> t list -> chunk_match list
|
||||
|
||||
val pp_chunk_matches : Format.formatter -> string option * chunk_match list -> unit
|
||||
|
||||
(** [pp] is a grep-like formatted printer for matches. It accepts a (optional
|
||||
file path * match list) *)
|
||||
val pp : Format.formatter -> string option * t list -> unit
|
||||
@ -121,6 +134,7 @@ module Match : sig
|
||||
val pp_json_lines : Format.formatter -> string option * t list -> unit
|
||||
|
||||
val pp_match_count : Format.formatter -> string option * t list -> unit
|
||||
|
||||
end
|
||||
|
||||
type match' = Match.t
|
||||
|
@ -5,3 +5,4 @@ module Offset = Offset
|
||||
|
||||
include Types
|
||||
include Match_context
|
||||
include Match_chunk
|
||||
|
@ -85,3 +85,14 @@ val pp : Format.formatter -> string option * t list -> unit
|
||||
val pp_json_lines : Format.formatter -> string option * t list -> unit
|
||||
|
||||
val pp_match_count : Format.formatter -> string option * t list -> unit
|
||||
|
||||
type chunk_match =
|
||||
{ content : string
|
||||
; start : Location.t
|
||||
; ranges : Range.t list
|
||||
}
|
||||
[@@deriving yojson]
|
||||
|
||||
val to_chunks : ?threshold:int -> string -> t list -> chunk_match list
|
||||
|
||||
val pp_chunk_matches : Format.formatter -> string option * chunk_match list -> unit
|
||||
|
75
lib/kernel/match/match_chunk.ml
Normal file
75
lib/kernel/match/match_chunk.ml
Normal file
@ -0,0 +1,75 @@
|
||||
open Core_kernel
|
||||
open Range
|
||||
|
||||
(** A chunk: a slice of source text together with the match ranges grouped
    into it.

    - [content] is the text sliced out of the source for this chunk.
    - [start] is the location at which [content] begins in the source.
    - [ranges] are the match ranges that were grouped into this chunk.

    The yojson deriver supplies [chunk_match_to_yojson] and
    [chunk_match_of_yojson] for this type. *)
type chunk_match =
|
||||
{ content : string
|
||||
; start : Location.t
|
||||
; ranges : Range.t list
|
||||
}
|
||||
[@@deriving yojson]
|
||||
|
||||
(** [slice_source source range] returns [(offset, text)]: [text] is the run of
    whole lines of [source] covered by [range], without the newline that
    terminates the last line, and [offset] is the position in [source] at
    which [text] begins. *)
let slice_source source { match_start = { offset = start; _ }; match_end = { offset = stop; _ } } =
  let length = String.length source in
  (* Clamp both offsets into [0, length] (and keep [stop >= start]) so that an
     out-of-range offset never reaches the index functions, which raise on
     invalid positions. *)
  let start = Int.max 0 (Int.min start length) in
  let stop = Int.max start (Int.min stop length) in
  (* The first line begins right after the closest newline strictly before
     [start]. Searching from [start] itself is wrong when the match begins on
     a newline character (the chunk would start on the following line), and
     [start = length] is not a valid position to search from. *)
  let first_line =
    if start = 0 then
      0
    else
      match String.rindex_from source (start - 1) '\n' with
      | Some newline -> newline + 1
      | None -> 0
  in
  (* [stop] is an exclusive end offset, so the last line ends at the first
     newline found at or after it, or at the end of the source. *)
  let last_line =
    match String.index_from source stop '\n' with
    | Some newline -> newline
    | None -> length
  in
  (* [String.sub] instead of [String.slice]: [slice] treats a stop index of 0
     as "end of string", which would return the whole source for a chunk that
     ends at offset 0. *)
  (first_line, String.sub source ~pos:first_line ~len:(last_line - first_line))
|
||||
|
||||
(** [to_range_chunk source (cover, ranges)] builds the chunk for one group of
    ranges. [cover] is the range spanning the whole group; the chunk content
    and its starting offset come from [slice_source], the starting line is the
    line on which [cover] starts, and the starting column is always 1. *)
let to_range_chunk source ((cover : Range.t), ranges) =
  let offset, content = slice_source source cover in
  let start : Location.t =
    { offset; line = cover.match_start.line; column = 1 }
  in
  { content; start; ranges }
|
||||
|
||||
(** Orders two ranges by the offset at which they start. *)
let compare { match_start = { offset = left; _ }; _ } { match_start = { offset = right; _ }; _ } =
  Int.compare left right
|
||||
|
||||
(** [to_chunks ?threshold source matches] groups the ranges of [matches] into
    chunks of [source].

    Ranges are sorted by start offset. A range joins the current chunk when it
    starts no more than [threshold] lines after the line on which the chunk
    currently ends; otherwise it opens a new chunk. With the default
    [threshold] of 0, only ranges that start on or before the last line of the
    current chunk are grouped. Negative thresholds are treated as 0.

    Chunks are returned in source order, and the ranges inside each chunk are
    in ascending start-offset order. *)
let to_chunks ?(threshold = 0) source (l : Match_context.t list) =
  let threshold = Int.max 0 threshold in
  (* Grow the covering range only if [current] ends past it: a later-starting
     range may still be nested inside the cover. *)
  let widen (cover : Range.t) (current : Range.t) =
    if current.match_end.offset > cover.match_end.offset then
      { cover with match_end = current.match_end }
    else
      cover
  in
  (* Members are accumulated newest-first to keep each step constant time;
     restore ascending order once a group is finished. *)
  let seal (cover, members) = (cover, List.rev members) in
  List.map l ~f:(fun { range; _ } -> range)
  |> List.sort ~compare
  |> function
  | [] -> []
  | hd :: tl ->
    let open_group, sealed =
      List.fold
        tl
        ~init:((hd, [hd]), [])
        ~f:(fun (((cover : Range.t), members), sealed) (current : Range.t) ->
            if cover.match_end.line + threshold >= current.match_start.line then
              ((widen cover current, current :: members), sealed)
            else
              ((current, [current]), seal (cover, members) :: sealed))
    in
    (* [sealed] holds the newest group first; [rev_map] restores source order. *)
    List.rev_map (seal open_group :: sealed) ~f:(to_range_chunk source)
|
||||
|
||||
(** [to_json source_path matches] builds a JSON object with two fields:
    ["uri"], holding the path as a string or null when [source_path] is
    [None], and ["matches"], holding the list of chunks converted with
    [chunk_match_to_yojson]. *)
let to_json source_path matches =
  let uri =
    Option.value_map source_path ~default:`Null ~f:(fun path -> `String path)
  in
  let chunks = `List (List.map matches ~f:chunk_match_to_yojson) in
  `Assoc [ ("uri", uri); ("matches", chunks) ]
|
||||
|
||||
(** [pp_chunk_matches ppf (source_path, matches)] prints the JSON produced by
    [to_json] for [source_path] and [matches] on [ppf], followed by a
    newline. *)
let pp_chunk_matches ppf (source_path, matches) =
  to_json source_path matches
  |> Yojson.Safe.to_string
  |> Format.fprintf ppf "%s\n"
|
@ -130,6 +130,7 @@ let base_command_parameters : (unit -> 'result) Command.Param.t =
|
||||
and bound_count = flag "bound-count" (optional int) ~doc:"num Stop running when at least num matches are found (possibly more are returned for parallel jobs)."
|
||||
and parany = flag "parany" no_arg ~doc:"force comby to use the alternative parany parallel processing library."
|
||||
and tar = flag "tar" no_arg ~doc:"read tar format from stdin."
|
||||
and chunk_matches = flag "chunk-matches" (optional int) ~aliases:[] ~doc:"line threshold Return content bounded by the min and max line numbers of match ranges. Optionally specify the threshold (number of lines) for grouping content together. Implies -match-only and -json-lines."
|
||||
and anonymous_arguments =
|
||||
anon
|
||||
(maybe
|
||||
@ -211,6 +212,7 @@ let base_command_parameters : (unit -> 'result) Command.Param.t =
|
||||
| _, _, "arm64" -> `Parany number_of_workers
|
||||
| _, false, _ -> `Hack_parallel number_of_workers
|
||||
in
|
||||
let match_only = match_only || Option.is_some chunk_matches in
|
||||
let configuration =
|
||||
Command_configuration.create
|
||||
{ input_options =
|
||||
@ -253,6 +255,7 @@ let base_command_parameters : (unit -> 'result) Command.Param.t =
|
||||
; stdout
|
||||
; substitute_in_place
|
||||
; interactive_review
|
||||
; chunk_matches
|
||||
}
|
||||
}
|
||||
|> function
|
||||
|
@ -21,6 +21,7 @@
|
||||
test_hole_extensions
|
||||
test_match_offsets
|
||||
test_generic
|
||||
test_match_chunk_conversion
|
||||
test_string_literals
|
||||
test_c_style_comments
|
||||
test_nested_comments
|
||||
|
63
test/common/test_match_chunk_conversion.ml
Normal file
63
test/common/test_match_chunk_conversion.ml
Normal file
@ -0,0 +1,63 @@
|
||||
open Core
|
||||
|
||||
open Test_helpers
|
||||
open Comby_kernel
|
||||
|
||||
open Matchers
|
||||
|
||||
(* Expect test for Match.to_chunks using its default threshold. The template
   is matched against the multi-line source below, match offsets are converted
   with Match.convert_offset, the matches are grouped into chunks, and the
   chunks are pretty-printed as a JSON list. The expectation lists three
   chunks, each holding a single range; the content of the last chunk spans
   the five lines of the multi-line call. *)
let%expect_test "match_chunk_conversion" =
|
||||
let source = {|a
|
||||
b
|
||||
c
|
||||
foo(bar)
|
||||
d
|
||||
e
|
||||
baz(qux)
|
||||
f
|
||||
foo(
|
||||
bar
|
||||
baz
|
||||
qux
|
||||
)
|
||||
g|} in
|
||||
let template = ":[x~\\w+](:[y])" in
|
||||
let matches =
|
||||
Alpha.Generic.all ~configuration ~template ~source ()
|
||||
|> List.map ~f:(Match.convert_offset ~fast:true ~source)
|
||||
|> Match.to_chunks source
|
||||
|> (fun m -> `List (List.map m ~f:(Match.chunk_match_to_yojson)))
|
||||
|> Yojson.Safe.pretty_to_string
|
||||
in
|
||||
print_string matches;
|
||||
[%expect_exact {|[
|
||||
{
|
||||
"content": "foo(bar)",
|
||||
"start": { "offset": 6, "line": 4, "column": 1 },
|
||||
"ranges": [
|
||||
{
|
||||
"start": { "offset": 6, "line": 4, "column": 1 },
|
||||
"end": { "offset": 14, "line": 4, "column": 9 }
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"content": "baz(qux)",
|
||||
"start": { "offset": 19, "line": 7, "column": 1 },
|
||||
"ranges": [
|
||||
{
|
||||
"start": { "offset": 19, "line": 7, "column": 1 },
|
||||
"end": { "offset": 27, "line": 7, "column": 9 }
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"content": "foo(\n bar\n baz\n qux\n)",
|
||||
"start": { "offset": 30, "line": 9, "column": 1 },
|
||||
"ranges": [
|
||||
{
|
||||
"start": { "offset": 30, "line": 9, "column": 1 },
|
||||
"end": { "offset": 60, "line": 13, "column": 2 }
|
||||
}
|
||||
]
|
||||
}
|
||||
]|}]
|
Loading…
Reference in New Issue
Block a user