add support for a chunked match output format (#340)

This commit is contained in:
Rijnard van Tonder 2022-06-15 23:45:56 -07:00 committed by GitHub
parent 1871a4b116
commit 1a48b2fb3d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 207 additions and 16 deletions

View File

@ -198,6 +198,7 @@ type output_options =
; substitute_in_place : bool
; count : bool
; interactive_review : interactive_review option
; chunk_matches : int option
}
type anonymous_arguments =
@ -227,8 +228,6 @@ type user_input_options =
; omega : bool
}
type number_of_workers = int
type compute_mode =
[ `Sequential
| `Hack_parallel of int
@ -263,6 +262,7 @@ module Printer = struct
type printable_result =
| Matches of
{ source_path : string option
; source_content : string
; matches : Match.t list
}
| Replacements of
@ -279,6 +279,7 @@ module Printer = struct
type match_only_kind =
| Contents
| Count
| Chunk_matches of int option
type match_output =
| Json_lines
@ -286,13 +287,14 @@ module Printer = struct
val convert : output_options -> match_output
val print : match_output -> string option -> Match.t list -> unit
val print : match_output -> string -> string option -> Match.t list -> unit
end = struct
type match_only_kind =
| Contents
| Count
| Chunk_matches of int option
type match_output =
| Json_lines
@ -300,21 +302,26 @@ module Printer = struct
(* Chooses the match printer from the output options. Precedence, highest
   first: chunk matches, JSON lines, count, plain contents. *)
let convert ({ chunk_matches; json_lines; count; _ } : output_options) =
  match chunk_matches, json_lines, count with
  | Some _, _, _ -> Match_only (Chunk_matches chunk_matches)
  | None, true, _ -> Json_lines
  | None, false, true -> Match_only Count
  | None, false, false -> Match_only Contents
let print (match_output : match_output) source_path matches =
let ppf = Format.std_formatter in
let pp =
let print (match_output : match_output) source_content source_path matches =
if List.length matches = 0 then
()
else
let ppf = Format.std_formatter in
match match_output with
| Match_only Contents -> Match.pp
| Match_only Count -> Match.pp_match_count
| Json_lines -> Match.pp_json_lines
in
if List.length matches > 0 then
Format.fprintf ppf "%a" pp (source_path, matches)
| Match_only Contents ->
Format.fprintf ppf "%a" Match.pp (source_path, matches)
| Match_only Count ->
Format.fprintf ppf "%a" Match.pp_match_count (source_path, matches)
| Json_lines ->
Format.fprintf ppf "%a" Match.pp_json_lines (source_path, matches)
| Match_only Chunk_matches threshold ->
let chunk_matches = Match.to_chunks ?threshold source_content matches in
Format.fprintf ppf "%a" Match.pp_chunk_matches (source_path, chunk_matches)
end
module Rewrite : sig
@ -474,6 +481,8 @@ let emit_errors { input_options; output_options; _ } =
, "Directory specified with -d or -directory is not a directory."
; output_options.json_only_diff && not output_options.json_lines
, "-json-only-diff can only be supplied with -json-lines."
; (Option.is_some output_options.chunk_matches) && Option.is_some input_options.zip_file
, "chunk-matches output format is not supported for zip files."
; Option.is_some output_options.interactive_review &&
(not (String.equal input_options.target_directory (Sys.getcwd ())))
, "Please remove the -d option and `cd` to the directory where you want to \
@ -562,6 +571,9 @@ let emit_warnings { input_options; output_options; _ } =
, "-count and -json-lines is specified. Ignoring -count."
; input_options.stdin && input_options.tar
, "-tar implies -stdin. Ignoring -stdin."
; (Option.is_some output_options.chunk_matches) && (not (input_options.stdin || input_options.tar))
, "printing chunk match format for output option that is NOT -stdin nor \
-tar. This is very inefficient!"
]
in
List.iter warn_on ~f:(function
@ -807,10 +819,10 @@ let create
let output_printer printable =
let open Printer in
match printable with
| Matches { source_path; matches } ->
| Matches { source_path; matches; source_content } ->
Printer.Match.convert output_options
|> fun match_output ->
Printer.Match.print match_output source_path matches
Printer.Match.print match_output source_content source_path matches
| Replacements { source_path; replacements; result; source_content } ->
Printer.Rewrite.convert output_options
|> fun replacement_output ->

View File

@ -6,6 +6,7 @@ module Printer : sig
type printable_result =
| Matches of
{ source_path : string option
; source_content : string
; matches : Match.t list
}
| Replacements of
@ -33,6 +34,7 @@ type output_options =
; substitute_in_place : bool
; count : bool
; interactive_review : interactive_review option
; chunk_matches : int option
}
type anonymous_arguments =

View File

@ -104,7 +104,16 @@ let output_result output_printer source_path source_content result =
match result with
| Nothing -> ()
| Matches (matches, _) ->
output_printer (Printer.Matches { source_path; matches })
let source_content =
match source_content with
| String content -> content
(* Content is only needed for chunk matches. It's kind of horrible to read
in the file again to output this format--the only efficient case is
when -stdin or -tar is used right now. We warn on it in command
configuration. *)
| Path path -> In_channel.read_all path
in
output_printer (Printer.Matches { source_path; source_content; matches })
| Replacement (replacements, result, _) ->
let source_content =
match source_content with

View File

@ -112,6 +112,19 @@ module Match : sig
search to perform the conversion quickly. *)
val convert_offset : fast:bool -> source:string -> t -> t
type chunk_match =
{ content : string
; start : Location.t
; ranges : Range.t list
}
val chunk_match_to_yojson : chunk_match -> Yojson.Safe.json
val chunk_match_of_yojson : Yojson.Safe.json -> (chunk_match, string) Result.t
val to_chunks : ?threshold:int -> string -> t list -> chunk_match list
val pp_chunk_matches : Format.formatter -> string option * chunk_match list -> unit
(** [pp] is a grep-like formatted printer for matches. It accepts a (optional
file path * match list) *)
val pp : Format.formatter -> string option * t list -> unit
@ -121,6 +134,7 @@ module Match : sig
val pp_json_lines : Format.formatter -> string option * t list -> unit
val pp_match_count : Format.formatter -> string option * t list -> unit
end
type match' = Match.t

View File

@ -5,3 +5,4 @@ module Offset = Offset
include Types
include Match_context
include Match_chunk

View File

@ -85,3 +85,14 @@ val pp : Format.formatter -> string option * t list -> unit
val pp_json_lines : Format.formatter -> string option * t list -> unit
val pp_match_count : Format.formatter -> string option * t list -> unit
(** A run of whole source lines together with the match ranges found in it.
    [content] is the text of those lines, [start] is the location at which
    [content] begins, and [ranges] are the ranges of the matches grouped into
    this chunk. JSON converters are derived for this type. *)
type chunk_match =
{ content : string
; start : Location.t
; ranges : Range.t list
}
[@@deriving yojson]
(** [to_chunks ?threshold source matches] groups the ranges of [matches] into
    chunks of whole lines of [source], returned in order of increasing start
    offset. [threshold] is an optional number of lines related to grouping.
    NOTE(review): confirm against the implementation how [threshold] is
    applied before relying on it. *)
val to_chunks : ?threshold:int -> string -> t list -> chunk_match list
(** [pp_chunk_matches ppf (source_path, chunks)] prints a single line of JSON
    for [chunks]. It accepts an (optional file path * chunk list). *)
val pp_chunk_matches : Format.formatter -> string option * chunk_match list -> unit

View File

@ -0,0 +1,75 @@
open Core_kernel
open Range
(* A chunk of source text covering one or more match ranges.
   - content: the text of the source lines that make up the chunk.
   - start: where content begins in the source; chunks built in this file
     always begin at column 1 of a line.
   - ranges: the match ranges grouped into this chunk.
   The yojson deriver generates chunk_match_to_yojson and
   chunk_match_of_yojson. *)
type chunk_match =
{ content : string
; start : Location.t
; ranges : Range.t list
}
[@@deriving yojson]
(* [slice_source source range] returns [(line_start, text)], where [text] is
   the run of whole lines of [source] that [range] touches (without the
   newline that terminates the last line) and [line_start] is the offset in
   [source] at which [text] begins.

   Two details matter for correctness:
   - The backwards search for the preceding newline begins at [start - 1], not
     at [start]. A range that itself begins on a newline character would
     otherwise treat that character as the end of the previous line and the
     chunk would begin after the start of the range. Guarding [start <= 0] also
     keeps the search index within bounds for a range at the very beginning of
     the source.
   - The text is extracted with [String.sub] and an explicit length. Core's
     [String.slice] interprets a stop index of 0 as "up to the end of the
     string", which yields the wrong text when the chunk ends at offset 0. *)
let slice_source source { match_start = { offset = start; _ }; match_end = { offset = stop; _ } } =
  let first_line =
    if start <= 0 then
      0
    else
      match String.rindex_from source (start - 1) '\n' with
      | Some newline -> newline + 1
      | None -> 0
  in
  let last_line =
    match String.index_from source stop '\n' with
    | Some newline -> newline
    | None -> String.length source
  in
  (* [first_line <= start] and [stop <= last_line], so the length is
     non-negative whenever the range itself is well formed. *)
  (first_line, String.sub source ~pos:first_line ~len:(last_line - first_line))
(* Builds the chunk for a group of ranges. [cover] is the range spanning the
   whole group; the chunk text is the whole lines touched by [cover], and the
   chunk starts at column 1 of the line on which [cover] starts. *)
let to_range_chunk source (cover, ranges) =
  let line_offset, line_text = slice_source source cover in
  let start : Location.t =
    { offset = line_offset
    ; line = cover.match_start.line
    ; column = 1
    }
  in
  { content = line_text; start; ranges }
(* Orders two ranges by the offset at which each of them starts. *)
let compare { match_start = { offset = left; _ }; _ } { match_start = { offset = right; _ }; _ } =
  Int.compare left right
(* [to_chunks ?threshold source matches] groups the ranges of [matches] into
   chunks of whole lines of [source].

   Ranges are sorted by start offset and folded left to right. A range joins
   the current group when the line it starts on is at most [threshold] lines
   after the line on which the group's covering range ends; otherwise it opens
   a new group. With the default [threshold] of 0, ranges are grouped exactly
   when their lines overlap. A negative [threshold] is treated as 0 so that
   ranges sharing a line are never split across chunks.

   Returns the chunks in order of increasing start offset, each listing its
   ranges in sorted order. An empty match list yields an empty list. *)
let to_chunks ?(threshold = 0) source (l : Match_context.t list) =
  let threshold = Int.max 0 threshold in
  (* Ranges of a group are accumulated newest-first to avoid a quadratic
     append; restore sorted order once when the group is finished. *)
  let finish (cover, rev_ranges) =
    to_range_chunk source (cover, List.rev rev_ranges)
  in
  List.map l ~f:(fun { range; _ } -> range)
  |> List.sort ~compare
  |> function
  | [] -> []
  | hd :: tl ->
    (* The accumulator is the group still being extended plus the groups
       already closed, newest first. *)
    let open_group, closed_groups =
      List.fold tl ~init:((hd, [hd]), []) ~f:(fun ((cover, rev_ranges), closed) current ->
          if current.match_start.line - cover.match_end.line <= threshold then
            (* Extend the cover only when [current] reaches further than
               every range seen so far in this group. *)
            let cover =
              if current.match_end.offset > cover.match_end.offset then
                { cover with match_end = current.match_end }
              else
                cover
            in
            ((cover, current :: rev_ranges), closed)
          else
            ((current, [current]), (cover, rev_ranges) :: closed))
    in
    List.rev_map (open_group :: closed_groups) ~f:finish
(* Builds the JSON object for one source: "uri" holds the path (or null when
   there is none) and "matches" holds the list of serialized chunks. *)
let to_json source_path matches =
  let uri =
    Option.value_map source_path ~default:`Null ~f:(fun path -> `String path)
  in
  let chunks = `List (List.map matches ~f:chunk_match_to_yojson) in
  `Assoc
    [ ("uri", uri)
    ; ("matches", chunks)
    ]
(* Prints the chunks of one source as a single line of JSON on [ppf]. *)
let pp_chunk_matches ppf (source_path, matches) =
  to_json source_path matches
  |> Yojson.Safe.to_string
  |> Format.fprintf ppf "%s\n"

View File

@ -130,6 +130,7 @@ let base_command_parameters : (unit -> 'result) Command.Param.t =
and bound_count = flag "bound-count" (optional int) ~doc:"num Stop running when at least num matches are found (possibly more are returned for parallel jobs)."
and parany = flag "parany" no_arg ~doc:"force comby to use the alternative parany parallel processing library."
and tar = flag "tar" no_arg ~doc:"read tar format from stdin."
and chunk_matches = flag "chunk-matches" (optional int) ~aliases:[] ~doc:"line threshold Return content bounded by the min and max line numbers of match ranges. Optionally specify the threshold (number of lines) for grouping content together. Implies -match-only and -json-lines."
and anonymous_arguments =
anon
(maybe
@ -211,6 +212,7 @@ let base_command_parameters : (unit -> 'result) Command.Param.t =
| _, _, "arm64" -> `Parany number_of_workers
| _, false, _ -> `Hack_parallel number_of_workers
in
let match_only = match_only || Option.is_some chunk_matches in
let configuration =
Command_configuration.create
{ input_options =
@ -253,6 +255,7 @@ let base_command_parameters : (unit -> 'result) Command.Param.t =
; stdout
; substitute_in_place
; interactive_review
; chunk_matches
}
}
|> function

View File

@ -21,6 +21,7 @@
test_hole_extensions
test_match_offsets
test_generic
test_match_chunk_conversion
test_string_literals
test_c_style_comments
test_nested_comments

View File

@ -0,0 +1,63 @@
open Core
open Test_helpers
open Comby_kernel
open Matchers
(* Expect test for [Match.to_chunks]: the source contains three matches of the
   template on distinct, non-overlapping lines (the last one spanning several
   lines), and the expected output lists one chunk per match, each with a
   single range and a chunk start at column 1 of the match's first line. *)
let%expect_test "match_chunk_conversion" =
let source = {|a
b
c
foo(bar)
d
e
baz(qux)
f
foo(
bar
baz
qux
)
g|} in
(* A regex hole for a word followed by a parenthesized hole. *)
let template = ":[x~\\w+](:[y])" in
(* Match, convert offsets to line/column locations, chunk, and pretty-print
   the chunks as a JSON list. *)
let matches =
Alpha.Generic.all ~configuration ~template ~source ()
|> List.map ~f:(Match.convert_offset ~fast:true ~source)
|> Match.to_chunks source
|> (fun m -> `List (List.map m ~f:(Match.chunk_match_to_yojson)))
|> Yojson.Safe.pretty_to_string
in
print_string matches;
[%expect_exact {|[
{
"content": "foo(bar)",
"start": { "offset": 6, "line": 4, "column": 1 },
"ranges": [
{
"start": { "offset": 6, "line": 4, "column": 1 },
"end": { "offset": 14, "line": 4, "column": 9 }
}
]
},
{
"content": "baz(qux)",
"start": { "offset": 19, "line": 7, "column": 1 },
"ranges": [
{
"start": { "offset": 19, "line": 7, "column": 1 },
"end": { "offset": 27, "line": 7, "column": 9 }
}
]
},
{
"content": "foo(\n bar\n baz\n qux\n)",
"start": { "offset": 30, "line": 9, "column": 1 },
"ranges": [
{
"start": { "offset": 30, "line": 9, "column": 1 },
"end": { "offset": 60, "line": 13, "column": 2 }
}
]
}
]|}]