support reading async from tar

This commit is contained in:
Rijnard van Tonder 2021-11-14 01:59:07 -08:00
parent 08fc85ca8f
commit c1630c84de
8 changed files with 78 additions and 11 deletions

View File

@ -43,6 +43,8 @@ depends: [
"ppx_deriving_yojson" {>= "3.6.0"}
"pcre"
"shell"
"tar"
"tar-unix"
"toml" {>= "6.0.0"}
"bisect_ppx" {with-test & dev & >= "2.5.0"}
]

4
dune
View File

@ -1,6 +1,6 @@
(env
(dev
(flags (:standard -w A-3-4-32-34-39-40-41-42-44-45-48-49-50-57-60-66-67)))
(flags (:standard -w A-3-4-32-34-37-39-40-41-42-44-45-48-49-50-57-60-66-67)))
(release
(flags (:standard -w A-3-4-32-34-39-40-41-42-44-45-48-49-50-57-60-66-67))
(flags (:standard -w A-3-4-32-34-37-39-40-41-42-44-45-48-49-50-57-60-66-67))
(ocamlopt_flags (-O3))))

View File

@ -209,6 +209,7 @@ type anonymous_arguments =
type user_input_options =
{ rule : string
; stdin : bool
; tar : bool
; templates : string list option
; anonymous_arguments : anonymous_arguments option
; file_filters : string list option
@ -255,6 +256,7 @@ type input_source =
| Stdin
| Zip
| Directory
| Tar
module Printer = struct
@ -441,13 +443,15 @@ let emit_errors { input_options; output_options; _ } =
file contents and -diff outputs a unified diff. Choose one of these."
; output_options.overwrite_file_in_place && is_some input_options.zip_file
, "-in-place may not be used with -zip."
; output_options.overwrite_file_in_place && input_options.tar
, "-in-place may not be used with -tar."
; output_options.overwrite_file_in_place && output_options.stdout
, "-in-place may not be used with -stdout."
; output_options.overwrite_file_in_place && output_options.diff
, "-in-place may not be used with -diff."
; Option.is_some output_options.interactive_review
&& (input_options.stdin || Option.is_some input_options.zip_file || input_options.match_only)
, "-review cannot be used with one or more of the following input flags: -stdin, -zip, -match-only."
&& (input_options.stdin || Option.is_some input_options.zip_file || input_options.match_only || input_options.tar)
, "-review cannot be used with one or more of the following input flags: -stdin, -zip, -match-only, -tar."
; Option.is_some output_options.interactive_review
&& (output_options.json_lines
|| output_options.json_only_diff
@ -556,6 +560,8 @@ let emit_warnings { input_options; output_options; _ } =
, "-in-place has no effect when -stdin is used. Ignoring -in-place."
; output_options.count && output_options.json_lines
, "-count and -json-lines is specified. Ignoring -count."
; input_options.stdin && input_options.tar
, "-tar implies -stdin. Ignoring -stdin."
]
in
List.iter warn_on ~f:(function
@ -696,6 +702,7 @@ let create
; zip_file
; match_only
; stdin
; tar
; target_directory
; directory_depth
; exclude_directory_prefix
@ -758,14 +765,16 @@ let create
| None -> file_filters_from_anonymous_args
in
let input_source =
match stdin, zip_file with
| true, _ -> Stdin
| _, Some _ -> Zip
| false, None -> Directory
match stdin, zip_file, tar with
| _, _, true -> Tar
| true, _, _ -> Stdin
| _, Some _, _ -> Zip
| false, None, _ -> Directory
in
let sources =
match input_source with
| Stdin -> `String (In_channel.input_all In_channel.stdin)
| Tar -> `Tar
| Zip ->
let zip_file = Option.value_exn zip_file in
let paths : Zip.entry list =
@ -789,7 +798,7 @@ let create
`Paths paths
in
let overwrite_file_in_place =
if input_source = Zip || input_source = Stdin then
if input_source = Zip || input_source = Stdin || input_source = Tar then
false
else
overwrite_file_in_place

View File

@ -44,6 +44,7 @@ type anonymous_arguments =
type user_input_options =
{ rule : string
; stdin : bool
; tar : bool
; templates : string list option
; anonymous_arguments : anonymous_arguments option
; file_filters : string list option

View File

@ -10,6 +10,7 @@ type batch_input =
type t =
[ batch_input
| `String of string
| `Tar
]
let show_input_kind =

View File

@ -3,4 +3,4 @@
(public_name comby.configuration)
(instrumentation (backend bisect_ppx))
(preprocess (pps ppx_deriving.show ppx_sexp_conv ppx_sexp_message ppx_deriving_yojson))
(libraries comby-kernel comby-semantic comby.patdiff comby.camlzip core yojson ppx_deriving_yojson toml lwt lwt.unix))
(libraries comby-kernel comby-semantic comby.patdiff comby.camlzip core yojson ppx_deriving_yojson toml lwt lwt.unix tar tar-unix))

View File

@ -177,7 +177,7 @@ let write_statistics number_of_matches sources start_time =
| `Zip (zip_file, paths) ->
let lines_of_code = Fold.loc_zip zip_file paths in
lines_of_code, List.length paths
| _ -> failwith "No single path handled here"
| _ -> failwith "No statistics for this input kind"
in
let statistics =
{ number_of_files
@ -241,6 +241,30 @@ let run_interactive
Interactive.run editor default_is_accept count rewrites;
count
(** Lwt-based primitives for consuming the payload of tar entries from a
    file descriptor. *)
module TarReader = struct
  type in_channel = Lwt_unix.file_descr
  type 'a t = 'a Lwt.t

  (** [really_read fd buf] applies [Lwt_cstruct.read fd] through
      [Lwt_cstruct.complete], i.e. it keeps reading from [fd] into [buf]
      until the whole buffer has been filled. *)
  let really_read fd = Lwt_cstruct.(complete (read fd))

  (** [skip ifd n] reads and discards [n] bytes from [ifd], at most 32768
      bytes at a time through a single reusable scratch buffer. Resolves
      immediately when [n <= 0]. *)
  let skip (ifd : Lwt_unix.file_descr) (n : int) =
    let chunk_size = 32768 in
    let scratch = Cstruct.create chunk_size in
    let rec discard remaining =
      if remaining > 0 then begin
        let len = min remaining chunk_size in
        (* Only expose the part of the scratch buffer we still need to fill. *)
        let window = Cstruct.sub scratch 0 len in
        Lwt.bind (really_read ifd window) (fun () -> discard (remaining - len))
      end
      else Lwt.return_unit
    in
    discard n

  (** [read_content ifd n] reads exactly [n] bytes from [ifd] and returns
      them as a string. *)
  let read_content ifd n =
    let payload = Cstruct.create n in
    Lwt.bind (really_read ifd payload) (fun () ->
        Lwt.return (Cstruct.to_string payload))
end
let run
{ matcher
; sources
@ -301,6 +325,34 @@ let run
| None ->
begin match sources with
| `String source -> per_unit ~input:(String source) ~output_path:None
| `Tar ->
  (* Stream a tar archive from stdin, one entry at a time. Entries whose
     header reports a size of zero are skipped; every other entry is read
     fully into memory and handed to [per_unit] as a [String] input with no
     output path. The result is the sum of the counts [per_unit] returns. *)
  let open Lwt.Infix in
  let fd = Lwt_unix.stdin in
  (* The running total is threaded through [loop] as an accumulator, so no
     pending continuation is kept per archive entry. *)
  let rec loop count =
    Tar_lwt_unix.get_next_header fd >>= function
    | None -> Lwt.return count
    | Some header ->
      let file_size = Int64.to_int_exn header.Tar.Header.file_size in
      (* Entry payloads are followed by padding that must be consumed before
         the next header can be read. *)
      let padding = Tar.Header.compute_zero_padding_length header in
      let trace =
        if debug then
          Lwt_io.eprintf "Reading file %s\n Size %d\n" header.Tar.Header.file_name file_size
        else
          Lwt.return ()
      in
      trace >>= fun () ->
      if file_size = 0 then begin
        TarReader.skip fd padding >>= fun () ->
        loop count
      end
      else begin
        TarReader.read_content fd file_size >>= fun source ->
        let n = per_unit ~input:(String source) ~output_path:None in
        TarReader.skip fd padding >>= fun () ->
        loop (count + n)
      end
  in
  (* Any failure while reading the archive aborts processing and yields a
     count of 0. The diagnostic goes to stderr, like the debug trace above,
     rather than to stdout. *)
  (try Lwt_main.run (loop 0)
   with err ->
     Format.eprintf "Tar processing error: %s@." (Exn.to_string err);
     0)
| #batch_input as sources -> run_batch ~f:per_unit sources compute_mode bound_count
end
| Some interactive_review ->

View File

@ -129,6 +129,7 @@ let base_command_parameters : (unit -> 'result) Command.Param.t =
and ripgrep_args = flag "ripgrep" (optional string) ~aliases:["rg"] ~doc:"flags Activate ripgrep for filtering files. Add flags like '-g *.go' to include or exclude file extensions."
and bound_count = flag "bound-count" (optional int) ~doc:"num Stop running when at least num matches are found (possibly more are returned for parallel jobs)."
and parany = flag "parany" no_arg ~doc:"force comby to use the alternative parany parallel processing library."
and tar = flag "tar" no_arg ~doc:"read tar format from stdin."
and anonymous_arguments =
anon
(maybe
@ -220,6 +221,7 @@ let base_command_parameters : (unit -> 'result) Command.Param.t =
; zip_file
; match_only
; stdin
; tar
; target_directory
; directory_depth
; exclude_directory_prefix