From 6b873e5a4a280daaa1c9817ec037a930b3410833 Mon Sep 17 00:00:00 2001 From: Siraaj Khandkar Date: Wed, 28 Nov 2018 18:30:35 -0500 Subject: [PATCH] Handle null-delimited input paths --- dups.ml | 50 ++++++++++++++++++++++++++++++++++++------------- input_delim.mll | 4 ++++ 2 files changed, 41 insertions(+), 13 deletions(-) create mode 100644 input_delim.mll diff --git a/dups.ml b/dups.ml index e214d25..a35894d 100644 --- a/dups.ml +++ b/dups.ml @@ -354,16 +354,27 @@ end = struct end module In_channel : sig - val lines : in_channel -> string Stream.t + val lines : ?delim_null:bool -> in_channel -> string Stream.t end = struct - let lines ic = - Stream.create (fun () -> - match input_line ic with - | exception End_of_file -> - None - | line -> - Some line - ) + let read_until_newline ic () = + match input_line ic with + | exception End_of_file -> + None + | line -> + Some line + + let read_until_null ic = + let lexbuf = Lexing.from_channel ic in + fun () -> Input_delim.by_null lexbuf + + let lines ?(delim_null=false) ic = + let reader = + if delim_null then + read_until_null ic + else + read_until_newline ic + in + Stream.create reader end module File : sig @@ -488,13 +499,14 @@ type opt = ; ignore : string -> bool ; sample : int ; njobs : int + ; delim_null : bool } -let make_input_stream input ignore ~metrics = +let make_input_stream input ignore ~metrics ~delim_null = let input = match input with | Stdin -> - File.lookup (In_channel.lines stdin) + File.lookup (In_channel.lines stdin ~delim_null) | Directories paths -> let paths = StrSet.elements (StrSet.of_list paths) in Stream.concat (List.map paths ~f:File.find) @@ -532,12 +544,12 @@ let time_wall () = let time_proc () = Sys.time () -let main {input; output; ignore; sample = sample_len; njobs} = +let main {input; output; ignore; sample = sample_len; njobs; delim_null} = let wt0_all = time_wall () in let pt0_all = time_proc () in let metrics = M.init () in let output = make_output_fun output in - let input = make_input_stream input ignore ~metrics in + let input = make_input_stream input ignore ~metrics ~delim_null in (* TODO: Make a nice(r) abstraction to re-assemble pieces in the pipeline: * * from input to files_by_size @@ -643,6 +655,7 @@ let get_opt () : opt = let ignore = ref (fun _ -> false) in let sample = ref 512 in let njobs = ref 6 in + let input_delim_null = ref false in let spec = [ ( "-out" , Arg.String (fun path -> @@ -666,6 +679,16 @@ let get_opt () : opt = , Arg.Set_int njobs , (sprintf " Number of parallel jobs. Default: %d" !njobs) ) + ; ( "-0" + , Arg.Set input_delim_null + , ( sprintf + ( " Delimit input paths by null character instead of a newline." + ^^" Meaningful only when reading candidate paths from stdin." + ^^" Default: %B" + ) + !input_delim_null + ) + ) ] in Arg.parse @@ -689,6 +712,7 @@ let get_opt () : opt = ; ignore = !ignore ; sample = !sample ; njobs = !njobs + ; delim_null = !input_delim_null } let () = diff --git a/input_delim.mll b/input_delim.mll new file mode 100644 index 0000000..0d32486 --- /dev/null +++ b/input_delim.mll @@ -0,0 +1,4 @@ +rule by_null = parse +| eof {None} +| [^ '\000']+ as line {Some line} +| '\000'+ {by_null lexbuf} -- 2.20.1