From: Siraaj Khandkar Date: Wed, 14 Nov 2018 15:57:56 +0000 (-0500) Subject: Implement recursive directory walk stream X-Git-Url: https://git.xandkar.net/?a=commitdiff_plain;h=948ee900b86c5935412cf8a53fc4f55260662249;p=dups.git Implement recursive directory walk stream to avoid the issue with newlines when accepting file paths on stdin. --- diff --git a/Makefile b/Makefile index d79ad63..89c5816 100644 --- a/Makefile +++ b/Makefile @@ -7,7 +7,7 @@ all: @$(MAKE) -s build build: - @ocamlbuild -cflags '-w A' $(TARGET) + @ocamlbuild -cflags '-w A' -pkg 'unix' $(TARGET) clean: @ocamlbuild -clean diff --git a/dupfiles.ml b/dupfiles.ml index 42647b7..1bd4d64 100644 --- a/dupfiles.ml +++ b/dupfiles.ml @@ -1,13 +1,60 @@ open Printf -module List = ListLabels +module Array = ArrayLabels +module List = ListLabels module Stream : sig - val lines : in_channel -> f:(string -> unit) -> unit + type 'a t + val lines : in_channel -> string t + val rec_file_paths : root:string -> string t + val iter : 'a t -> f:('a -> unit) -> unit end = struct module S = Stream - let lines_of_channel ic = + type 'a t = + 'a S.t + + let rec_file_paths ~root = + let dirs = Queue.create () in + let files = Queue.create () in + Queue.add root dirs; + let explore parent = + Array.iter (Sys.readdir parent) ~f:(fun child -> + let path = Filename.concat parent child in + let {Unix.st_kind = file_kind; _} = Unix.lstat path in + match file_kind with + | Unix.S_REG -> + Queue.add path files + | Unix.S_DIR -> + Queue.add path dirs + | Unix.S_CHR + | Unix.S_BLK + | Unix.S_LNK + | Unix.S_FIFO + | Unix.S_SOCK -> + () + ) + in + let next_dir () = + match Queue.take dirs with + | exception Queue.Empty -> + () + | dir -> + explore dir + in + let next_file () = + match Queue.take files with + | exception Queue.Empty -> + None + | file_path -> + Some file_path + in + S.from (fun _ -> + next_dir (); + next_file () + ) + + let lines ic = S.from (fun _ -> match input_line ic with | exception End_of_file -> @@ -18,14 +65,20 @@ end = struct let iter t ~f = S.iter f t - - let lines ic ~f = - iter (lines_of_channel ic) ~f end -let main ic = +type input = + | Root_path of string + | Paths_on_stdin + +let main input = + let paths = + match input with + | Paths_on_stdin -> Stream.lines stdin + | Root_path root -> Stream.rec_file_paths ~root + in let paths_by_digest = Hashtbl.create 1_000_000 in - Stream.lines ic ~f:(fun path -> + Stream.iter paths ~f:(fun path -> try let digest = Digest.file path in let paths = @@ -50,7 +103,6 @@ let main ic = paths_by_digest let () = - let ic = ref stdin in - Arg.parse [] (fun filename -> ic := open_in filename) ""; - main !ic; - close_in !ic + let input = ref Paths_on_stdin in + Arg.parse [] (fun path -> input := Root_path path) ""; + main !input