3 module Array = ArrayLabels
4 module List = ListLabels
(** [create f] builds a stream from the generator [f], which is called
    with unit and returns an option each time. *)
9 val create : (unit -> 'a option) -> 'a t
(** [iter t ~f] takes a stream [t] and a labelled callback [f] that
    receives one element at a time and returns unit. *)
11 val iter : 'a t -> f:('a -> unit) -> unit
(* Adapts the unit-taking generator [f] to [S.from] by ignoring the
   argument that [S.from] passes to its callback.
   NOTE(review): [S] is not defined in this excerpt; presumably it aliases
   the stdlib Stream module, where [from] passes the element index and a
   None result ends the stream. Confirm against the full file. *)
19 S.from (fun _ -> f ())
(* Local helper module for reading from input channels.
   NOTE(review): the name In_channel is also used by a standard library
   module in OCaml 4.14 and later; inside this file the local definition
   takes precedence from this point on. *)
25 module In_channel : sig
(** [lines ic] exposes the channel [ic] as a stream of strings. *)
26 val lines : in_channel -> string Stream.t
(* The generator handed to [Stream.create] performs one [input_line] on
   [ic] per call. *)
29 Stream.create (fun () ->
30 match input_line ic with
(* End of input is caught as an exception case of the match. The handler
   body is outside this excerpt; presumably it returns None to end the
   stream. TODO confirm. *)
31 | exception End_of_file ->
(* Directory traversal exposed as a stream of path strings. Only part of
   the module is visible in this excerpt. *)
38 module Directory : sig
(** [find_files root] takes a path string and returns a stream of strings.
    Presumably the stream yields the files found under [root]; the full
    traversal logic is not visible here. *)
39 val find_files : string -> string Stream.t
(* Two FIFO queues hold pending work: [dirs] and [files]. *)
42 let dirs = Queue.create () in
43 let files = Queue.create () in
(* Visit every entry of [parent]. The labelled [~f] works because Array is
   aliased to ArrayLabels in this file.
   NOTE(review): [Sys.readdir] raises Sys_error and [Unix.lstat] raises
   Unix.Unix_error on failure; whether these are handled is not visible in
   this excerpt. *)
45 Array.iter (Sys.readdir parent) ~f:(fun child ->
46 let path = Filename.concat parent child in
(* [lstat] does not follow symbolic links, so a symlink is reported with
   its own kind rather than the kind of its target. How [file_kind] is
   dispatched on is outside this excerpt. *)
47 let {Unix.st_kind = file_kind; _} = Unix.lstat path in
(* Decide what to yield next from the emptiness of the two queues. *)
63 match Queue.is_empty files, Queue.is_empty dirs with
(* A queued file is available: yield it, whatever the state of [dirs]. *)
64 | false, _ -> Some (Queue.take files)
(* Take the next pending directory and pass it to [explore]. The
   definition of [explore] and the remaining match arms are not visible. *)
67 explore (Queue.take dirs);
(* Choose the source of paths: one per line from standard input, or the
   result of [Directory.find_files] on the given root. The head of this
   match and the enclosing function header are outside this excerpt. *)
80 | Paths_on_stdin -> In_channel.lines stdin
81 | Root_path root -> Directory.find_files root
(* Table from a file digest to the list of paths that share it. The
   argument is the initial size hint of the table (one million). *)
83 let paths_by_digest = Hashtbl.create 1_000_000 in
84 let path_count = ref 0 in
(* NOTE(review): [Sys.time] returns processor time used by the program,
   not wall-clock time, so time spent blocked on I/O is not included in
   the duration reported at the end. If elapsed time is intended,
   [Unix.gettimeofday] is an option since Unix is already used in this
   file. Confirm the intent. *)
85 let t0 = Sys.time () in
86 Stream.iter paths ~f:(fun path ->
(* [Digest.file] computes the MD5 digest of the file contents. *)
89 let digest = Digest.file path in
91 match Hashtbl.find_opt paths_by_digest digest with
(* Prepend the current path to the list bound as [paths] by a match arm
   that is not visible here; inside that arm the name presumably shadows
   the outer stream of the same name. *)
97 Hashtbl.replace paths_by_digest digest (path :: paths)
(* Report a per-file failure and flush. [e] is printed with the string
   conversion, so it is presumably the message of a caught exception; the
   handler pattern is not visible. *)
99 eprintf "WARNING: Failed to process %S: %S\n%!" path e
(* Reporting: only digests associated with more than one path are
   printed. The iteration over the table is outside this excerpt. *)
103 let n_paths = List.length paths in
104 if n_paths > 1 then begin
(* Header line: hexadecimal digest followed by the number of paths. *)
105 printf "%s %d\n%!" (Digest.to_hex digest) n_paths;
(* One line per path in the group, each flushed immediately. *)
106 List.iter paths ~f:(fun path -> printf " %s\n%!" path)
110 let t1 = Sys.time () in
(* Summary via eprintf: the value of [path_count] and the difference of
   the two [Sys.time] readings (see the note on [t0] above). *)
111 eprintf "Processed %d files in %f seconds.\n%!" !path_count (t1 -. t0)
114 let input = ref Paths_on_stdin in
115 Arg.parse [] (fun path ->
116 if Sys.file_exists path then
117 input := Root_path path
119 eprintf "File does not exist: %S\n%!" path;