3 module Array = ArrayLabels
4 module List = ListLabels
(** [create next] builds a stream of ['a] values from the generator function
    [next], which takes [unit] and returns an ['a option].

    NOTE(review): presumably [Some x] yields the element [x] and [None] marks
    the end of the stream; this signature alone does not establish that --
    confirm against the implementation. *)
9 val create : (unit -> 'a option) -> 'a t
(** [iter t ~f] runs the side-effecting function [f] on elements of the stream
    [t] and returns [unit].

    NOTE(review): presumably every element is visited exactly once, in stream
    order, and [t] is consumed by the traversal -- confirm against the
    implementation. *)
11 val iter : 'a t -> f:('a -> unit) -> unit
19 S.from (fun _ -> f ())
25 module In_channel : sig
(** [lines ic] returns a stream of strings obtained from the input channel
    [ic].

    NOTE(review): the name and the nearby use of [input_line] with an
    [End_of_file] handler suggest one element per line of input, with the
    stream ending at end of file -- confirm against the full implementation. *)
26 val lines : in_channel -> string Stream.t
29 Stream.create (fun () ->
30 match input_line ic with
31 | exception End_of_file ->
38 module Directory : sig
(** [find_files root] returns a stream of strings for the path [root].

    NOTE(review): presumably the elements are paths of files discovered by
    walking the directory tree rooted at [root]; which file kinds are yielded,
    the traversal order, and how symbolic links and unreadable directories are
    treated are not visible from this signature -- confirm against the
    implementation. *)
39 val find_files : string -> string Stream.t
42 let dirs = Queue.create () in
43 let files = Queue.create () in
46 Array.iter (Sys.readdir parent) ~f:(fun child ->
47 let path = Filename.concat parent child in
48 let {Unix.st_kind = file_kind; _} = Unix.lstat path in
63 match Queue.take dirs with
64 | exception Queue.Empty ->
70 match Queue.take files with
71 | exception Queue.Empty ->
76 Stream.create (fun () ->
89 | Paths_on_stdin -> In_channel.lines stdin
90 | Root_path root -> Directory.find_files root
92 let paths_by_digest = Hashtbl.create 1_000_000 in
93 let path_count = ref 0 in
94 let t0 = Sys.time () in
95 Stream.iter paths ~f:(fun path ->
98 let digest = Digest.file path in
100 match Hashtbl.find_opt paths_by_digest digest with
106 Hashtbl.replace paths_by_digest digest (path :: paths)
108 eprintf "WARNING: Failed to process %S: %S\n%!" path e
112 let n_paths = List.length paths in
113 if n_paths > 1 then begin
114 printf "%s %d\n%!" (Digest.to_hex digest) n_paths;
115 List.iter paths ~f:(fun path -> printf " %s\n%!" path)
119 let t1 = Sys.time () in
120 eprintf "Processed %d files in %f seconds.\n%!" !path_count (t1 -. t0)
123 let input = ref Paths_on_stdin in
124 Arg.parse [] (fun path -> input := Root_path path) "";