+++ /dev/null
-open Printf
-
-module Array = ArrayLabels
-module List = ListLabels
-
-module Stream : sig
- type 'a t
- val lines : in_channel -> string t
- val rec_file_paths : root:string -> string t
- val iter : 'a t -> f:('a -> unit) -> unit
-end = struct
- module S = Stream
-
- type 'a t =
- 'a S.t
-
- let rec_file_paths ~root =
- let dirs = Queue.create () in
- let files = Queue.create () in
- Queue.add root dirs;
- let explore parent =
- Array.iter (Sys.readdir parent) ~f:(fun child ->
- let path = Filename.concat parent child in
- let {Unix.st_kind = file_kind; _} = Unix.lstat path in
- match file_kind with
- | Unix.S_REG ->
- Queue.add path files
- | Unix.S_DIR ->
- Queue.add path dirs
- | Unix.S_CHR
- | Unix.S_BLK
- | Unix.S_LNK
- | Unix.S_FIFO
- | Unix.S_SOCK ->
- ()
- )
- in
- let next_dir () =
- match Queue.take dirs with
- | exception Queue.Empty ->
- ()
- | dir ->
- explore dir
- in
- let next_file () =
- match Queue.take files with
- | exception Queue.Empty ->
- None
- | file_path ->
- Some file_path
- in
- S.from (fun _ ->
- next_dir ();
- next_file ()
- )
-
- let lines ic =
- S.from (fun _ ->
- match input_line ic with
- | exception End_of_file ->
- None
- | line ->
- Some line
- )
-
- let iter t ~f =
- S.iter f t
-end
-
-type input =
- | Root_path of string
- | Paths_on_stdin
-
-let main input =
- let paths =
- match input with
- | Paths_on_stdin -> Stream.lines stdin
- | Root_path root -> Stream.rec_file_paths ~root
- in
- let paths_by_digest = Hashtbl.create 1_000_000 in
- let path_count = ref 0 in
- let t0 = Sys.time () in
- Stream.iter paths ~f:(fun path ->
- incr path_count;
- try
- let digest = Digest.file path in
- let paths =
- match Hashtbl.find_opt paths_by_digest digest with
- | None ->
- []
- | Some paths ->
- paths
- in
- Hashtbl.replace paths_by_digest digest (path :: paths)
- with Sys_error e ->
- eprintf "WARNING: Failed to process %S: %S\n%!" path e
- );
- Hashtbl.iter
- (fun digest paths ->
- let n_paths = List.length paths in
- if n_paths > 1 then begin
- printf "%s %d\n%!" (Digest.to_hex digest) n_paths;
- List.iter paths ~f:(fun path -> printf " %s\n%!" path)
- end
- )
- paths_by_digest;
- let t1 = Sys.time () in
- eprintf "Processed %d files in %f seconds.\n%!" !path_count (t1 -. t0)
-
-let () =
- let input = ref Paths_on_stdin in
- Arg.parse [] (fun path -> input := Root_path path) "";
- main !input