Initial prototype
[dups.git] / dupfiles.ml
CommitLineData
cce97c27
SK
1open Printf
2
3module List = ListLabels
4
(* Line-oriented iteration over input channels.

   Note that this module shadows the standard library's [Stream] for the
   remainder of the file. *)
module Stream : sig
  (* [lines ic ~f] applies [f] to every line read from [ic], in the order
     the lines appear, and returns once end of file is reached. Exceptions
     raised by [f] or by the underlying read (other than [End_of_file])
     propagate to the caller. *)
  val lines : in_channel -> f:(string -> unit) -> unit
end = struct
  (* Implemented directly on [input_line] instead of the standard library's
     [Stream] module, which was deprecated in OCaml 4.14 and removed in 5.0.
     The value case of [match ... with exception] is in tail position, so
     the recursion runs in constant stack space regardless of input size. *)
  let rec lines ic ~f =
    match input_line ic with
    | exception End_of_file -> ()
    | line ->
      f line;
      lines ic ~f
end
25
(* [main ic] treats each line of [ic] as a file path, computes the digest of
   that file with [Digest.file], and groups the paths by digest. Once the
   input is exhausted, every digest shared by more than one path is printed
   to stdout as a header line (hex digest and path count) followed by one
   line per path. A path whose processing raises [Sys_error] is reported on
   stderr and skipped. *)
let main ic =
  let table = Hashtbl.create 1_000_000 in
  (* Add [path] to the group keyed by its file digest. New paths are consed
     onto the front, so each group is stored in reverse input order. *)
  let record path =
    match Digest.file path with
    | exception Sys_error msg ->
      Printf.eprintf "WARNING: Failed to process %S: %S\n%!" path msg
    | digest ->
      let seen =
        match Hashtbl.find_opt table digest with
        | Some earlier -> earlier
        | None -> []
      in
      Hashtbl.replace table digest (path :: seen)
  in
  (* Print a group only when it holds at least two paths. *)
  let report digest group =
    match group with
    | [] | [ _ ] -> ()
    | _ :: _ :: _ ->
      Printf.printf "%s %d\n%!" (Digest.to_hex digest) (List.length group);
      ListLabels.iter group ~f:(fun member -> Printf.printf " %s\n%!" member)
  in
  Stream.lines ic ~f:record;
  Hashtbl.iter report table
51
(* Program entry point: read the list of paths from the file named on the
   command line, or from stdin when no file is given, and hand the channel
   to [main]. If several filenames are given, the last one is used. *)
let () =
  let ic = ref stdin in
  let use_file filename =
    let next = open_in filename in
    (* A later filename supersedes an earlier one. Close the superseded
       channel so its descriptor is not leaked; stdin is left open here
       because it was not opened by this program. *)
    if !ic != stdin then close_in !ic;
    ic := next
  in
  Arg.parse [] use_file "";
  (* Close the channel whether [main] returns normally or raises. *)
  match main !ic with
  | () -> close_in !ic
  | exception e ->
    close_in_noerr !ic;
    raise e
This page took 0.018779 seconds and 4 git commands to generate.