Commit | Line | Data |
---|---|---|
cce97c27 SK |
1 | open Printf |
2 | ||
948ee900 SK |
3 | module Array = ArrayLabels |
4 | module List = ListLabels | |
cce97c27 SK |
5 | |
6 | module Stream : sig | |
948ee900 | 7 | type 'a t |
e13e9ef5 SK |
8 | |
9 | val create : (unit -> 'a option) -> 'a t | |
10 | ||
948ee900 | 11 | val iter : 'a t -> f:('a -> unit) -> unit |
cce97c27 SK |
12 | end = struct |
13 | module S = Stream | |
14 | ||
948ee900 SK |
15 | type 'a t = |
16 | 'a S.t | |
17 | ||
e13e9ef5 SK |
18 | let create f = |
19 | S.from (fun _ -> f ()) | |
20 | ||
21 | let iter t ~f = | |
22 | S.iter f t | |
23 | end | |
24 | ||
25 | module In_channel : sig | |
26 | val lines : in_channel -> string Stream.t | |
27 | end = struct | |
28 | let lines ic = | |
29 | Stream.create (fun () -> | |
30 | match input_line ic with | |
31 | | exception End_of_file -> | |
32 | None | |
33 | | line -> | |
34 | Some line | |
35 | ) | |
36 | end | |
37 | ||
38 | module Directory : sig | |
39 | val find_files : string -> string Stream.t | |
40 | end = struct | |
41 | let find_files root = | |
948ee900 SK |
42 | let dirs = Queue.create () in |
43 | let files = Queue.create () in | |
948ee900 SK |
44 | let explore parent = |
45 | Array.iter (Sys.readdir parent) ~f:(fun child -> | |
46 | let path = Filename.concat parent child in | |
47 | let {Unix.st_kind = file_kind; _} = Unix.lstat path in | |
48 | match file_kind with | |
49 | | Unix.S_REG -> | |
50 | Queue.add path files | |
51 | | Unix.S_DIR -> | |
52 | Queue.add path dirs | |
53 | | Unix.S_CHR | |
54 | | Unix.S_BLK | |
55 | | Unix.S_LNK | |
56 | | Unix.S_FIFO | |
57 | | Unix.S_SOCK -> | |
58 | () | |
59 | ) | |
60 | in | |
61 | let next_dir () = | |
62 | match Queue.take dirs with | |
63 | | exception Queue.Empty -> | |
64 | () | |
65 | | dir -> | |
66 | explore dir | |
67 | in | |
68 | let next_file () = | |
69 | match Queue.take files with | |
70 | | exception Queue.Empty -> | |
71 | None | |
72 | | file_path -> | |
73 | Some file_path | |
74 | in | |
1f130f74 | 75 | explore root; |
e13e9ef5 | 76 | Stream.create (fun () -> |
948ee900 SK |
77 | next_dir (); |
78 | next_file () | |
79 | ) | |
cce97c27 SK |
80 | end |
81 | ||
948ee900 SK |
82 | type input = |
83 | | Root_path of string | |
84 | | Paths_on_stdin | |
85 | ||
86 | let main input = | |
87 | let paths = | |
88 | match input with | |
e13e9ef5 SK |
89 | | Paths_on_stdin -> In_channel.lines stdin |
90 | | Root_path root -> Directory.find_files root | |
948ee900 | 91 | in |
cce97c27 | 92 | let paths_by_digest = Hashtbl.create 1_000_000 in |
f16e6ff1 SK |
93 | let path_count = ref 0 in |
94 | let t0 = Sys.time () in | |
948ee900 | 95 | Stream.iter paths ~f:(fun path -> |
f16e6ff1 | 96 | incr path_count; |
cce97c27 SK |
97 | try |
98 | let digest = Digest.file path in | |
99 | let paths = | |
100 | match Hashtbl.find_opt paths_by_digest digest with | |
101 | | None -> | |
102 | [] | |
103 | | Some paths -> | |
104 | paths | |
105 | in | |
106 | Hashtbl.replace paths_by_digest digest (path :: paths) | |
107 | with Sys_error e -> | |
108 | eprintf "WARNING: Failed to process %S: %S\n%!" path e | |
109 | ); | |
110 | Hashtbl.iter | |
111 | (fun digest paths -> | |
112 | let n_paths = List.length paths in | |
113 | if n_paths > 1 then begin | |
114 | printf "%s %d\n%!" (Digest.to_hex digest) n_paths; | |
115 | List.iter paths ~f:(fun path -> printf " %s\n%!" path) | |
116 | end | |
117 | ) | |
f16e6ff1 SK |
118 | paths_by_digest; |
119 | let t1 = Sys.time () in | |
120 | eprintf "Processed %d files in %f seconds.\n%!" !path_count (t1 -. t0) | |
cce97c27 SK |
121 | |
122 | let () = | |
948ee900 | 123 | let input = ref Paths_on_stdin in |
1f130f74 SK |
124 | Arg.parse [] (fun path -> |
125 | if Sys.file_exists path then | |
126 | input := Root_path path | |
127 | else begin | |
128 | eprintf "File does not exist: %S\n%!" path; | |
129 | exit 1 | |
130 | end | |
131 | ) ""; | |
948ee900 | 132 | main !input |