Commit | Line | Data |
---|---|---|
cce97c27 SK |
1 | open Printf |
2 | ||
948ee900 SK |
3 | module Array = ArrayLabels |
4 | module List = ListLabels | |
cce97c27 SK |
5 | |
6 | module Stream : sig | |
948ee900 | 7 | type 'a t |
e13e9ef5 SK |
8 | |
9 | val create : (unit -> 'a option) -> 'a t | |
10 | ||
948ee900 | 11 | val iter : 'a t -> f:('a -> unit) -> unit |
cce97c27 SK |
12 | end = struct |
13 | module S = Stream | |
14 | ||
948ee900 SK |
15 | type 'a t = |
16 | 'a S.t | |
17 | ||
e13e9ef5 SK |
18 | let create f = |
19 | S.from (fun _ -> f ()) | |
20 | ||
21 | let iter t ~f = | |
22 | S.iter f t | |
23 | end | |
24 | ||
25 | module In_channel : sig | |
26 | val lines : in_channel -> string Stream.t | |
27 | end = struct | |
28 | let lines ic = | |
29 | Stream.create (fun () -> | |
30 | match input_line ic with | |
31 | | exception End_of_file -> | |
32 | None | |
33 | | line -> | |
34 | Some line | |
35 | ) | |
36 | end | |
37 | ||
38 | module Directory : sig | |
39 | val find_files : string -> string Stream.t | |
40 | end = struct | |
41 | let find_files root = | |
948ee900 SK |
42 | let dirs = Queue.create () in |
43 | let files = Queue.create () in | |
948ee900 SK |
44 | let explore parent = |
45 | Array.iter (Sys.readdir parent) ~f:(fun child -> | |
46 | let path = Filename.concat parent child in | |
47 | let {Unix.st_kind = file_kind; _} = Unix.lstat path in | |
48 | match file_kind with | |
49 | | Unix.S_REG -> | |
50 | Queue.add path files | |
51 | | Unix.S_DIR -> | |
52 | Queue.add path dirs | |
53 | | Unix.S_CHR | |
54 | | Unix.S_BLK | |
55 | | Unix.S_LNK | |
56 | | Unix.S_FIFO | |
57 | | Unix.S_SOCK -> | |
58 | () | |
59 | ) | |
60 | in | |
1f130f74 | 61 | explore root; |
c66266c6 SK |
62 | let rec next () = |
63 | match Queue.is_empty files, Queue.is_empty dirs with | |
64 | | false, _ -> Some (Queue.take files) | |
65 | | true , true -> None | |
66 | | true , false -> | |
67 | explore (Queue.take dirs); | |
68 | next () | |
69 | in | |
70 | Stream.create next | |
cce97c27 SK |
71 | end |
72 | ||
948ee900 SK |
73 | type input = |
74 | | Root_path of string | |
75 | | Paths_on_stdin | |
76 | ||
77 | let main input = | |
78 | let paths = | |
79 | match input with | |
e13e9ef5 SK |
80 | | Paths_on_stdin -> In_channel.lines stdin |
81 | | Root_path root -> Directory.find_files root | |
948ee900 | 82 | in |
cce97c27 | 83 | let paths_by_digest = Hashtbl.create 1_000_000 in |
f16e6ff1 SK |
84 | let path_count = ref 0 in |
85 | let t0 = Sys.time () in | |
948ee900 | 86 | Stream.iter paths ~f:(fun path -> |
f16e6ff1 | 87 | incr path_count; |
cce97c27 SK |
88 | try |
89 | let digest = Digest.file path in | |
90 | let paths = | |
91 | match Hashtbl.find_opt paths_by_digest digest with | |
92 | | None -> | |
93 | [] | |
94 | | Some paths -> | |
95 | paths | |
96 | in | |
97 | Hashtbl.replace paths_by_digest digest (path :: paths) | |
98 | with Sys_error e -> | |
99 | eprintf "WARNING: Failed to process %S: %S\n%!" path e | |
100 | ); | |
101 | Hashtbl.iter | |
102 | (fun digest paths -> | |
103 | let n_paths = List.length paths in | |
104 | if n_paths > 1 then begin | |
105 | printf "%s %d\n%!" (Digest.to_hex digest) n_paths; | |
106 | List.iter paths ~f:(fun path -> printf " %s\n%!" path) | |
107 | end | |
108 | ) | |
f16e6ff1 SK |
109 | paths_by_digest; |
110 | let t1 = Sys.time () in | |
111 | eprintf "Processed %d files in %f seconds.\n%!" !path_count (t1 -. t0) | |
cce97c27 SK |
112 | |
113 | let () = | |
948ee900 | 114 | let input = ref Paths_on_stdin in |
1f130f74 SK |
115 | Arg.parse [] (fun path -> |
116 | if Sys.file_exists path then | |
117 | input := Root_path path | |
118 | else begin | |
119 | eprintf "File does not exist: %S\n%!" path; | |
120 | exit 1 | |
121 | end | |
122 | ) ""; | |
948ee900 | 123 | main !input |