Commit | Line | Data |
---|---|---|
cce97c27 SK |
1 | open Printf |
2 | ||
948ee900 SK |
3 | module Array = ArrayLabels |
4 | module List = ListLabels | |
8673c3a5 | 5 | module StrSet= Set.Make(String) |
cce97c27 SK |
6 | |
7 | module Stream : sig | |
948ee900 | 8 | type 'a t |
e13e9ef5 SK |
9 | |
10 | val create : (unit -> 'a option) -> 'a t | |
11 | ||
948ee900 | 12 | val iter : 'a t -> f:('a -> unit) -> unit |
8673c3a5 SK |
13 | |
14 | val concat : ('a t) list -> 'a t | |
cce97c27 SK |
15 | end = struct |
16 | module S = Stream | |
17 | ||
948ee900 | 18 | type 'a t = |
8673c3a5 | 19 | ('a S.t) list |
948ee900 | 20 | |
e13e9ef5 | 21 | let create f = |
8673c3a5 | 22 | [S.from (fun _ -> f ())] |
e13e9ef5 SK |
23 | |
24 | let iter t ~f = | |
8673c3a5 SK |
25 | List.iter t ~f:(S.iter f) |
26 | ||
27 | let concat ts = | |
28 | List.concat ts | |
e13e9ef5 SK |
29 | end |
30 | ||
31 | module In_channel : sig | |
32 | val lines : in_channel -> string Stream.t | |
33 | end = struct | |
34 | let lines ic = | |
35 | Stream.create (fun () -> | |
36 | match input_line ic with | |
37 | | exception End_of_file -> | |
38 | None | |
39 | | line -> | |
40 | Some line | |
41 | ) | |
42 | end | |
43 | ||
7b7a6b7f | 44 | module Directory_tree : sig |
e13e9ef5 SK |
45 | val find_files : string -> string Stream.t |
46 | end = struct | |
47 | let find_files root = | |
948ee900 SK |
48 | let dirs = Queue.create () in |
49 | let files = Queue.create () in | |
948ee900 SK |
50 | let explore parent = |
51 | Array.iter (Sys.readdir parent) ~f:(fun child -> | |
52 | let path = Filename.concat parent child in | |
53 | let {Unix.st_kind = file_kind; _} = Unix.lstat path in | |
54 | match file_kind with | |
55 | | Unix.S_REG -> | |
56 | Queue.add path files | |
57 | | Unix.S_DIR -> | |
58 | Queue.add path dirs | |
59 | | Unix.S_CHR | |
60 | | Unix.S_BLK | |
61 | | Unix.S_LNK | |
62 | | Unix.S_FIFO | |
63 | | Unix.S_SOCK -> | |
64 | () | |
65 | ) | |
66 | in | |
1f130f74 | 67 | explore root; |
c66266c6 SK |
68 | let rec next () = |
69 | match Queue.is_empty files, Queue.is_empty dirs with | |
70 | | false, _ -> Some (Queue.take files) | |
71 | | true , true -> None | |
72 | | true , false -> | |
73 | explore (Queue.take dirs); | |
74 | next () | |
75 | in | |
76 | Stream.create next | |
cce97c27 SK |
77 | end |
78 | ||
948ee900 | 79 | type input = |
8673c3a5 | 80 | | Root_paths of string list |
948ee900 SK |
81 | | Paths_on_stdin |
82 | ||
83 | let main input = | |
84 | let paths = | |
85 | match input with | |
8673c3a5 SK |
86 | | Paths_on_stdin -> |
87 | In_channel.lines stdin | |
88 | | Root_paths paths -> | |
89 | let paths = StrSet.elements (StrSet.of_list paths) in | |
90 | Stream.concat (List.map paths ~f:Directory_tree.find_files) | |
948ee900 | 91 | in |
cce97c27 | 92 | let paths_by_digest = Hashtbl.create 1_000_000 in |
f16e6ff1 SK |
93 | let path_count = ref 0 in |
94 | let t0 = Sys.time () in | |
948ee900 | 95 | Stream.iter paths ~f:(fun path -> |
f16e6ff1 | 96 | incr path_count; |
cce97c27 SK |
97 | try |
98 | let digest = Digest.file path in | |
99 | let paths = | |
100 | match Hashtbl.find_opt paths_by_digest digest with | |
101 | | None -> | |
8673c3a5 | 102 | StrSet.empty |
cce97c27 SK |
103 | | Some paths -> |
104 | paths | |
105 | in | |
8673c3a5 | 106 | Hashtbl.replace paths_by_digest digest (StrSet.add path paths) |
cce97c27 SK |
107 | with Sys_error e -> |
108 | eprintf "WARNING: Failed to process %S: %S\n%!" path e | |
109 | ); | |
110 | Hashtbl.iter | |
111 | (fun digest paths -> | |
8673c3a5 | 112 | let n_paths = StrSet.cardinal paths in |
cce97c27 SK |
113 | if n_paths > 1 then begin |
114 | printf "%s %d\n%!" (Digest.to_hex digest) n_paths; | |
8673c3a5 | 115 | List.iter (StrSet.elements paths) ~f:(printf " %s\n%!") |
cce97c27 SK |
116 | end |
117 | ) | |
f16e6ff1 SK |
118 | paths_by_digest; |
119 | let t1 = Sys.time () in | |
120 | eprintf "Processed %d files in %f seconds.\n%!" !path_count (t1 -. t0) | |
cce97c27 SK |
121 | |
122 | let () = | |
948ee900 | 123 | let input = ref Paths_on_stdin in |
61a05dbb SK |
124 | Arg.parse |
125 | [] | |
126 | (function | |
127 | | path when Sys.file_exists path -> | |
8673c3a5 SK |
128 | (match !input with |
129 | | Paths_on_stdin -> | |
130 | input := Root_paths [path] | |
131 | | Root_paths paths -> | |
132 | input := Root_paths (path :: paths) | |
133 | ) | |
61a05dbb SK |
134 | | path -> |
135 | eprintf "File does not exist: %S\n%!" path; | |
136 | exit 1 | |
137 | ) | |
138 | ""; | |
948ee900 | 139 | main !input |