Commit | Line | Data |
---|---|---|
cce97c27 SK |
1 | open Printf |
2 | ||
948ee900 SK |
3 | module Array = ArrayLabels |
4 | module List = ListLabels | |
cce97c27 SK |
5 | |
6 | module Stream : sig | |
948ee900 SK |
7 | type 'a t |
8 | val lines : in_channel -> string t | |
9 | val rec_file_paths : root:string -> string t | |
10 | val iter : 'a t -> f:('a -> unit) -> unit | |
cce97c27 SK |
11 | end = struct |
12 | module S = Stream | |
13 | ||
948ee900 SK |
14 | type 'a t = |
15 | 'a S.t | |
16 | ||
17 | let rec_file_paths ~root = | |
18 | let dirs = Queue.create () in | |
19 | let files = Queue.create () in | |
20 | Queue.add root dirs; | |
21 | let explore parent = | |
22 | Array.iter (Sys.readdir parent) ~f:(fun child -> | |
23 | let path = Filename.concat parent child in | |
24 | let {Unix.st_kind = file_kind; _} = Unix.lstat path in | |
25 | match file_kind with | |
26 | | Unix.S_REG -> | |
27 | Queue.add path files | |
28 | | Unix.S_DIR -> | |
29 | Queue.add path dirs | |
30 | | Unix.S_CHR | |
31 | | Unix.S_BLK | |
32 | | Unix.S_LNK | |
33 | | Unix.S_FIFO | |
34 | | Unix.S_SOCK -> | |
35 | () | |
36 | ) | |
37 | in | |
38 | let next_dir () = | |
39 | match Queue.take dirs with | |
40 | | exception Queue.Empty -> | |
41 | () | |
42 | | dir -> | |
43 | explore dir | |
44 | in | |
45 | let next_file () = | |
46 | match Queue.take files with | |
47 | | exception Queue.Empty -> | |
48 | None | |
49 | | file_path -> | |
50 | Some file_path | |
51 | in | |
52 | S.from (fun _ -> | |
53 | next_dir (); | |
54 | next_file () | |
55 | ) | |
56 | ||
57 | let lines ic = | |
cce97c27 SK |
58 | S.from (fun _ -> |
59 | match input_line ic with | |
60 | | exception End_of_file -> | |
61 | None | |
62 | | line -> | |
63 | Some line | |
64 | ) | |
65 | ||
66 | let iter t ~f = | |
67 | S.iter f t | |
cce97c27 SK |
68 | end |
69 | ||
948ee900 SK |
70 | type input = |
71 | | Root_path of string | |
72 | | Paths_on_stdin | |
73 | ||
74 | let main input = | |
75 | let paths = | |
76 | match input with | |
77 | | Paths_on_stdin -> Stream.lines stdin | |
78 | | Root_path root -> Stream.rec_file_paths ~root | |
79 | in | |
cce97c27 | 80 | let paths_by_digest = Hashtbl.create 1_000_000 in |
948ee900 | 81 | Stream.iter paths ~f:(fun path -> |
cce97c27 SK |
82 | try |
83 | let digest = Digest.file path in | |
84 | let paths = | |
85 | match Hashtbl.find_opt paths_by_digest digest with | |
86 | | None -> | |
87 | [] | |
88 | | Some paths -> | |
89 | paths | |
90 | in | |
91 | Hashtbl.replace paths_by_digest digest (path :: paths) | |
92 | with Sys_error e -> | |
93 | eprintf "WARNING: Failed to process %S: %S\n%!" path e | |
94 | ); | |
95 | Hashtbl.iter | |
96 | (fun digest paths -> | |
97 | let n_paths = List.length paths in | |
98 | if n_paths > 1 then begin | |
99 | printf "%s %d\n%!" (Digest.to_hex digest) n_paths; | |
100 | List.iter paths ~f:(fun path -> printf " %s\n%!" path) | |
101 | end | |
102 | ) | |
103 | paths_by_digest | |
104 | ||
105 | let () = | |
948ee900 SK |
106 | let input = ref Paths_on_stdin in |
107 | Arg.parse [] (fun path -> input := Root_path path) ""; | |
108 | main !input |