| 1 | open Printf |
| 2 | |
| 3 | module List = ListLabels |
| 4 | |
(** Line-oriented iteration over input channels.

    This module shadows the name [Stream] but does not depend on
    [Stdlib.Stream], which was deprecated in OCaml 4.14 and removed in
    OCaml 5.0. *)
module Stream : sig
  (** [lines ic ~f] reads [ic] one line at a time with [input_line] and
      applies [f] to each line, in order, until end of file is reached.
      The channel is not closed. Exceptions raised by [f] propagate to
      the caller. *)
  val lines : in_channel -> f:(string -> unit) -> unit
end = struct
  let lines ic ~f =
    (* Only [End_of_file] coming from [input_line] ends the loop; [f] is
       called outside the handler so its exceptions are never swallowed.
       The recursive call is in tail position, so arbitrarily long inputs
       do not grow the stack. *)
    let rec loop () =
      match input_line ic with
      | exception End_of_file -> ()
      | line ->
        f line;
        loop ()
    in
    loop ()
end
| 25 | |
(** [main ic] treats every line of [ic] as a file path, digests each file
    with [Digest.file], and prints every digest shared by more than one
    path, followed by the paths in that group.

    Paths whose digest cannot be computed because of a [Sys_error] are
    reported on stderr and skipped. Groups are printed in [Hashtbl.iter]
    order; within a group the most recently read path comes first. *)
let main ic =
  let paths_by_digest = Hashtbl.create 1_000_000 in
  (* Add one path to the group of paths sharing its digest. *)
  let record path =
    match Digest.file path with
    | exception Sys_error e ->
      eprintf "WARNING: Failed to process %S: %S\n%!" path e
    | digest ->
      let earlier =
        match Hashtbl.find_opt paths_by_digest digest with
        | Some paths -> paths
        | None -> []
      in
      Hashtbl.replace paths_by_digest digest (path :: earlier)
  in
  (* Print a group only when it holds at least two paths. *)
  let report digest paths =
    match paths with
    | [] | [ _ ] -> ()
    | _ :: _ :: _ ->
      printf "%s %d\n%!" (Digest.to_hex digest) (List.length paths);
      List.iter paths ~f:(fun path -> printf " %s\n%!" path)
  in
  Stream.lines ic ~f:record;
  Hashtbl.iter report paths_by_digest
| 51 | |
(* Entry point: read paths from the file named on the command line, or
   from stdin when no file is given. When several file names are given,
   the last one is used. *)
let () =
  let ic = ref stdin in
  let set_input filename =
    let previous = !ic in
    ic := open_in filename;
    (* A later positional argument supersedes an earlier one; close the
       channel being replaced so it is not leaked. Physical inequality is
       intended here: channels are compared by identity, and stdin itself
       must stay open. *)
    if previous != stdin then close_in previous
  in
  Arg.parse [] set_input "";
  main !ic;
  close_in !ic