X-Git-Url: https://git.xandkar.net/?p=dups.git;a=blobdiff_plain;f=dups.ml;h=697894d4c3c1d08d6a421e8038fd278da708f53c;hp=b6782b68f9021bdbc81522d75ca236dc029a8a02;hb=34107832c93f775a6879b1d8ec123f3679eb154b;hpb=e09dff7f9b5875b81cf50b33dcf97635518f7c28 diff --git a/dups.ml b/dups.ml index b6782b6..697894d 100644 --- a/dups.ml +++ b/dups.ml @@ -78,17 +78,17 @@ end = struct end type input = - | Root_paths of string list - | Paths_on_stdin + | Stdin + | Directories of string list type output = | Stdout | Directory of string let make_input_stream = function - | Paths_on_stdin -> + | Stdin -> In_channel.lines stdin - | Root_paths paths -> + | Directories paths -> let paths = StrSet.elements (StrSet.of_list paths) in Stream.concat (List.map paths ~f:Directory_tree.find_files) @@ -108,14 +108,13 @@ let make_output_fun = function ); close_out oc -let main input output = +let main input output ignore = let output = make_output_fun output in let input = make_input_stream input in let paths_by_digest = Hashtbl.create 1_000_000 in let path_count = ref 0 in let t0 = Sys.time () in - Stream.iter input ~f:(fun path -> - incr path_count; + let process path = try let digest = Digest.file path in let count, paths = @@ -128,14 +127,23 @@ let main input output = Hashtbl.replace paths_by_digest digest (count + 1, StrSet.add path paths) with Sys_error e -> eprintf "WARNING: Failed to process %S: %S\n%!" path e + in + Stream.iter input ~f:(fun path -> + incr path_count; + match ignore with + | Some regexp when (Str.string_match regexp path 0) -> + () + | Some _ | None -> + process path ); Hashtbl.iter (fun d (n, ps) -> if n > 1 then output d n ps) paths_by_digest; let t1 = Sys.time () in eprintf "Processed %d files in %f seconds.\n%!" !path_count (t1 -. t0) let () = - let input = ref Paths_on_stdin in + let input = ref Stdin in let output = ref Stdout in + let ignore = ref None in let assert_file_exists path = if Sys.file_exists path then () @@ -161,6 +169,10 @@ let () = ) , " Output to this directory instead of stdout." ) + ; ( "-ignore" + , Arg.String (fun regexp -> ignore := Some (Str.regexp regexp)) + , " Ignore file paths which match this regexp pattern (see Str module)." + ) ] in Arg.parse @@ -169,10 +181,10 @@ let () = assert_file_exists path; assert_file_is_dir path; match !input with - | Paths_on_stdin -> - input := Root_paths [path] - | Root_paths paths -> - input := Root_paths (path :: paths) + | Stdin -> + input := Directories [path] + | Directories paths -> + input := Directories (path :: paths) ) ""; - main !input !output + main !input !output !ignore