From 34107832c93f775a6879b1d8ec123f3679eb154b Mon Sep 17 00:00:00 2001 From: Siraaj Khandkar Date: Sun, 18 Nov 2018 12:44:04 -0500 Subject: [PATCH] Add option to ignore filepaths matching a pattern --- Makefile | 2 +- dups.ml | 20 ++++++++++++++++---- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index 13520a8..2807b9b 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ all: @$(MAKE) -s build build: - @ocamlbuild -cflags '-w A' -pkg 'unix' $(EXE_NAME).$(EXE_TYPE) + @ocamlbuild -cflags '-w A' -pkgs 'str,unix' $(EXE_NAME).$(EXE_TYPE) @cp _build/$(EXE_NAME).$(EXE_TYPE) $(EXE_NAME) @rm -f $(EXE_NAME).$(EXE_TYPE) diff --git a/dups.ml b/dups.ml index 67a4fd3..697894d 100644 --- a/dups.ml +++ b/dups.ml @@ -108,14 +108,13 @@ let make_output_fun = function ); close_out oc -let main input output = +let main input output ignore = let output = make_output_fun output in let input = make_input_stream input in let paths_by_digest = Hashtbl.create 1_000_000 in let path_count = ref 0 in let t0 = Sys.time () in - Stream.iter input ~f:(fun path -> - incr path_count; + let process path = try let digest = Digest.file path in let count, paths = @@ -128,6 +127,14 @@ let main input output = Hashtbl.replace paths_by_digest digest (count + 1, StrSet.add path paths) with Sys_error e -> eprintf "WARNING: Failed to process %S: %S\n%!" path e + in + Stream.iter input ~f:(fun path -> + incr path_count; + match ignore with + | Some regexp when (Str.string_match regexp path 0) -> + () + | Some _ | None -> + process path ); Hashtbl.iter (fun d (n, ps) -> if n > 1 then output d n ps) paths_by_digest; let t1 = Sys.time () in @@ -136,6 +143,7 @@ let main input output = let () = let input = ref Stdin in let output = ref Stdout in + let ignore = ref None in let assert_file_exists path = if Sys.file_exists path then () @@ -161,6 +169,10 @@ let () = ) , " Output to this directory instead of stdout." ) + ; ( "-ignore" + , Arg.String (fun regexp -> ignore := Some (Str.regexp regexp)) + , " Ignore file paths which match this regexp pattern (see Str module)." + ) ] in Arg.parse @@ -175,4 +187,4 @@ let () = input := Directories (path :: paths) ) ""; - main !input !output + main !input !output !ignore -- 2.20.1