home
/
code
/
dups.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Make ignore-pattern a closure
[dups.git]
/
dups.ml
diff --git
a/dups.ml
b/dups.ml
index
14132a6
..
7c03773
100644
(file)
--- a/
dups.ml
+++ b/
dups.ml
@@
-36,6
+36,8
@@
module Metrics : sig
: t -> size:int -> unit
val digest
: t -> unit
: t -> size:int -> unit
val digest
: t -> unit
+ val redundant_data
+ : t -> size:int -> unit
end = struct
type t =
{ considered_files : int ref
end = struct
type t =
{ considered_files : int ref
@@
-52,6
+54,7
@@
end = struct
; hashed_files : int ref
; hashed_bytes : int ref
; digests : int ref
; hashed_files : int ref
; hashed_bytes : int ref
; digests : int ref
+ ; redundant_data : int ref
}
let init () =
}
let init () =
@@
-69,6
+72,7
@@
end = struct
; unique_sample_files = ref 0
; unique_sample_bytes = ref 0
; digests = ref 0
; unique_sample_files = ref 0
; unique_sample_bytes = ref 0
; digests = ref 0
+ ; redundant_data = ref 0
}
let add sum addend =
}
let add sum addend =
@@
-106,6
+110,9
@@
end = struct
let digest t =
incr t.digests
let digest t =
incr t.digests
+ let redundant_data t ~size =
+ add t.redundant_data size
+
let report
t
~time_all
let report
t
~time_all
@@
-129,8
+136,9
@@
end = struct
time_group_by_digest;
eprintf "Digests : %8d\n%!"
!(t.digests);
time_group_by_digest;
eprintf "Digests : %8d\n%!"
!(t.digests);
- eprintf "Duplicates (Hashed - Digests): %8d\n%!"
- (!(t.hashed_files) - !(t.digests));
+ eprintf "Duplicates (Hashed - Digests): %8d files %6.2f Gb\n%!"
+ (!(t.hashed_files) - !(t.digests))
+ (b_to_gb !(t.redundant_data));
eprintf "Skipped due to 0 size : %8d files\n%!" !(t.empty);
eprintf "Skipped due to unique size : %8d files %6.2f Gb %6.2f seconds\n%!"
!(t.unique_size_files)
eprintf "Skipped due to 0 size : %8d files\n%!" !(t.empty);
eprintf "Skipped due to unique size : %8d files %6.2f Gb %6.2f seconds\n%!"
!(t.unique_size_files)
@@
-372,7
+380,7
@@
type output =
type opt =
{ input : input
; output : output
type opt =
{ input : input
; output : output
- ; ignore :
Str.regexp option
+ ; ignore :
string -> bool
; sample : int
}
; sample : int
}
@@
-388,15
+396,9
@@
let make_input_stream input ignore ~metrics =
Stream.filter input ~f:(fun {File.path; size} ->
M.file_considered metrics ~size;
let empty = size = 0 in
Stream.filter input ~f:(fun {File.path; size} ->
M.file_considered metrics ~size;
let empty = size = 0 in
+ let ignored = ignore path in
if empty then M.file_empty metrics;
if empty then M.file_empty metrics;
- let ignored =
- match ignore with
- | Some regexp when (Str.string_match regexp path 0) ->
- M.file_ignored metrics ~size;
- true
- | Some _ | None ->
- false
- in
+ if ignored then M.file_ignored metrics ~size;
(not empty) && (not ignored)
)
(not empty) && (not ignored)
)
@@
-454,7
+456,9
@@
let main {input; output; ignore; sample = sample_len} =
Stream.iter groups ~f:(fun (d, n, files) ->
M.digest metrics;
Stream.iter groups ~f:(fun (d, n, files) ->
M.digest metrics;
- if n > 1 then output d n files
+ if n > 1 then
+ M.redundant_data metrics ~size:(n * (List.hd files).File.size);
+ output d n files
);
let t1_all = Sys.time () in
);
let t1_all = Sys.time () in
@@
-480,7
+484,7
@@
let get_opt () : opt =
in
let input = ref Stdin in
let output = ref Stdout in
in
let input = ref Stdin in
let output = ref Stdout in
- let ignore = ref
None
in
+ let ignore = ref
(fun _ -> false)
in
let sample = ref 256 in
let spec =
[ ( "-out"
let sample = ref 256 in
let spec =
[ ( "-out"
@@
-492,7
+496,9
@@
let get_opt () : opt =
, " Output to this directory instead of stdout."
)
; ( "-ignore"
, " Output to this directory instead of stdout."
)
; ( "-ignore"
- , Arg.String (fun regexp -> ignore := Some (Str.regexp regexp))
+ , Arg.String (fun regexp ->
+ let regexp = Str.regexp regexp in
+ ignore := fun string -> Str.string_match regexp string 0)
, " Ignore file paths which match this regexp pattern (see Str module)."
)
; ( "-sample"
, " Ignore file paths which match this regexp pattern (see Str module)."
)
; ( "-sample"
This page took
0.022175 seconds
and
4
git commands to generate.