--- /dev/null
+open Printf
+
+(** Counters accumulated by the recording functions of this module and
+    printed by [report]. Every field is an [int ref] that is updated in
+    place. *)
+type t =
+ { considered_files : int ref (* incremented by [file_considered] *)
+ ; considered_bytes : int ref (* sum of the sizes given to [file_considered] *)
+ ; empty : int ref (* incremented by [file_empty]; printed on the 0 size line *)
+ ; ignored_files : int ref (* incremented by [file_ignored] *)
+ ; ignored_bytes : int ref (* sum of the sizes given to [file_ignored] *)
+ ; unique_size_files : int ref (* incremented by [file_unique_size] *)
+ ; unique_size_bytes : int ref (* sum of the sizes given to [file_unique_size] *)
+ ; unique_sample_files : int ref (* incremented by [file_unique_sample] *)
+ ; unique_sample_bytes : int ref (* sum of the sizes given to [file_unique_sample] *)
+ ; sampled_files : int ref (* incremented by [file_sampled] *)
+ ; sampled_bytes : int ref (* sum of the sizes given to [chunk_read] *)
+ ; hashed_files : int ref (* incremented by [file_hashed] *)
+ ; hashed_bytes : int ref (* sum of the sizes given to [file_hashed] *)
+ ; digests : int ref (* incremented by [digest] *)
+ ; redundant_data : int ref (* sum of the sizes given to [redundant_data] *)
+ }
+
+(** [init ()] returns a fresh statistics record in which every counter is a
+    newly allocated reference holding [0]. *)
+let init () =
+  (* Called once per field so that no two counters share a reference. *)
+  let zero () = ref 0 in
+  { considered_files = zero ()
+  ; considered_bytes = zero ()
+  ; empty = zero ()
+  ; ignored_files = zero ()
+  ; ignored_bytes = zero ()
+  ; unique_size_files = zero ()
+  ; unique_size_bytes = zero ()
+  ; unique_sample_files = zero ()
+  ; unique_sample_bytes = zero ()
+  ; sampled_files = zero ()
+  ; sampled_bytes = zero ()
+  ; hashed_files = zero ()
+  ; hashed_bytes = zero ()
+  ; digests = zero ()
+  ; redundant_data = zero ()
+  }
+
+(** [add counter delta] replaces the contents of [counter] with its current
+    value plus [delta]. *)
+let add counter delta =
+  let updated = !counter + delta in
+  counter := updated
+
+(** [file_considered t ~size] counts one more considered file and adds [size]
+    to the considered byte total. *)
+let file_considered {considered_files; considered_bytes; _} ~size =
+  incr considered_files;
+  add considered_bytes size
+
+(** [file_ignored t ~size] counts one more ignored file and adds [size] to
+    the ignored byte total. *)
+let file_ignored {ignored_files = count; ignored_bytes = total; _} ~size =
+  (* The two updates touch different references, so their order is free. *)
+  add total size;
+  incr count
+
+(** [file_empty t] counts one more empty file. *)
+let file_empty {empty; _} =
+  incr empty
+
+(** [chunk_read t ~size] adds [size] to the sampled byte total. The sampled
+    file count is left untouched; see [file_sampled]. *)
+let chunk_read {sampled_bytes; _} ~size =
+  add sampled_bytes size
+
+(** [file_sampled t] counts one more sampled file. *)
+let file_sampled {sampled_files; _} =
+  incr sampled_files
+
+(** [file_unique_size t ~size] counts one more file in the unique size
+    category and adds [size] to that category byte total. *)
+let file_unique_size {unique_size_files; unique_size_bytes; _} ~size =
+  incr unique_size_files;
+  add unique_size_bytes size
+
+(** [file_unique_sample t ~size] counts one more file in the unique sample
+    category and adds [size] to that category byte total. *)
+let file_unique_sample {unique_sample_files; unique_sample_bytes; _} ~size =
+  incr unique_sample_files;
+  add unique_sample_bytes size
+
+(** [file_hashed t ~size] counts one more hashed file and adds [size] to the
+    hashed byte total. *)
+let file_hashed {hashed_files; hashed_bytes; _} ~size =
+  incr hashed_files;
+  add hashed_bytes size
+
+(** [digest t] increments the digest counter by one. *)
+let digest {digests; _} =
+  incr digests
+
+(** [redundant_data t ~size] adds [size] to the redundant data byte total. *)
+let redundant_data {redundant_data = total; _} ~size =
+  add total size
+
+(** [report t ~wall_time_all ... ~proc_time_group_by_digest] prints the
+    collected counters and the supplied timings with [eprintf], one line per
+    category, flushing after each line.
+
+    Byte totals are printed after three successive divisions by 1024 and are
+    labelled Gb. The duplicate file count is computed as hashed files minus
+    digests. The [group_by_size], [group_by_head] and [group_by_digest]
+    timings are printed on the unique size, unique sample and hashed lines
+    respectively. *)
+let report
+    t
+    ~wall_time_all
+    ~wall_time_group_by_size
+    ~wall_time_group_by_head
+    ~wall_time_group_by_digest
+    ~proc_time_all
+    ~proc_time_group_by_size
+    ~proc_time_group_by_head
+    ~proc_time_group_by_digest
+  =
+  (* Current value of a counter. *)
+  let count counter = !counter in
+  (* Counter contents scaled down by 1024 three times, left to right. *)
+  let gb counter = float_of_int !counter /. 1024. /. 1024. /. 1024. in
+  eprintf "Total time : %.2f wall sec %.2f proc sec\n%!"
+    wall_time_all
+    proc_time_all;
+  eprintf "Considered : %8d files %6.2f Gb\n%!"
+    (count t.considered_files)
+    (gb t.considered_bytes);
+  eprintf "Sampled : %8d files %6.2f Gb\n%!"
+    (count t.sampled_files)
+    (gb t.sampled_bytes);
+  eprintf "Hashed : %8d files %6.2f Gb %6.2f wall sec %6.2f proc sec\n%!"
+    (count t.hashed_files)
+    (gb t.hashed_bytes)
+    wall_time_group_by_digest
+    proc_time_group_by_digest;
+  eprintf "Digests : %8d\n%!"
+    (count t.digests);
+  eprintf "Duplicates (Hashed - Digests): %8d files %6.2f Gb\n%!"
+    (count t.hashed_files - count t.digests)
+    (gb t.redundant_data);
+  eprintf "Skipped due to 0 size : %8d files\n%!"
+    (count t.empty);
+  eprintf "Skipped due to unique size : %8d files %6.2f Gb %6.2f wall sec %6.2f proc sec\n%!"
+    (count t.unique_size_files)
+    (gb t.unique_size_bytes)
+    wall_time_group_by_size
+    proc_time_group_by_size;
+  eprintf "Skipped due to unique sample : %8d files %6.2f Gb %6.2f wall sec %6.2f proc sec\n%!"
+    (count t.unique_sample_files)
+    (gb t.unique_sample_bytes)
+    wall_time_group_by_head
+    proc_time_group_by_head;
+  eprintf "Ignored due to regex match : %8d files %6.2f Gb\n%!"
+    (count t.ignored_files)
+    (gb t.ignored_bytes)