(* Recovered from a flattened git blobdiff; original header kept for provenance:
   X-Git-Url: https://git.xandkar.net/?p=dups.git;a=blobdiff_plain;f=lib%2Fmetrics.ml;fp=lib%2Fmetrics.ml;h=877587429878dea9063f7d175b4ad7ef6674cdb3;hp=0000000000000000000000000000000000000000;hb=ddcbda0046a598d55746850e15d4fa99b3998ce0;hpb=21e1d14c1e23d2c586ebe1480add8e9d87e7ad7a
   diff --git a/lib/metrics.ml b/lib/metrics.ml
   new file mode 100644
   index 0000000..8775874
   --- /dev/null
   +++ b/lib/metrics.ml
   @@ -0,0 +1,122 @@ *)

open Printf

(** Set of running counters. Every field is an [int ref] that starts at zero
    (see [init]) and is only ever increased by the recorder functions below. *)
type t =
  { considered_files    : int ref
  ; considered_bytes    : int ref
  ; empty               : int ref
  ; ignored_files       : int ref
  ; ignored_bytes       : int ref
  ; unique_size_files   : int ref
  ; unique_size_bytes   : int ref
  ; unique_sample_files : int ref
  ; unique_sample_bytes : int ref
  ; sampled_files       : int ref
  ; sampled_bytes       : int ref
  ; hashed_files        : int ref
  ; hashed_bytes        : int ref
  ; digests             : int ref
  ; redundant_data      : int ref
  }

(** [init ()] returns a fresh record in which every counter is a newly
    allocated reference holding 0, so separate results share no state. *)
let init () =
  { considered_files    = ref 0
  ; considered_bytes    = ref 0
  ; empty               = ref 0
  ; ignored_files       = ref 0
  ; ignored_bytes       = ref 0
  ; unique_size_files   = ref 0
  ; unique_size_bytes   = ref 0
  ; unique_sample_files = ref 0
  ; unique_sample_bytes = ref 0
  ; sampled_files       = ref 0
  ; sampled_bytes       = ref 0
  ; hashed_files        = ref 0
  ; hashed_bytes        = ref 0
  ; digests             = ref 0
  ; redundant_data      = ref 0
  }

(** [add sum addend] increases the integer stored in [sum] by [addend]. *)
let add sum addend =
  sum := !sum + addend

(** Bumps [considered_files] by one and adds [size] to [considered_bytes]. *)
let file_considered t ~size =
  incr t.considered_files;
  add t.considered_bytes size

(** Bumps [ignored_files] by one and adds [size] to [ignored_bytes]. *)
let file_ignored {ignored_files; ignored_bytes; _} ~size =
  incr ignored_files;
  add ignored_bytes size

(** Bumps the [empty] counter by one. *)
let file_empty t =
  incr t.empty

(** Adds [size] to [sampled_bytes]; the sampled file count is left untouched
    and is advanced separately by [file_sampled]. *)
let chunk_read t ~size =
  add t.sampled_bytes size

(** Bumps [sampled_files] by one. *)
let file_sampled t =
  incr t.sampled_files

(** Bumps [unique_size_files] by one and adds [size] to [unique_size_bytes]. *)
let file_unique_size t ~size =
  incr t.unique_size_files;
  add t.unique_size_bytes size

(** Bumps [unique_sample_files] by one and adds [size] to
    [unique_sample_bytes]. *)
let file_unique_sample t ~size =
  incr t.unique_sample_files;
  add t.unique_sample_bytes size

(** Bumps [hashed_files] by one and adds [size] to [hashed_bytes]. *)
let file_hashed t ~size =
  incr t.hashed_files;
  add t.hashed_bytes size

(** Bumps the [digests] counter by one. *)
let digest t =
  incr t.digests

(** Adds [size] to the [redundant_data] byte total. *)
let redundant_data t ~size =
  add t.redundant_data size

(** [report t ~wall_time_all ... ~proc_time_group_by_digest] writes a summary
    of all counters in [t] to standard error, one [eprintf] call per line, each
    ending in a flush.

    The timing arguments are floats printed with two decimals. Byte totals are
    divided by 1024 three times before being printed under the label Gb. The
    duplicates file count is computed as [hashed_files] minus [digests].

    NOTE(review): the format strings are reproduced exactly as they appear in
    the flattened source; any runs of alignment spaces they originally
    contained may have been collapsed there. Confirm against upstream. *)
let report
  t
  ~wall_time_all
  ~wall_time_group_by_size
  ~wall_time_group_by_head
  ~wall_time_group_by_digest
  ~proc_time_all
  ~proc_time_group_by_size
  ~proc_time_group_by_head
  ~proc_time_group_by_digest
=
  let b_to_mb b = (float_of_int b) /. 1024. /. 1024. in
  let b_to_gb b = (b_to_mb b) /. 1024. in
  eprintf "Total time : %.2f wall sec %.2f proc sec\n%!"
    wall_time_all
    proc_time_all;
  eprintf "Considered : %8d files %6.2f Gb\n%!"
    !(t.considered_files)
    (b_to_gb !(t.considered_bytes));
  eprintf "Sampled : %8d files %6.2f Gb\n%!"
    !(t.sampled_files)
    (b_to_gb !(t.sampled_bytes));
  eprintf "Hashed : %8d files %6.2f Gb %6.2f wall sec %6.2f proc sec\n%!"
    !(t.hashed_files)
    (b_to_gb !(t.hashed_bytes))
    wall_time_group_by_digest
    proc_time_group_by_digest;
  eprintf "Digests : %8d\n%!"
    !(t.digests);
  eprintf "Duplicates (Hashed - Digests): %8d files %6.2f Gb\n%!"
    (!(t.hashed_files) - !(t.digests))
    (b_to_gb !(t.redundant_data));
  eprintf "Skipped due to 0 size : %8d files\n%!" !(t.empty);
  eprintf "Skipped due to unique size : %8d files %6.2f Gb %6.2f wall sec %6.2f proc sec\n%!"
    !(t.unique_size_files)
    (b_to_gb !(t.unique_size_bytes))
    wall_time_group_by_size
    proc_time_group_by_size;
  eprintf "Skipped due to unique sample : %8d files %6.2f Gb %6.2f wall sec %6.2f proc sec\n%!"
    !(t.unique_sample_files)
    (b_to_gb !(t.unique_sample_bytes))
    wall_time_group_by_head
    proc_time_group_by_head;
  eprintf "Ignored due to regex match : %8d files %6.2f Gb\n%!"
    !(t.ignored_files)
    (b_to_gb !(t.ignored_bytes))