open Printf

(** Running tallies collected while files are processed.  Every field is
    an independent integer cell that only ever grows through the
    functions below; [report] prints them all. *)
type t =
  { considered_files    : int ref  (* files passed to [file_considered] *)
  ; considered_bytes    : int ref  (* sum of their sizes *)
  ; empty               : int ref  (* reported as "Skipped due to 0 size" *)
  ; ignored_files       : int ref  (* reported as "Ignored due to regex match" *)
  ; ignored_bytes       : int ref
  ; unique_size_files   : int ref  (* reported as "Skipped due to unique size" *)
  ; unique_size_bytes   : int ref
  ; unique_sample_files : int ref  (* reported as "Skipped due to unique sample" *)
  ; unique_sample_bytes : int ref
  ; sampled_files       : int ref  (* bumped by [file_sampled] *)
  ; sampled_bytes       : int ref  (* grown by [chunk_read] *)
  ; hashed_files        : int ref  (* bumped by [file_hashed] *)
  ; hashed_bytes        : int ref
  ; digests             : int ref  (* bumped by [digest] *)
  ; redundant_data      : int ref  (* grown by [redundant_data] *)
  }

(** [init ()] returns a fresh set of counters, all starting at zero. *)
let init () =
  { considered_files    = ref 0
  ; considered_bytes    = ref 0
  ; empty               = ref 0
  ; ignored_files       = ref 0
  ; ignored_bytes       = ref 0
  ; unique_size_files   = ref 0
  ; unique_size_bytes   = ref 0
  ; unique_sample_files = ref 0
  ; unique_sample_bytes = ref 0
  ; sampled_files       = ref 0
  ; sampled_bytes       = ref 0
  ; hashed_files        = ref 0
  ; hashed_bytes        = ref 0
  ; digests             = ref 0
  ; redundant_data      = ref 0
  }

(** [add total delta] increases the cell [total] by [delta] in place. *)
let add total delta =
  total := !total + delta

(** Count one more considered file and add [size] to the considered
    byte total. *)
let file_considered { considered_files; considered_bytes; _ } ~size =
  incr considered_files;
  add considered_bytes size

(** Count one more ignored file and add [size] to the ignored byte
    total. *)
let file_ignored t ~size =
  incr t.ignored_files;
  add t.ignored_bytes size

(** Count one more empty file. *)
let file_empty { empty; _ } =
  incr empty

(** Add [size] to the sampled byte total. *)
let chunk_read { sampled_bytes; _ } ~size =
  add sampled_bytes size

(** Count one more sampled file. *)
let file_sampled { sampled_files; _ } =
  incr sampled_files

(** Count one more file in the "unique size" tally and add [size] to
    the matching byte total. *)
let file_unique_size { unique_size_files; unique_size_bytes; _ } ~size =
  incr unique_size_files;
  add unique_size_bytes size

(** Count one more file in the "unique sample" tally and add [size] to
    the matching byte total. *)
let file_unique_sample { unique_sample_files; unique_sample_bytes; _ } ~size =
  incr unique_sample_files;
  add unique_sample_bytes size

(** Count one more hashed file and add [size] to the hashed byte
    total. *)
let file_hashed { hashed_files; hashed_bytes; _ } ~size =
  incr hashed_files;
  add hashed_bytes size

(** Count one more digest. *)
let digest { digests; _ } =
  incr digests

(** Add [size] to the redundant-data byte total. *)
let redundant_data { redundant_data = total; _ } ~size =
  add total size

(** [report t ~wall_time_all ... ~proc_time_group_by_digest] writes a
    summary of every counter in [t], together with the supplied timings,
    to standard error.  Each line is flushed as soon as it is written.

    Byte totals are printed under a "Gb" label after being divided by
    1024 three times.  The "Duplicates" file count is the number of
    hashed files minus the number of digests. *)
let report
    t
    ~wall_time_all
    ~wall_time_group_by_size
    ~wall_time_group_by_head
    ~wall_time_group_by_digest
    ~proc_time_all
    ~proc_time_group_by_size
    ~proc_time_group_by_head
    ~proc_time_group_by_digest
  =
  let { considered_files
      ; considered_bytes
      ; empty
      ; ignored_files
      ; ignored_bytes
      ; unique_size_files
      ; unique_size_bytes
      ; unique_sample_files
      ; unique_sample_bytes
      ; sampled_files
      ; sampled_bytes
      ; hashed_files
      ; hashed_bytes
      ; digests
      ; redundant_data
      } = t
  in
  (* Bytes held in a counter cell, scaled down by 1024 three times. *)
  let gb cell = float_of_int !cell /. 1024. /. 1024. /. 1024. in
  eprintf "Total time : %.2f wall sec %.2f proc sec\n%!"
    wall_time_all
    proc_time_all;
  eprintf "Considered : %8d files %6.2f Gb\n%!"
    !considered_files
    (gb considered_bytes);
  eprintf "Sampled : %8d files %6.2f Gb\n%!"
    !sampled_files
    (gb sampled_bytes);
  eprintf "Hashed : %8d files %6.2f Gb %6.2f wall sec %6.2f proc sec\n%!"
    !hashed_files
    (gb hashed_bytes)
    wall_time_group_by_digest
    proc_time_group_by_digest;
  eprintf "Digests : %8d\n%!"
    !digests;
  eprintf "Duplicates (Hashed - Digests): %8d files %6.2f Gb\n%!"
    (!hashed_files - !digests)
    (gb redundant_data);
  eprintf "Skipped due to 0 size : %8d files\n%!"
    !empty;
  eprintf "Skipped due to unique size : %8d files %6.2f Gb %6.2f wall sec %6.2f proc sec\n%!"
    !unique_size_files
    (gb unique_size_bytes)
    wall_time_group_by_size
    proc_time_group_by_size;
  eprintf "Skipped due to unique sample : %8d files %6.2f Gb %6.2f wall sec %6.2f proc sec\n%!"
    !unique_sample_files
    (gb unique_sample_bytes)
    wall_time_group_by_head
    proc_time_group_by_head;
  eprintf "Ignored due to regex match : %8d files %6.2f Gb\n%!"
    !ignored_files
    (gb ignored_bytes)