| 1 | open Printf |
| 2 | |
(** Running counters describing one scan.

    Every field is an [int ref] that starts at zero (see [init]) and is
    mutated in place by the recording functions of this module; [report]
    reads them all and prints a summary. *)
type t = {
  considered_files : int ref;  (** bumped by [file_considered] *)
  considered_bytes : int ref;  (** sizes accumulated by [file_considered] *)
  empty : int ref;  (** bumped by [file_empty]; reported as "0 size" *)
  ignored_files : int ref;  (** bumped by [file_ignored] *)
  ignored_bytes : int ref;  (** sizes accumulated by [file_ignored] *)
  unique_size_files : int ref;  (** bumped by [file_unique_size] *)
  unique_size_bytes : int ref;  (** sizes accumulated by [file_unique_size] *)
  unique_sample_files : int ref;  (** bumped by [file_unique_sample] *)
  unique_sample_bytes : int ref;
      (** sizes accumulated by [file_unique_sample] *)
  sampled_files : int ref;  (** bumped by [file_sampled] *)
  sampled_bytes : int ref;  (** sizes accumulated by [chunk_read] *)
  hashed_files : int ref;  (** bumped by [file_hashed] *)
  hashed_bytes : int ref;  (** sizes accumulated by [file_hashed] *)
  digests : int ref;  (** bumped by [digest] *)
  redundant_data : int ref;  (** sizes accumulated by [redundant_data] *)
}
| 20 | |
(** Build a fresh statistics record with every counter at zero.

    Each field receives its own newly allocated reference, so no two
    counters (and no two records returned by separate calls) share state. *)
let init () =
  (* A thunk rather than a shared value: every call allocates a new cell. *)
  let zero () = ref 0 in
  {
    considered_files = zero ();
    considered_bytes = zero ();
    empty = zero ();
    ignored_files = zero ();
    ignored_bytes = zero ();
    unique_size_files = zero ();
    unique_size_bytes = zero ();
    unique_sample_files = zero ();
    unique_sample_bytes = zero ();
    sampled_files = zero ();
    sampled_bytes = zero ();
    hashed_files = zero ();
    hashed_bytes = zero ();
    digests = zero ();
    redundant_data = zero ();
  }
| 38 | |
(** [add sum addend] increases the integer stored in [sum] by [addend],
    updating the reference in place. *)
let add sum addend =
  let current = !sum in
  sum := current + addend
| 41 | |
(** Count one more considered file and add [size] to the considered-bytes
    total. *)
let file_considered { considered_files; considered_bytes; _ } ~size =
  considered_bytes := !considered_bytes + size;
  incr considered_files
| 45 | |
(** Count one more ignored file and add [size] to the ignored-bytes total. *)
let file_ignored t ~size =
  t.ignored_bytes := !(t.ignored_bytes) + size;
  incr t.ignored_files
| 49 | |
(** Count one more empty file. *)
let file_empty { empty; _ } = incr empty
| 52 | |
(** Add [size] to the sampled-bytes total. The sampled-files count is not
    touched here; [file_sampled] maintains it. *)
let chunk_read { sampled_bytes; _ } ~size =
  sampled_bytes := !sampled_bytes + size
| 55 | |
(** Count one more sampled file. *)
let file_sampled { sampled_files; _ } = incr sampled_files
| 58 | |
(** Count one more file recorded as having a unique size and add [size] to
    the matching byte total. *)
let file_unique_size { unique_size_files; unique_size_bytes; _ } ~size =
  unique_size_bytes := !unique_size_bytes + size;
  incr unique_size_files
| 62 | |
(** Count one more file recorded as having a unique sample and add [size]
    to the matching byte total. *)
let file_unique_sample { unique_sample_files; unique_sample_bytes; _ } ~size =
  unique_sample_bytes := !unique_sample_bytes + size;
  incr unique_sample_files
| 66 | |
(** Count one more hashed file and add [size] to the hashed-bytes total. *)
let file_hashed { hashed_files; hashed_bytes; _ } ~size =
  hashed_bytes := !hashed_bytes + size;
  incr hashed_files
| 70 | |
(** Count one more digest. *)
let digest { digests; _ } = incr digests
| 73 | |
(** Add [size] to the redundant-data byte total. *)
let redundant_data { redundant_data = total; _ } ~size =
  total := !total + size
| 76 | |
(** Print a summary of every counter in [t] to standard error, flushing
    after each line.

    Byte totals are divided by 1024 three times before printing and are
    labelled "Gb". The timing arguments are printed as floats:
    - [wall_time_all] / [proc_time_all] on the "Total time" line;
    - [..._group_by_digest] on the "Hashed" line;
    - [..._group_by_size] on the "unique size" line;
    - [..._group_by_head] on the "unique sample" line.

    The "Duplicates" file count is derived as hashed files minus digests. *)
let report
    t
    ~wall_time_all
    ~wall_time_group_by_size
    ~wall_time_group_by_head
    ~wall_time_group_by_digest
    ~proc_time_all
    ~proc_time_group_by_size
    ~proc_time_group_by_head
    ~proc_time_group_by_digest
  =
  (* Same left-to-right division chain as bytes -> MB -> GB. *)
  let gb_of_bytes bytes = float_of_int bytes /. 1024. /. 1024. /. 1024. in
  (* Read every counter once up front; printing below only formats. *)
  let considered_n = !(t.considered_files) in
  let considered_gb = gb_of_bytes !(t.considered_bytes) in
  let sampled_n = !(t.sampled_files) in
  let sampled_gb = gb_of_bytes !(t.sampled_bytes) in
  let hashed_n = !(t.hashed_files) in
  let hashed_gb = gb_of_bytes !(t.hashed_bytes) in
  let digest_n = !(t.digests) in
  let duplicate_n = hashed_n - digest_n in
  let redundant_gb = gb_of_bytes !(t.redundant_data) in
  let empty_n = !(t.empty) in
  let unique_size_n = !(t.unique_size_files) in
  let unique_size_gb = gb_of_bytes !(t.unique_size_bytes) in
  let unique_sample_n = !(t.unique_sample_files) in
  let unique_sample_gb = gb_of_bytes !(t.unique_sample_bytes) in
  let ignored_n = !(t.ignored_files) in
  let ignored_gb = gb_of_bytes !(t.ignored_bytes) in
  Printf.eprintf "Total time : %.2f wall sec %.2f proc sec\n%!"
    wall_time_all proc_time_all;
  Printf.eprintf "Considered : %8d files %6.2f Gb\n%!"
    considered_n considered_gb;
  Printf.eprintf "Sampled : %8d files %6.2f Gb\n%!"
    sampled_n sampled_gb;
  Printf.eprintf
    "Hashed : %8d files %6.2f Gb %6.2f wall sec %6.2f proc sec\n%!"
    hashed_n hashed_gb wall_time_group_by_digest proc_time_group_by_digest;
  Printf.eprintf "Digests : %8d\n%!" digest_n;
  Printf.eprintf "Duplicates (Hashed - Digests): %8d files %6.2f Gb\n%!"
    duplicate_n redundant_gb;
  Printf.eprintf "Skipped due to 0 size : %8d files\n%!" empty_n;
  Printf.eprintf
    "Skipped due to unique size : %8d files %6.2f Gb %6.2f wall sec %6.2f proc sec\n%!"
    unique_size_n unique_size_gb wall_time_group_by_size proc_time_group_by_size;
  Printf.eprintf
    "Skipped due to unique sample : %8d files %6.2f Gb %6.2f wall sec %6.2f proc sec\n%!"
    unique_sample_n unique_sample_gb wall_time_group_by_head
    proc_time_group_by_head;
  Printf.eprintf "Ignored due to regex match : %8d files %6.2f Gb\n%!"
    ignored_n ignored_gb