4 { considered_files : int ref
5 ; considered_bytes : int ref
7 ; ignored_files : int ref
8 ; ignored_bytes : int ref
9 ; unique_size_files : int ref
10 ; unique_size_bytes : int ref
11 ; unique_sample_files : int ref
12 ; unique_sample_bytes : int ref
13 ; sampled_files : int ref
14 ; sampled_bytes : int ref
15 ; hashed_files : int ref
16 ; hashed_bytes : int ref
18 ; redundant_data : int ref
(* Record value that gives every counter listed below its own fresh [ref 0]
   cell, so each counter starts at zero and is updated independently.
   NOTE(review): the binding that owns this record and its closing brace are
   not visible in this view, and the embedded line numbers skip 24 and 35, so
   further fields may be initialised on lines that are not shown. *)
22 { considered_files = ref 0
23 ; considered_bytes = ref 0
25 ; ignored_files = ref 0
26 ; ignored_bytes = ref 0
27 ; unique_size_files = ref 0
28 ; unique_size_bytes = ref 0
29 ; sampled_files = ref 0
30 ; sampled_bytes = ref 0
31 ; hashed_files = ref 0
32 ; hashed_bytes = ref 0
33 ; unique_sample_files = ref 0
34 ; unique_sample_bytes = ref 0
36 ; redundant_data = ref 0
(** [file_considered t ~size] records one more considered file: it increments
    the [considered_files] counter of [t], then hands the [considered_bytes]
    counter and [size] to [add] (a helper defined elsewhere in this file,
    presumably accumulating [size] into the counter). *)
let file_considered { considered_files; considered_bytes; _ } ~size =
  incr considered_files;
  add considered_bytes size
(** [file_ignored t ~size] destructures the [ignored_files] and
    [ignored_bytes] counters out of [t] and passes [ignored_bytes] together
    with [size] to [add].
    NOTE(review): the embedded line numbers jump from 46 to 48, so one line of
    this body is missing from this view; [ignored_files] is bound by the
    pattern but unused in the visible lines -- confirm against the full file
    that the file counter is incremented there. *)
46 let file_ignored {ignored_files; ignored_bytes; _} ~size =
48 add ignored_bytes size
(** [chunk_read t ~size] hands the [sampled_bytes] counter of [t] and [size]
    to [add] (defined elsewhere in this file). No file counter is touched
    here. *)
let chunk_read { sampled_bytes; _ } ~size = add sampled_bytes size
(** [file_unique_size t ~size] records one file under the unique-size
    counters: it increments [unique_size_files], then passes
    [unique_size_bytes] and [size] to [add] (defined elsewhere in this
    file). *)
let file_unique_size
    { unique_size_files = file_count; unique_size_bytes = byte_total; _ }
    ~size =
  incr file_count;
  add byte_total size
(** [file_unique_sample t ~size] records one file under the unique-sample
    counters: it increments [unique_sample_files], then passes
    [unique_sample_bytes] and [size] to [add] (defined elsewhere in this
    file). *)
let file_unique_sample
    { unique_sample_files = file_count; unique_sample_bytes = byte_total; _ }
    ~size =
  incr file_count;
  add byte_total size
(** [file_hashed t ~size] passes the [hashed_bytes] counter of [t] and [size]
    to [add].
    NOTE(review): the embedded line numbers jump from 67 to 69, so one line of
    this body is missing from this view -- confirm against the full file
    whether [t.hashed_files] is incremented there. *)
67 let file_hashed t ~size =
69 add t.hashed_bytes size
(** [redundant_data t ~size] passes the [redundant_data] counter of [t] and
    [size] to [add] (defined elsewhere in this file). The field is bound to a
    local name so it does not shadow this function's own name. *)
let redundant_data { redundant_data = total; _ } ~size = add total size
80 ~wall_time_group_by_size
81 ~wall_time_group_by_head
82 ~wall_time_group_by_digest
84 ~proc_time_group_by_size
85 ~proc_time_group_by_head
86 ~proc_time_group_by_digest
(* NOTE(review): the header of the enclosing function is not visible in this
   view, and the embedded line numbers skip several lines below (91-92, 94,
   97, 100, 105, 121), so some [eprintf] arguments are not shown here. *)
(* Convert an integer byte count to a float number of megabytes by dividing
   by 1024 twice. *)
88 let b_to_mb b = (float_of_int b) /. 1024. /. 1024. in
(* Convert an integer byte count to gigabytes (one further division by 1024);
   every "Gb" column printed below goes through this helper. *)
89 let b_to_gb b = (b_to_mb b) /. 1024. in
(* Each summary line is written with [eprintf] and its format string ends in
   "%!" (a flush directive, assuming this is [Printf.eprintf]). *)
90 eprintf "Total time : %.2f wall sec %.2f proc sec\n%!"
93 eprintf "Considered : %8d files %6.2f Gb\n%!"
95 (b_to_gb !(t.considered_bytes));
96 eprintf "Sampled : %8d files %6.2f Gb\n%!"
98 (b_to_gb !(t.sampled_bytes));
(* The hashing line also prints the wall/proc times of the group-by-digest
   stage. *)
99 eprintf "Hashed : %8d files %6.2f Gb %6.2f wall sec %6.2f proc sec\n%!"
101 (b_to_gb !(t.hashed_bytes))
102 wall_time_group_by_digest
103 proc_time_group_by_digest;
104 eprintf "Digests : %8d\n%!"
(* The duplicate file count is derived as hashed files minus digests; the
   size column is the [redundant_data] counter. *)
106 eprintf "Duplicates (Hashed - Digests): %8d files %6.2f Gb\n%!"
107 (!(t.hashed_files) - !(t.digests))
108 (b_to_gb !(t.redundant_data));
109 eprintf "Skipped due to 0 size : %8d files\n%!" !(t.empty);
(* The unique-size line is paired with the group-by-size timings. *)
110 eprintf "Skipped due to unique size : %8d files %6.2f Gb %6.2f wall sec %6.2f proc sec\n%!"
111 !(t.unique_size_files)
112 (b_to_gb !(t.unique_size_bytes))
113 wall_time_group_by_size
114 proc_time_group_by_size;
(* The unique-sample line is paired with the group-by-head timings. *)
115 eprintf "Skipped due to unique sample : %8d files %6.2f Gb %6.2f wall sec %6.2f proc sec\n%!"
116 !(t.unique_sample_files)
117 (b_to_gb !(t.unique_sample_bytes))
118 wall_time_group_by_head
119 proc_time_group_by_head;
120 eprintf "Ignored due to regex match : %8d files %6.2f Gb\n%!"
122 (b_to_gb !(t.ignored_bytes))