Commit | Line | Data |
---|---|---|
ddcbda00 SK |
1 | open Printf |
2 | ||
(** Run statistics: a record of integer counters.

    Every field is an [int ref]; [init] creates them all at zero and the
    recorder functions of this module update them in place. *)
type t = {
  considered_files : int ref;    (** incremented by [file_considered] *)
  considered_bytes : int ref;    (** grows by [~size] in [file_considered] *)
  empty : int ref;               (** incremented by [file_empty] *)
  ignored_files : int ref;       (** incremented by [file_ignored] *)
  ignored_bytes : int ref;       (** grows by [~size] in [file_ignored] *)
  unique_size_files : int ref;   (** incremented by [file_unique_size] *)
  unique_size_bytes : int ref;   (** grows by [~size] in [file_unique_size] *)
  unique_sample_files : int ref; (** incremented by [file_unique_sample] *)
  unique_sample_bytes : int ref; (** grows by [~size] in [file_unique_sample] *)
  sampled_files : int ref;       (** incremented by [file_sampled] *)
  sampled_bytes : int ref;       (** grows by [~size] in [chunk_read] *)
  hashed_files : int ref;        (** incremented by [file_hashed] *)
  hashed_bytes : int ref;        (** grows by [~size] in [file_hashed] *)
  digests : int ref;             (** incremented by [digest] *)
  redundant_data : int ref;      (** grows by [~size] in [redundant_data] *)
}
20 | ||
(** Build a fresh statistics record in which every counter is its own
    newly allocated reference holding [0]. *)
let init () =
  (* Each call allocates a distinct cell, so no two fields share state. *)
  let zero () = ref 0 in
  {
    considered_files = zero ();
    considered_bytes = zero ();
    empty = zero ();
    ignored_files = zero ();
    ignored_bytes = zero ();
    unique_size_files = zero ();
    unique_size_bytes = zero ();
    unique_sample_files = zero ();
    unique_sample_bytes = zero ();
    sampled_files = zero ();
    sampled_bytes = zero ();
    hashed_files = zero ();
    hashed_bytes = zero ();
    digests = zero ();
    redundant_data = zero ();
  }
38 | ||
(** [add sum addend] increases the integer stored in the reference [sum]
    by [addend], in place. *)
let add sum addend =
  sum.contents <- sum.contents + addend
41 | ||
(** Record one considered file: increments the considered-file count and
    adds [size] to the considered-byte total. *)
let file_considered {considered_files; considered_bytes; _} ~size =
  incr considered_files;
  add considered_bytes size
45 | ||
(** Record one ignored file: increments the ignored-file count and adds
    [size] to the ignored-byte total. *)
let file_ignored t ~size =
  incr t.ignored_files;
  add t.ignored_bytes size
49 | ||
(** Record one empty file by incrementing the [empty] counter. *)
let file_empty {empty; _} =
  incr empty
52 | ||
(** Add [size] to the running total of sampled bytes. *)
let chunk_read {sampled_bytes; _} ~size =
  add sampled_bytes size
55 | ||
(** Record one sampled file by incrementing the [sampled_files] counter. *)
let file_sampled {sampled_files; _} =
  incr sampled_files
58 | ||
(** Record one file counted under "unique size": increments the file count
    and adds [size] to the matching byte total. *)
let file_unique_size {unique_size_files; unique_size_bytes; _} ~size =
  incr unique_size_files;
  add unique_size_bytes size
62 | ||
(** Record one file counted under "unique sample": increments the file
    count and adds [size] to the matching byte total. *)
let file_unique_sample {unique_sample_files; unique_sample_bytes; _} ~size =
  incr unique_sample_files;
  add unique_sample_bytes size
66 | ||
(** Record one hashed file: increments the hashed-file count and adds
    [size] to the hashed-byte total. *)
let file_hashed {hashed_files; hashed_bytes; _} ~size =
  incr hashed_files;
  add hashed_bytes size
70 | ||
(** Increment the [digests] counter by one. *)
let digest {digests; _} =
  incr digests
73 | ||
(** Add [size] to the running total kept in the [redundant_data] counter. *)
let redundant_data stats ~size =
  add stats.redundant_data size
76 | ||
(** Print a summary of the counters in [t] to stderr, together with the
    wall-clock and processor timings supplied by the caller.

    Byte totals are divided by 1024 three times before being printed.
    Every line is flushed as soon as it is written ([%!]). *)
let report
    t
    ~wall_time_all
    ~wall_time_group_by_size
    ~wall_time_group_by_head
    ~wall_time_group_by_digest
    ~proc_time_all
    ~proc_time_group_by_size
    ~proc_time_group_by_head
    ~proc_time_group_by_digest
  =
  (* Read a byte counter and scale it down by 1024^3. *)
  let gib counter = float_of_int !counter /. 1024. /. 1024. /. 1024. in
  Printf.eprintf "Total time : %.2f wall sec %.2f proc sec\n%!"
    wall_time_all proc_time_all;
  Printf.eprintf "Considered : %8d files %6.2f Gb\n%!"
    !(t.considered_files) (gib t.considered_bytes);
  Printf.eprintf "Sampled : %8d files %6.2f Gb\n%!"
    !(t.sampled_files) (gib t.sampled_bytes);
  Printf.eprintf "Hashed : %8d files %6.2f Gb %6.2f wall sec %6.2f proc sec\n%!"
    !(t.hashed_files) (gib t.hashed_bytes)
    wall_time_group_by_digest proc_time_group_by_digest;
  Printf.eprintf "Digests : %8d\n%!" !(t.digests);
  (* The duplicate count is derived: hashed files minus digests. *)
  let duplicate_files = !(t.hashed_files) - !(t.digests) in
  Printf.eprintf "Duplicates (Hashed - Digests): %8d files %6.2f Gb\n%!"
    duplicate_files (gib t.redundant_data);
  Printf.eprintf "Skipped due to 0 size : %8d files\n%!" !(t.empty);
  Printf.eprintf "Skipped due to unique size : %8d files %6.2f Gb %6.2f wall sec %6.2f proc sec\n%!"
    !(t.unique_size_files) (gib t.unique_size_bytes)
    wall_time_group_by_size proc_time_group_by_size;
  Printf.eprintf "Skipped due to unique sample : %8d files %6.2f Gb %6.2f wall sec %6.2f proc sec\n%!"
    !(t.unique_sample_files) (gib t.unique_sample_bytes)
    wall_time_group_by_head proc_time_group_by_head;
  Printf.eprintf "Ignored due to regex match : %8d files %6.2f Gb\n%!"
    !(t.ignored_files) (gib t.ignored_bytes)