- let t1_group_by_size = Sys.time () in
- let t0_group_by_sample = Sys.time () in
- Hashtbl.iter
- (fun _ (n, files) ->
- (* Skip files with unique sizes *)
- if n > 1 then
- File.Set.iter
- (fun ({File.path; _} as file) ->
- incr count.sampled_files;
- process
- files_by_sample
- ~group:(sample path ~len:sample_len ~count)
- ~file
- )
- files
- else
- File.Set.iter
- (fun {File.size; _} ->
- incr count.unique_size_files;
- add count.unique_size_bytes size
- )
- files
- )
- files_by_size;
- let t1_group_by_sample = Sys.time () in
- let t0_group_by_digest = Sys.time () in
- Hashtbl.iter
- (fun _ (n, files) ->
- (* Skip files with unique samples *)
- if n > 1 then
- File.Set.iter
- (fun ({File.path; size} as file) ->
- incr count.hashed_files;
- add count.hashed_bytes size;
- process files_by_digest ~group:(Digest.file path) ~file
- )
- files
- else
- File.Set.iter
- (fun {File.size; _} ->
- incr count.unique_sample_files;
- add count.unique_sample_bytes size;
- )
- files
- )
- files_by_sample;
- let t1_group_by_digest = Sys.time () in
- Hashtbl.iter
- (fun d (n, files) ->
- incr count.digests;
- if n > 1 then
- output d n files
- )
- files_by_digest;
- let t1 = Sys.time () in
- let b_to_mb b = (float_of_int b) /. 1024. /. 1024. in
- let b_to_gb b = (b_to_mb b) /. 1024. in
- eprintf "Time : %8.2f seconds\n%!" (t1 -. t0);
- eprintf "Considered : %8d files %6.2f Gb\n%!"
- !(count.considered_files)
- (b_to_gb !(count.considered_bytes));
- eprintf "Sampled : %8d files %6.2f Gb\n%!"
- !(count.sampled_files)
- (b_to_gb !(count.sampled_bytes));
- eprintf "Hashed : %8d files %6.2f Gb %6.2f seconds\n%!"
- !(count.hashed_files)
- (b_to_gb !(count.hashed_bytes))
- (t1_group_by_digest -. t0_group_by_digest);
- eprintf "Digests : %8d\n%!"
- !(count.digests);
- eprintf "Duplicates (Hashed - Digests): %8d\n%!"
- (!(count.hashed_files) - !(count.digests));
- eprintf "Skipped due to 0 size : %8d files\n%!" !(count.empty);
- eprintf "Skipped due to unique size : %8d files %6.2f Gb %6.2f seconds\n%!"
- !(count.unique_size_files)
- (b_to_gb !(count.unique_size_bytes))
- (t1_group_by_size -. t0_group_by_size);
- eprintf "Skipped due to unique sample : %8d files %6.2f Gb %6.2f seconds\n%!"
- !(count.unique_sample_files)
- (b_to_gb !(count.unique_sample_bytes))
- (t1_group_by_sample -. t0_group_by_sample);
- eprintf "Ignored due to regex match : %8d files %6.2f Gb\n%!"
- !(count.ignored_files)
- (b_to_gb !(count.ignored_bytes))
+
+ let pt1_all = time_proc () in
+ let wt1_all = time_wall () in
+
+ M.report metrics
+ ~wall_time_all: (wt1_all -. wt0_all)
+ ~wall_time_group_by_size: (wt1_group_by_size -. wt0_group_by_size)
+ ~wall_time_group_by_head: (wt1_group_by_sample -. wt0_group_by_sample)
+ ~wall_time_group_by_digest:(wt1_group_by_digest -. wt0_group_by_digest)
+ ~proc_time_all: (pt1_all -. pt0_all)
+ ~proc_time_group_by_size: (pt1_group_by_size -. pt0_group_by_size)
+ ~proc_time_group_by_head: (pt1_group_by_sample -. pt0_group_by_sample)
+ ~proc_time_group_by_digest:(pt1_group_by_digest -. pt0_group_by_digest)