+(* Sets of strings, built by applying the [Set.Make] functor to [String]. *)
+module StrSet = Set.Make(String)
+(* From this point on, [Unix] names [UnixLabels]. *)
+module Unix = UnixLabels
+
+(** Run counters and the summary report printed from them.
+
+    [t] is abstract: create it with {!init}, bump counters through the event
+    functions below, and print everything with {!report}. *)
+module Metrics : sig
+  type t
+
+  (** [init ()] returns a fresh set of counters, all at zero. *)
+  val init : unit -> t
+
+  (** [report t ~wall_time_... ~proc_time_...] prints one summary line per
+      counter group through [eprintf]. The timings are printed as given and
+      labelled as seconds: the [_all] pair goes on the "Total time" line,
+      [_group_by_size] on the "unique size" line, [_group_by_head] on the
+      "unique sample" line and [_group_by_digest] on the "Hashed" line. *)
+  val report
+    :  t
+    -> wall_time_all:float
+    -> wall_time_group_by_size:float
+    -> wall_time_group_by_head:float
+    -> wall_time_group_by_digest:float
+    -> proc_time_all:float
+    -> proc_time_group_by_size:float
+    -> proc_time_group_by_head:float
+    -> proc_time_group_by_digest:float
+    -> unit
+
+  (** Counts one more considered file and adds [size] to the considered
+      byte total. *)
+  val file_considered : t -> size:int -> unit
+
+  (** Counts one more ignored file and adds [size] to the ignored byte
+      total (reported as "Ignored due to regex match"). *)
+  val file_ignored : t -> size:int -> unit
+
+  (** Counts one more empty file (reported as "Skipped due to 0 size"). *)
+  val file_empty : t -> unit
+
+  (** Counts one more sampled file. Sampled bytes are tracked separately,
+      through {!chunk_read}. *)
+  val file_sampled : t -> unit
+
+  (** Adds [size] to the sampled byte total; the sampled file count is not
+      touched. *)
+  val chunk_read : t -> size:int -> unit
+
+  (** Counts one more file skipped for having a unique size and adds [size]
+      to the matching byte total. *)
+  val file_unique_size : t -> size:int -> unit
+
+  (** Counts one more file skipped for having a unique sample and adds
+      [size] to the matching byte total. *)
+  val file_unique_sample : t -> size:int -> unit
+
+  (** Counts one more hashed file and adds [size] to the hashed byte
+      total. *)
+  val file_hashed : t -> size:int -> unit
+
+  (** Counts one more digest. *)
+  val digest : t -> unit
+
+  (** Adds [size] to the redundant byte total shown on the "Duplicates"
+      line. *)
+  val redundant_data : t -> size:int -> unit
+end = struct
+  (* Counters are plain mutable fields rather than [int ref]s; the type is
+     abstract in the signature, so the representation is private. *)
+  type t =
+    { mutable considered_files : int
+    ; mutable considered_bytes : int
+    ; mutable empty : int
+    ; mutable ignored_files : int
+    ; mutable ignored_bytes : int
+    ; mutable unique_size_files : int
+    ; mutable unique_size_bytes : int
+    ; mutable unique_sample_files : int
+    ; mutable unique_sample_bytes : int
+    ; mutable sampled_files : int
+    ; mutable sampled_bytes : int
+    ; mutable hashed_files : int
+    ; mutable hashed_bytes : int
+    ; mutable digests : int
+    ; mutable redundant_data : int
+    }
+
+  let init () =
+    { considered_files = 0
+    ; considered_bytes = 0
+    ; empty = 0
+    ; ignored_files = 0
+    ; ignored_bytes = 0
+    ; unique_size_files = 0
+    ; unique_size_bytes = 0
+    ; unique_sample_files = 0
+    ; unique_sample_bytes = 0
+    ; sampled_files = 0
+    ; sampled_bytes = 0
+    ; hashed_files = 0
+    ; hashed_bytes = 0
+    ; digests = 0
+    ; redundant_data = 0
+    }
+
+  let file_considered t ~size =
+    t.considered_files <- t.considered_files + 1;
+    t.considered_bytes <- t.considered_bytes + size
+
+  let file_ignored t ~size =
+    t.ignored_files <- t.ignored_files + 1;
+    t.ignored_bytes <- t.ignored_bytes + size
+
+  let file_empty t =
+    t.empty <- t.empty + 1
+
+  let chunk_read t ~size =
+    t.sampled_bytes <- t.sampled_bytes + size
+
+  let file_sampled t =
+    t.sampled_files <- t.sampled_files + 1
+
+  let file_unique_size t ~size =
+    t.unique_size_files <- t.unique_size_files + 1;
+    t.unique_size_bytes <- t.unique_size_bytes + size
+
+  let file_unique_sample t ~size =
+    t.unique_sample_files <- t.unique_sample_files + 1;
+    t.unique_sample_bytes <- t.unique_sample_bytes + size
+
+  let file_hashed t ~size =
+    t.hashed_files <- t.hashed_files + 1;
+    t.hashed_bytes <- t.hashed_bytes + size
+
+  let digest t =
+    t.digests <- t.digests + 1
+
+  let redundant_data t ~size =
+    t.redundant_data <- t.redundant_data + size
+
+  let report
+      t
+      ~wall_time_all
+      ~wall_time_group_by_size
+      ~wall_time_group_by_head
+      ~wall_time_group_by_digest
+      ~proc_time_all
+      ~proc_time_group_by_size
+      ~proc_time_group_by_head
+      ~proc_time_group_by_digest
+    =
+    (* Byte count divided by 1024 three times; printed under the "Gb"
+       label. *)
+    let gb bytes = float_of_int bytes /. 1024. /. 1024. /. 1024. in
+    eprintf "Total time : %.2f wall sec %.2f proc sec\n%!"
+      wall_time_all
+      proc_time_all;
+    eprintf "Considered : %8d files %6.2f Gb\n%!"
+      t.considered_files
+      (gb t.considered_bytes);
+    eprintf "Sampled : %8d files %6.2f Gb\n%!"
+      t.sampled_files
+      (gb t.sampled_bytes);
+    eprintf "Hashed : %8d files %6.2f Gb %6.2f wall sec %6.2f proc sec\n%!"
+      t.hashed_files
+      (gb t.hashed_bytes)
+      wall_time_group_by_digest
+      proc_time_group_by_digest;
+    eprintf "Digests : %8d\n%!"
+      t.digests;
+    (* The file count on this line is derived: hashed files minus digests. *)
+    eprintf "Duplicates (Hashed - Digests): %8d files %6.2f Gb\n%!"
+      (t.hashed_files - t.digests)
+      (gb t.redundant_data);
+    eprintf "Skipped due to 0 size : %8d files\n%!"
+      t.empty;
+    eprintf "Skipped due to unique size : %8d files %6.2f Gb %6.2f wall sec %6.2f proc sec\n%!"
+      t.unique_size_files
+      (gb t.unique_size_bytes)
+      wall_time_group_by_size
+      proc_time_group_by_size;
+    eprintf "Skipped due to unique sample : %8d files %6.2f Gb %6.2f wall sec %6.2f proc sec\n%!"
+      t.unique_sample_files
+      (gb t.unique_sample_bytes)
+      wall_time_group_by_head
+      proc_time_group_by_head;
+    eprintf "Ignored due to regex match : %8d files %6.2f Gb\n%!"
+      t.ignored_files
+      (gb t.ignored_bytes)
+end
+
+(* Short alias: the counters can be reached as [M.init], [M.report], etc. *)
+module M = Metrics