-module Unix = UnixLabels
-
(** Counters collected during a run, and a summary printer for them.

    A value of type {!Metrics.t} is a bag of mutable tallies (file counts and
    byte counts).  The recording functions each bump one or two tallies;
    {!Metrics.report} prints all of them to standard error. *)
module Metrics : sig
  (** Mutable set of counters.  The representation is hidden; values are
      created with {!init} and updated in place by the functions below. *)
  type t

  (** [init ()] returns a fresh set of counters, all equal to zero. *)
  val init : unit -> t

  (** [report t ~time_all ~time_group_by_size ~time_group_by_head
      ~time_group_by_digest] prints one summary line per counter group to
      standard error, flushing after every line.

      Byte totals are divided by 1024 three times before being printed in the
      "Gb" columns.  The four [time_*] arguments are printed verbatim in the
      "seconds" columns: [time_all] on the "Time" line,
      [time_group_by_digest] on the "Hashed" line, [time_group_by_size] on the
      "unique size" line and [time_group_by_head] on the "unique sample"
      line.  The "Duplicates" file count is computed as the number of hashed
      files minus the number of digests. *)
  val report
    :  t
    -> time_all:float
    -> time_group_by_size:float
    -> time_group_by_head:float
    -> time_group_by_digest:float
    -> unit

  (** Counts one more considered file and adds [size] to the considered
      byte total. *)
  val file_considered : t -> size:int -> unit

  (** Counts one more ignored file and adds [size] to the ignored byte total
      (printed by {!report} as "Ignored due to regex match"). *)
  val file_ignored : t -> size:int -> unit

  (** Counts one more empty file (printed by {!report} as
      "Skipped due to 0 size"). *)
  val file_empty : t -> unit

  (** Counts one more sampled file.  The matching byte total is fed
      separately through {!chunk_read}. *)
  val file_sampled : t -> unit

  (** Adds [size] to the sampled byte total. *)
  val chunk_read : t -> size:int -> unit

  (** Counts one more file and [size] more bytes under
      "Skipped due to unique size". *)
  val file_unique_size : t -> size:int -> unit

  (** Counts one more file and [size] more bytes under
      "Skipped due to unique sample". *)
  val file_unique_sample : t -> size:int -> unit

  (** Counts one more hashed file and adds [size] to the hashed byte total. *)
  val file_hashed : t -> size:int -> unit

  (** Counts one more digest. *)
  val digest : t -> unit

  (** Adds [size] to the redundant byte total (the "Gb" column of the
      "Duplicates" line). *)
  val redundant_data : t -> size:int -> unit
end = struct
  (* Plain mutable fields instead of a record of [int ref]s: one allocation
     for the whole record and no [!]/[:=] noise at the use sites.  The type
     is abstract in the signature, so this is invisible to callers. *)
  type t =
    { mutable considered_files : int
    ; mutable considered_bytes : int
    ; mutable empty : int
    ; mutable ignored_files : int
    ; mutable ignored_bytes : int
    ; mutable unique_size_files : int
    ; mutable unique_size_bytes : int
    ; mutable unique_sample_files : int
    ; mutable unique_sample_bytes : int
    ; mutable sampled_files : int
    ; mutable sampled_bytes : int
    ; mutable hashed_files : int
    ; mutable hashed_bytes : int
    ; mutable digests : int
    ; mutable redundant_data : int
    }

  (* Fields are listed in the same order as in the type declaration. *)
  let init () =
    { considered_files = 0
    ; considered_bytes = 0
    ; empty = 0
    ; ignored_files = 0
    ; ignored_bytes = 0
    ; unique_size_files = 0
    ; unique_size_bytes = 0
    ; unique_sample_files = 0
    ; unique_sample_bytes = 0
    ; sampled_files = 0
    ; sampled_bytes = 0
    ; hashed_files = 0
    ; hashed_bytes = 0
    ; digests = 0
    ; redundant_data = 0
    }

  let file_considered t ~size =
    t.considered_files <- t.considered_files + 1;
    t.considered_bytes <- t.considered_bytes + size

  let file_ignored t ~size =
    t.ignored_files <- t.ignored_files + 1;
    t.ignored_bytes <- t.ignored_bytes + size

  let file_empty t =
    t.empty <- t.empty + 1

  let chunk_read t ~size =
    t.sampled_bytes <- t.sampled_bytes + size

  let file_sampled t =
    t.sampled_files <- t.sampled_files + 1

  let file_unique_size t ~size =
    t.unique_size_files <- t.unique_size_files + 1;
    t.unique_size_bytes <- t.unique_size_bytes + size

  let file_unique_sample t ~size =
    t.unique_sample_files <- t.unique_sample_files + 1;
    t.unique_sample_bytes <- t.unique_sample_bytes + size

  let file_hashed t ~size =
    t.hashed_files <- t.hashed_files + 1;
    t.hashed_bytes <- t.hashed_bytes + size

  let digest t =
    t.digests <- t.digests + 1

  let redundant_data t ~size =
    t.redundant_data <- t.redundant_data + size

  let report
        t
        ~time_all
        ~time_group_by_size
        ~time_group_by_head
        ~time_group_by_digest
    =
    (* Bytes -> the unit printed in the "Gb" columns (three divisions by
       1024). *)
    let b_to_gb bytes = float_of_int bytes /. 1024. /. 1024. /. 1024. in
    (* [Printf.eprintf] is spelled out in full so the module does not depend
       on an [open Printf] elsewhere in the file.  Each format ends in [%!],
       which flushes stderr after the line. *)
    Printf.eprintf "Time : %8.2f seconds\n%!"
      time_all;
    Printf.eprintf "Considered : %8d files %6.2f Gb\n%!"
      t.considered_files
      (b_to_gb t.considered_bytes);
    Printf.eprintf "Sampled : %8d files %6.2f Gb\n%!"
      t.sampled_files
      (b_to_gb t.sampled_bytes);
    Printf.eprintf "Hashed : %8d files %6.2f Gb %6.2f seconds\n%!"
      t.hashed_files
      (b_to_gb t.hashed_bytes)
      time_group_by_digest;
    Printf.eprintf "Digests : %8d\n%!"
      t.digests;
    Printf.eprintf "Duplicates (Hashed - Digests): %8d files %6.2f Gb\n%!"
      (t.hashed_files - t.digests)
      (b_to_gb t.redundant_data);
    Printf.eprintf "Skipped due to 0 size : %8d files\n%!"
      t.empty;
    Printf.eprintf "Skipped due to unique size : %8d files %6.2f Gb %6.2f seconds\n%!"
      t.unique_size_files
      (b_to_gb t.unique_size_bytes)
      time_group_by_size;
    Printf.eprintf "Skipped due to unique sample : %8d files %6.2f Gb %6.2f seconds\n%!"
      t.unique_sample_files
      (b_to_gb t.unique_sample_bytes)
      time_group_by_head;
    Printf.eprintf "Ignored due to regex match : %8d files %6.2f Gb\n%!"
      t.ignored_files
      (b_to_gb t.ignored_bytes)
end