+ let b_to_mb b = (float_of_int b) /. 1024. /. 1024. in
+ let b_to_gb b = (b_to_mb b) /. 1024. in
+ eprintf "Time : %8.2f seconds\n%!" (t1 -. t0);
+ eprintf "Considered : %8d files %6.2f Gb\n%!"
+ !(count.considered_files)
+ (b_to_gb !(count.considered_bytes));
+ eprintf "Sampled : %8d files %6.2f Gb\n%!"
+ !(count.sampled_files)
+ (b_to_gb !(count.sampled_bytes));
+ eprintf "Hashed : %8d files %6.2f Gb %6.2f seconds\n%!"
+ !(count.hashed_files)
+ (b_to_gb !(count.hashed_bytes))
+ (t1_group_by_digest -. t0_group_by_digest);
+ eprintf "Digests : %8d\n%!"
+ !(count.digests);
+ eprintf "Duplicates (Hashed - Digests): %8d\n%!"
+ (!(count.hashed_files) - !(count.digests));
+ eprintf "Skipped due to 0 size : %8d files\n%!" !(count.empty);
+ eprintf "Skipped due to unique size : %8d files %6.2f Gb %6.2f seconds\n%!"
+ !(count.unique_size_files)
+ (b_to_gb !(count.unique_size_bytes))
+ (t1_group_by_size -. t0_group_by_size);
+ eprintf "Skipped due to unique sample : %8d files %6.2f Gb %6.2f seconds\n%!"
+ !(count.unique_sample_files)
+ (b_to_gb !(count.unique_sample_bytes))
+ (t1_group_by_sample -. t0_group_by_sample);
+ eprintf "Ignored due to regex match : %8d files %6.2f Gb\n%!"
+ !(count.ignored_files)
+ (b_to_gb !(count.ignored_bytes))
+
+(** [get_opt ()] builds the program options from the command line.
+
+    Arguments are read with [Arg.parse]:
+    - [-out DIR] sets [output] to [Directory DIR] (default [Stdout]);
+    - [-ignore REGEXP] sets [ignore] to [Some] of the compiled [Str]
+      regexp (default [None]);
+    - [-sample N] sets the sample size in bytes (default 256);
+    - every anonymous argument is added to [input] as a directory
+      (default [Stdin]). Directories are prepended, so they end up in the
+      reverse of their command-line order.
+
+    Every directory argument must exist and be a directory, and the sample
+    size must be strictly positive; otherwise a message is printed to
+    stderr and the process exits with status 1. *)
+let get_opt () : opt =
+  (* Unless [test x] holds, print [msg] to stderr and exit with status 1. *)
+  let assert_ test x msg =
+    if not (test x) then begin
+      eprintf "%s\n%!" msg;
+      exit 1
+    end
+  in
+  let assert_file_exists path =
+    assert_ Sys.file_exists path (sprintf "File does not exist: %S" path)
+  in
+  let assert_file_is_dir path =
+    assert_ Sys.is_directory path (sprintf "File is not a directory: %S" path)
+  in
+  (* Existence is checked first because [Sys.is_directory] raises
+     [Sys_error] on a missing path. *)
+  let assert_existing_dir path =
+    assert_file_exists path;
+    assert_file_is_dir path
+  in
+  let input = ref Stdin in
+  let output = ref Stdout in
+  (* Not named [ignore], to avoid shadowing [Stdlib.ignore] in this scope. *)
+  let ignore_pattern = ref None in
+  let sample = ref 256 in
+  let spec =
+    [ ( "-out"
+      , Arg.String (fun path ->
+          assert_existing_dir path;
+          output := Directory path
+        )
+      , " Output to this directory instead of stdout."
+      )
+    ; ( "-ignore"
+      , Arg.String (fun regexp -> ignore_pattern := Some (Str.regexp regexp))
+      , " Ignore file paths which match this regexp pattern (see Str module)."
+      )
+    ; ( "-sample"
+      , Arg.Set_int sample
+      , (sprintf " Byte size of file samples to use. Default: %d" !sample)
+      )
+    ]
+  in
+  Arg.parse
+    (Arg.align spec)
+    (fun path ->
+      assert_existing_dir path;
+      match !input with
+      | Stdin ->
+          input := Directories [path]
+      | Directories paths ->
+          input := Directories (path :: paths)
+    )
+    "";
+  (* Zero is rejected as well as negatives, so the message says "positive"
+     rather than "cannot be negative". *)
+  assert_
+    (fun x -> x > 0)
+    !sample
+    (sprintf "Sample size must be positive: %d" !sample);
+  { input = !input
+  ; output = !output
+  ; ignore = !ignore_pattern
+  ; sample = !sample
+  }