+ Stream.filter input ~f:(fun {File.path; size} ->
+ incr count.considered;
+ let empty = size = 0 in
+ let ignored =
+ match ignore with
+ | Some regexp when (Str.string_match regexp path 0) ->
+ true
+ | Some _ | None ->
+ false
+ in
+ if empty then incr count.empty;
+ if ignored then incr count.ignored;
+ (not empty) && (not ignored)
+ )
+
+ (** [make_output_fun output] returns the function used to report one group of
+     paths; it is called as [f digest n_paths paths].
+
+     - [Stdout]: prints the hex form of [digest] and [n_paths] on one line,
+       then each path in [paths] on its own line, quoted with [%S] and
+       preceded by a space. Output is flushed after every line.
+     - [Directory dir]: writes the quoted paths, one per line, to the file
+       [dir/<first two hex characters of digest>/<hex digest>]. [n_paths] is
+       ignored in this mode.
+
+     Raises [Unix.Unix_error] if the prefix directory cannot be created for a
+     reason other than already existing, and [Sys_error] if the output file
+     cannot be opened or written. *)
+ let make_output_fun = function
+   | Stdout ->
+     fun digest n_paths paths ->
+       printf "%s %d\n%!" (Digest.to_hex digest) n_paths;
+       List.iter (StrSet.elements paths) ~f:(printf " %S\n%!")
+   | Directory dir ->
+     fun digest _ paths ->
+       let digest = Digest.to_hex digest in
+       let dir = Filename.concat dir (String.sub digest 0 2) in
+       (* Different digests can share the same two-character prefix, so the
+          prefix directory may already exist; that is not an error. *)
+       (try Unix.mkdir dir ~perm:0o700
+        with Unix.Unix_error (Unix.EEXIST, _, _) -> ());
+       let oc = open_out (Filename.concat dir digest) in
+       (* Close the channel even when a write fails, then re-raise. *)
+       (try
+          List.iter (StrSet.elements paths) ~f:(fun path ->
+            output_string oc (sprintf "%S\n%!" path))
+        with e ->
+          close_out_noerr oc;
+          raise e);
+       close_out oc
+
+ (** [sample path ~len] reads up to the first [len] bytes of the file at
+     [path] and returns a string of length exactly [len]. When the file holds
+     fewer than [len] bytes, the unread tail of the result keeps the space
+     characters the buffer was initialised with.
+
+     Raises [Sys_error] if the file cannot be opened or read, and
+     [Invalid_argument] (from [Bytes.make]) if [len] is negative. The input
+     channel is closed on both the normal and the exceptional path. *)
+ let sample path ~len =
+   let buf = Bytes.make len ' ' in
+   let ic = open_in_bin path in
+   (* [input] may return fewer bytes than requested, so keep reading until
+      [len] bytes have been collected or end of file is reached. *)
+   let rec read pos len =
+     assert (len >= 0);
+     if len > 0 then begin
+       let chunk_size = input ic buf pos len in
+       (* A zero-length read means end of file: stop and keep the padding. *)
+       if chunk_size > 0 then read (pos + chunk_size) (len - chunk_size)
+     end
+   in
+   (* Do not leak the file descriptor when a read fails. *)
+   (try read 0 len
+    with e ->
+      close_in_noerr ic;
+      raise e);
+   close_in ic;
+   Bytes.to_string buf
+
+let main {input; output; ignore; sample = sample_len} =