+ | Stdin ->
+ File.lookup (In_channel.lines stdin)
+ | Directories paths ->
+ let paths = StrSet.elements (StrSet.of_list paths) in
+ Stream.concat (List.map paths ~f:File.find)
+ in
+ Stream.filter input ~f:(fun {File.path; size} ->
+ incr count.considered_files;
+ add count.considered_bytes size;
+ let empty = size = 0 in
+ let ignored =
+ match ignore with
+ | Some regexp when (Str.string_match regexp path 0) ->
+ incr count.ignored_files;
+ add count.ignored_bytes size;
+ true
+ | Some _ | None ->
+ false
+ in
+ if empty then incr count.empty;
+ (not empty) && (not ignored)
+ )
+
+(** [make_output_fun target] returns the function used to report one group
+    of files; the result is called as [f digest n_files files].
+
+    - [Stdout]: prints the hex form of [digest] and [n_files] on one line,
+      then one [%S]-quoted path per line (each prefixed by a space),
+      flushing after every line.
+    - [Directory dir]: writes the [%S]-quoted paths, one per line, to the
+      file [dir/<first two hex chars of digest>/<hex digest>]. The count
+      argument is ignored. The two-character bucket directory is created
+      on demand with permissions [0o700]; it is not an error for it to
+      exist already.
+
+    @raise Unix.Unix_error if the bucket directory cannot be created for a
+    reason other than already existing.
+    @raise Sys_error if the output file cannot be opened or written. *)
+let make_output_fun = function
+  | Stdout ->
+    fun digest n_files files ->
+      printf "%s %d\n%!" (Digest.to_hex digest) n_files;
+      List.iter (File.Set.elements files) ~f:(fun {File.path; _} ->
+        printf " %S\n%!" path
+      )
+  | Directory dir ->
+    fun digest _ files ->
+      let digest = Digest.to_hex digest in
+      let dir = Filename.concat dir (String.sub digest 0 2) in
+      (* Several digests can share the same two-character prefix, so the
+         bucket directory may already exist from an earlier call. Only
+         EEXIST is tolerated; every other failure still propagates. *)
+      (try Unix.mkdir dir ~perm:0o700
+       with Unix.Unix_error (Unix.EEXIST, _, _) -> ());
+      let oc = open_out (Filename.concat dir digest) in
+      (* Release the channel even when a write fails, then re-raise so the
+         caller still sees the original error. *)
+      (try
+         List.iter (File.Set.elements files) ~f:(fun {File.path; _} ->
+           Printf.fprintf oc "%S\n" path
+         )
+       with e ->
+         close_out_noerr oc;
+         raise e);
+      close_out oc
+
+let sample path ~len ~count =
+ let buf = Bytes.make len ' ' in
+ let ic = open_in_bin path in
+ let rec read pos len =
+ assert (len >= 0);
+ if len = 0 then
+ ()
+ else begin
+ let chunk_size = input ic buf pos len in
+ add count.sampled_bytes chunk_size;
+ if chunk_size = 0 then (* EOF *)
+ ()
+ else
+ read (pos + chunk_size) (len - chunk_size)
+ end