X-Git-Url: https://git.xandkar.net/?p=dups.git;a=blobdiff_plain;f=dups.ml;h=c627369c5de0c44d6b6d6cbb12921f9382d17276;hp=28c266388e3bd0e31d932fcf9734dc3d0bb248ba;hb=8c54ccb832581554a2c272c0f08b4300081d22b5;hpb=5c0100d2862a04f55b18b702da4f617302b913f9;ds=sidebyside diff --git a/dups.ml b/dups.ml index 28c2663..c627369 100644 --- a/dups.ml +++ b/dups.ml @@ -256,6 +256,15 @@ let main {input; output; ignore; sample = sample_len} = in Hashtbl.replace tbl group (count + 1, File.Set.add file files) in + (* TODO: Make a nice(r) abstraction to re-assemble pieces in the pipeline: + * + * from input to files_by_size + * from files_by_size to files_by_sample + * from files_by_sample to files_by_digest + * from files_by_digest to output + * + * input |> files_by_size |> files_by_sample |> files_by_digest |> output + *) Stream.iter input ~f:(fun ({File.size; _} as file) -> process files_by_size ~group:size ~file );