1 module Array = ArrayLabels
2 module List = ListLabels
3 module Unix = UnixLabels
13 Stream.map paths ~f:(fun path ->
14 let {Unix.st_size = size; _} = Unix.lstat path in
19 let dirs = Queue.create () in
20 let files = Queue.create () in
22 Array.iter (Sys.readdir parent) ~f:(fun child ->
23 let path = Filename.concat parent child in
24 let {Unix.st_kind = file_kind; st_size; _} = Unix.lstat path in
27 let file = {path; size = st_size} in
41 match Queue.is_empty files, Queue.is_empty dirs with
42 | false, _ -> Some (Queue.take files)
45 explore (Queue.take dirs);
50 let filter_out_singletons files ~group ~handle_singleton =
51 let q = Queue.create () in
52 Stream.iter (Stream.group_by files ~f:group) ~f:(fun group ->
53 let (_, n, members) = group in
55 List.iter members ~f:(fun m -> Queue.add m q)
57 handle_singleton group
61 let filter_out_unique_sizes files ~metrics =
64 ~group:(fun {size; _} -> size)
65 ~handle_singleton:(fun (size, _, _) -> M.file_unique_size metrics ~size)
67 let head {path; _} ~len ~metrics =
68 M.file_sampled metrics;
69 let buf = Bytes.make len ' ' in
70 let ic = open_in_bin path in
71 let rec read pos len =
76 let chunk_size = input ic buf pos len in
77 M.chunk_read metrics ~size:chunk_size;
78 if chunk_size = 0 then (* EOF *)
81 read (pos + chunk_size) (len - chunk_size)
88 let filter_out_unique_heads files ~len ~metrics =
91 ~group:(head ~len ~metrics)
92 ~handle_singleton:(fun (_, _, files) ->
93 let {size; _} = List.hd files in (* Guaranteed non-empty *)
94 M.file_unique_sample metrics ~size