Move modules into dedicated files
[dups.git] / lib / file.ml
1 module Array = ArrayLabels
2 module List = ListLabels
3 module Unix = UnixLabels
4
5 module M = Metrics
6
7 type t =
8 { path : string
9 ; size : int
10 }
11
12 let lookup paths =
13 Stream.map paths ~f:(fun path ->
14 let {Unix.st_size = size; _} = Unix.lstat path in
15 {path; size}
16 )
17
18 let find root =
19 let dirs = Queue.create () in
20 let files = Queue.create () in
21 let explore parent =
22 Array.iter (Sys.readdir parent) ~f:(fun child ->
23 let path = Filename.concat parent child in
24 let {Unix.st_kind = file_kind; st_size; _} = Unix.lstat path in
25 match file_kind with
26 | Unix.S_REG ->
27 let file = {path; size = st_size} in
28 Queue.add file files
29 | Unix.S_DIR ->
30 Queue.add path dirs
31 | Unix.S_CHR
32 | Unix.S_BLK
33 | Unix.S_LNK
34 | Unix.S_FIFO
35 | Unix.S_SOCK ->
36 ()
37 )
38 in
39 explore root;
40 let rec next () =
41 match Queue.is_empty files, Queue.is_empty dirs with
42 | false, _ -> Some (Queue.take files)
43 | true , true -> None
44 | true , false ->
45 explore (Queue.take dirs);
46 next ()
47 in
48 Stream.create next
49
50 let filter_out_singletons files ~group ~handle_singleton =
51 let q = Queue.create () in
52 Stream.iter (Stream.group_by files ~f:group) ~f:(fun group ->
53 let (_, n, members) = group in
54 if n > 1 then
55 List.iter members ~f:(fun m -> Queue.add m q)
56 else
57 handle_singleton group
58 );
59 Stream.of_queue q
60
61 let filter_out_unique_sizes files ~metrics =
62 filter_out_singletons
63 files
64 ~group:(fun {size; _} -> size)
65 ~handle_singleton:(fun (size, _, _) -> M.file_unique_size metrics ~size)
66
67 let head {path; _} ~len ~metrics =
68 M.file_sampled metrics;
69 let buf = Bytes.make len ' ' in
70 let ic = open_in_bin path in
71 let rec read pos len =
72 assert (len >= 0);
73 if len = 0 then
74 ()
75 else begin
76 let chunk_size = input ic buf pos len in
77 M.chunk_read metrics ~size:chunk_size;
78 if chunk_size = 0 then (* EOF *)
79 ()
80 else
81 read (pos + chunk_size) (len - chunk_size)
82 end
83 in
84 read 0 len;
85 close_in ic;
86 Bytes.to_string buf
87
88 let filter_out_unique_heads files ~len ~metrics =
89 filter_out_singletons
90 files
91 ~group:(head ~len ~metrics)
92 ~handle_singleton:(fun (_, _, files) ->
93 let {size; _} = List.hd files in (* Guaranteed non-empty *)
94 M.file_unique_sample metrics ~size
95 )
This page took 0.051045 seconds and 4 git commands to generate.