Commit | Line | Data |
---|---|---|
ddcbda00 SK |
1 | module Array = ArrayLabels |
2 | module List = ListLabels | |
3 | module Unix = UnixLabels | |
4 | ||
5 | module M = Metrics | |
6 | ||
7 | type t = | |
8 | { path : string | |
9 | ; size : int | |
10 | } | |
11 | ||
12 | let lookup paths = | |
13 | Stream.map paths ~f:(fun path -> | |
14 | let {Unix.st_size = size; _} = Unix.lstat path in | |
15 | {path; size} | |
16 | ) | |
17 | ||
18 | let find root = | |
19 | let dirs = Queue.create () in | |
20 | let files = Queue.create () in | |
21 | let explore parent = | |
22 | Array.iter (Sys.readdir parent) ~f:(fun child -> | |
23 | let path = Filename.concat parent child in | |
24 | let {Unix.st_kind = file_kind; st_size; _} = Unix.lstat path in | |
25 | match file_kind with | |
26 | | Unix.S_REG -> | |
27 | let file = {path; size = st_size} in | |
28 | Queue.add file files | |
29 | | Unix.S_DIR -> | |
30 | Queue.add path dirs | |
31 | | Unix.S_CHR | |
32 | | Unix.S_BLK | |
33 | | Unix.S_LNK | |
34 | | Unix.S_FIFO | |
35 | | Unix.S_SOCK -> | |
36 | () | |
37 | ) | |
38 | in | |
39 | explore root; | |
40 | let rec next () = | |
41 | match Queue.is_empty files, Queue.is_empty dirs with | |
42 | | false, _ -> Some (Queue.take files) | |
43 | | true , true -> None | |
44 | | true , false -> | |
45 | explore (Queue.take dirs); | |
46 | next () | |
47 | in | |
48 | Stream.create next | |
49 | ||
50 | let filter_out_singletons files ~group ~handle_singleton = | |
51 | let q = Queue.create () in | |
52 | Stream.iter (Stream.group_by files ~f:group) ~f:(fun group -> | |
53 | let (_, n, members) = group in | |
54 | if n > 1 then | |
55 | List.iter members ~f:(fun m -> Queue.add m q) | |
56 | else | |
57 | handle_singleton group | |
58 | ); | |
59 | Stream.of_queue q | |
60 | ||
61 | let filter_out_unique_sizes files ~metrics = | |
62 | filter_out_singletons | |
63 | files | |
64 | ~group:(fun {size; _} -> size) | |
65 | ~handle_singleton:(fun (size, _, _) -> M.file_unique_size metrics ~size) | |
66 | ||
67 | let head {path; _} ~len ~metrics = | |
68 | M.file_sampled metrics; | |
69 | let buf = Bytes.make len ' ' in | |
70 | let ic = open_in_bin path in | |
71 | let rec read pos len = | |
72 | assert (len >= 0); | |
73 | if len = 0 then | |
74 | () | |
75 | else begin | |
76 | let chunk_size = input ic buf pos len in | |
77 | M.chunk_read metrics ~size:chunk_size; | |
78 | if chunk_size = 0 then (* EOF *) | |
79 | () | |
80 | else | |
81 | read (pos + chunk_size) (len - chunk_size) | |
82 | end | |
83 | in | |
84 | read 0 len; | |
85 | close_in ic; | |
86 | Bytes.to_string buf | |
87 | ||
88 | let filter_out_unique_heads files ~len ~metrics = | |
89 | filter_out_singletons | |
90 | files | |
91 | ~group:(head ~len ~metrics) | |
92 | ~handle_singleton:(fun (_, _, files) -> | |
93 | let {size; _} = List.hd files in (* Guaranteed non-empty *) | |
94 | M.file_unique_sample metrics ~size | |
95 | ) |