Commit | Line | Data |
---|---|---|
96c72196 SK |
1 | #! /usr/bin/awk -f |
2 | # | |
3 | # Plot Simply | |
4 | # | |
5 | # TODO: Incremental plotting (new canvas for each data line) | |
6 | # TODO: Support manual set of max y | |
7 | ||
# Return str wrapped in the ANSI SGR 33 sequence, followed by an SGR reset.
function orange(str) {
    return sprintf("\033[33m%s\033[0m", str);
}
# Return str wrapped in the ANSI SGR 1;33 sequence, followed by an SGR reset.
function orange_bright(str) {
    return sprintf("\033[1;33m%s\033[0m", str);
}
# Return str wrapped in the ANSI SGR 32 sequence, followed by an SGR reset.
function green(str) {
    return sprintf("\033[32m%s\033[0m", str);
}
# Return str wrapped in the ANSI SGR 30 sequence, followed by an SGR reset.
function grey_dark(str) {
    return sprintf("\033[30m%s\033[0m", str);
}
# Return str wrapped in the ANSI SGR 37 sequence, followed by an SGR reset.
function grey_light(str) {
    return sprintf("\033[37m%s\033[0m", str);
}
13 | ||
# Absolute value: n itself when n >= 0, otherwise its negation.
function abs(n) {
    if (n >= 0) {
        return n;
    }
    return -n;
}
# Round n_float to the nearest integer, with halves rounded away from zero.
#
# int() truncates toward zero, so the bare int(n + 0.5) form is only right
# for non-negative input (it turns -0.6 into 0). The 0.5 bias is therefore
# applied in the direction of the sign. The trailing "+ 0" normalises a
# possible negative zero so the result is safe to use as an array subscript.
function round(n_float) {
    return int(n_float < 0 ? n_float - 0.5 : n_float + 0.5) + 0;
}
# Base-10 logarithm of n, derived from the natural log via change of base.
function log_10(n) {
    return log(n) / log(10);
}
17 | ||
# Number of character cells needed to print n as a decimal integer,
# including the leading "-" for negative values.
#
# The y-axis labels are rendered with a "%d" conversion, so the width is
# measured on that same rendering rather than estimated from a rounded
# logarithm (which over-counts values such as 0, 7 or -5 and depends on
# floating-point rounding near powers of ten).
#
#   n - number to measure
#   w - local scratch variable; callers do not pass it
function width(n, w) {
    w = length(sprintf("%d", n));
    return w;
}
26 | ||
# Append val to the given bin.
#
# Items are stored in the global array bins[bin, index] with a 0-based index;
# the global bin_item_count[bin] holds the number of items stored so far.
#
#   bin   - bin identifier
#   val   - value to store
#   bin_i - local scratch variable; callers do not pass it
function bins_append_item_to_bin(bin, val, bin_i) {
    bin_i = bin_item_count[bin] + 0;   # an unseen bin starts at index 0
    bin_item_count[bin] = bin_i + 1;
    bins[bin, bin_i] = val;
}
31 | ||
BEGIN {
    # Options are read from awk variables that must already be set when
    # BEGIN runs (e.g. via -v):
    #   w  - canvas width            (default 70)
    #   h  - canvas height           (default 20)
    #   p  - data point character    (default ".")
    #   a  - cell padding string     (default "")
    #   b  - blank cell character    (default "|")
    #   g  - aggregation name        (default "mean")
    #   df - input field to plot     (default 1)
    # An option whose value is empty or zero falls back to its default.
    limits["canvas_width"] = 70;
    if (w) {
        limits["canvas_width"] = w;
    }
    limits["canvas_height"] = 20;
    if (h) {
        limits["canvas_height"] = h;
    }

    char_point = p ? orange_bright(p) : orange_bright(".");
    # FIXME: non-blank pad is broken after adding the y labels
    char_pad   = a ? grey_dark(a) : grey_dark("");
    char_blank = b ? grey_dark(b) : grey_dark("|");

    aggregation = "mean";
    if (g) {
        aggregation = g;
    }
    data_field = 1;
    if (df) {
        data_field = df;
    }

    # Both extremes start at 0, so the tracked range always contains zero.
    y_orig_max = 0;
    y_orig_min = 0;
}
45 | ||
# Per-record rule: remember the selected field of every input line under its
# record number and track the running minimum / maximum.
{
    # Force a numeric value. Without "+ 0" a field that does not look like a
    # number (header line, blank line) is compared as a *string* against the
    # running extremes and can replace them with text; coerced, such a field
    # simply counts as 0.
    y_orig = $data_field + 0;
    if (y_orig > y_orig_max) y_orig_max = y_orig;
    if (y_orig < y_orig_min) y_orig_min = y_orig;
    data_0[NR] = y_orig;
}
52 | ||
# Arithmetic mean of all values stored in array.
#
# The caller (aggregate) passes the array as the only argument, so the
# function must declare it; the previous empty, parameterless stub returned
# nothing.
#
#   array             - values to average (keys are ignored)
#   key, count, total - local scratch variables; callers do not pass them
#
# Returns 0 for an empty array so the division is never by zero.
function aggregate_mean(array,    key, count, total) {
    count = 0;
    total = 0;
    for (key in array) {
        total += array[key];
        count++;
    }
    return count > 0 ? total / count : 0;
}
55 | ||
# Reduce array to a single value using the method named by the global
# `aggregation`. Only "mean" is recognised; any other name is reported on
# stderr and the program exits with status 2.
function aggregate(array) {
    if (aggregation != "mean") {
        printf("Unknown aggregation: %s\n", aggregation) > "/dev/stderr";
        exit(2);
    }
    return aggregate_mean(array);
}
64 | ||
# Compress data_in along x so that it fits the plot area: every input point
# is routed to one of limits["canvas_width_x"] bins and each bin is reduced
# to the mean of its points.
#
#   data_in  - input values; keys are assumed to be the 1-based positions
#              1..N (this is what the per-record rule stores under NR)
#   data_out - receives data_out[bin] = mean of that bin, bins are 0-based
#   limits   - reads  limits["canvas_width_x"]
#              writes limits["x_min"], limits["x_max"] (lowest / highest bin)
#   remaining parameters are local scratch variables; callers do not pass them
#
# Bin indices are floor((pos - 1) * width / N), i.e. 0 .. width-1. The
# earlier round(pos * width / N) form produced indices up to `width`, one
# past the last column available to the right of the y axis, and gave the
# first and last bins only half the share of points.
#
# As before, x_orig_min, x_orig_max, x_bin_min and x_bin_max are left behind
# as globals.
function data_scaled_x_to_width(data_in, data_out, limits,
    x_orig, datum, x_bin, bins, bin_item_counts, bin_sum, bin_item_count,
    i, bin_item, bin_mean, x_num, n_points) {

    n_points = length(data_in);     # loop-invariant, computed once

    x_orig_min = 0;
    x_orig_max = 0;
    x_bin_min = 0;
    x_bin_max = 0;

    # Route every point to its bin.
    for (x_orig in data_in) {
        datum = data_in[x_orig];

        # for-in yields keys as strings; compare and scale them as numbers.
        x_num = x_orig + 0;
        if (x_num > x_orig_max) x_orig_max = x_num;
        if (x_num < x_orig_min) x_orig_min = x_num;

        x_bin = int((x_num - 1) * limits["canvas_width_x"] / n_points);
        if (x_bin > x_bin_max) x_bin_max = x_bin;
        if (x_bin < x_bin_min) x_bin_min = x_bin;

        bin_item_count = ++bin_item_counts[x_bin];
        bins[x_bin, bin_item_count] = datum;
    }

    # Reduce each bin to its mean.
    for (x_bin in bin_item_counts) {
        bin_sum = 0;
        bin_item_count = bin_item_counts[x_bin];
        for (i = 1; i <= bin_item_count; i++) {
            bin_item = bins[x_bin, i];
            bin_sum += bin_item;
        }
        bin_mean = bin_sum / bin_item_count;
        data_out[x_bin] = bin_mean;
    }

    limits["x_min"] = x_bin_min;
    limits["x_max"] = x_bin_max;
}
116 | ||
# Map the values of data_in onto canvas rows.
#
#   data_in  - values keyed by x
#   data_out - receives data_out[x] = scaled row for that x
#   limits   - reads  limits["canvas_height"]
#              writes limits["y_min"], limits["y_max"], limits["offset_scaled"]
#   remaining parameters are local scratch variables; callers do not pass them
#
# Reads the globals y_orig_min / y_orig_max. When y_orig_min is negative all
# values are first shifted up by -y_orig_min so that the smallest possible
# value sits at 0; the same shift, scaled to rows, is stored as
# limits["offset_scaled"].
#
# Intermediate results (offset_orig, data_in_offsetted, y_orig_offseted_min,
# y_orig_offseted_max, y_orig_offsetted, y_scaled_min, y_scaled_max,
# range_orig, offset_scaled) are written to globals.
#
# TODO: Is there a better, closed-form way to get the offset?
# TODO: Is there better way to map canvas to value ranges altogether?
function data_scaled_y_to_height(data_in, data_out, limits,
    data_scaled, x, y, y_orig, y_scaled) {

    # Shift so that the minimum becomes non-negative.
    offset_orig = (y_orig_min < 0) ? -1 * y_orig_min : 0;
    for (x in data_in) {
        y = data_in[x];
        data_in_offsetted[x] = y + offset_orig;
    }
    y_orig_offseted_min = y_orig_min + offset_orig;
    y_orig_offseted_max = y_orig_max + offset_orig;

    # Scale the shifted values to the canvas height.
    y_scaled_max = 0;
    y_scaled_min = 0;
    for (x in data_in_offsetted) {
        y_orig_offsetted = data_in_offsetted[x];
        if (y_orig_offseted_max > 0) {
            y_scaled = round((y_orig_offsetted * limits["canvas_height"]) / y_orig_offseted_max);
        } else {
            y_scaled = 0;
        }
        if (y_scaled > y_scaled_max) y_scaled_max = y_scaled;
        if (y_scaled < y_scaled_min) y_scaled_min = y_scaled;
        data_out[x] = y_scaled;
    }

    # Publish the resulting limits.
    limits["y_min"] = y_scaled_min;
    limits["y_max"] = y_scaled_max;

    range_orig = y_orig_max - y_orig_min;
    if (range_orig > 0) {
        offset_scaled = round(offset_orig * limits["canvas_height"] / range_orig);
    } else {
        offset_scaled = 0;
    }
    limits["offset_scaled"] = offset_scaled;
}
166 | ||
# Fill every cell of canvas with the blank cell (char_pad char_blank char_pad).
#
#   canvas   - array to fill, indexed as canvas[row, col]
#   n_cols   - highest column index to fill (columns 0..n_cols inclusive)
#   n_rows   - highest row index to fill    (rows    0..n_rows inclusive)
#   row, col - local scratch variables; callers do not pass them
#
# The size parameters were previously called `width` and `height`; `width`
# is also the name of a function in this file, and POSIX does not allow a
# parameter to share its name with a function (gawk --posix rejects it).
# AWK arguments are positional, so the rename does not affect callers.
function canvas_init(canvas, n_cols, n_rows, row, col) {
    for (row = 0; row <= n_rows; row++) {
        for (col = 0; col <= n_cols; col++) {
            canvas[row, col] = char_pad char_blank char_pad;
        }
    }
}
174 | ||
# Draw a green "-" in the row limits["offset_scaled"], from column
# limits["canvas_width_y"] - 1 through limits["canvas_width"].
#
#   canvas   - canvas[row, col] array to draw into
#   limits   - reads "canvas_width_y", "canvas_width", "offset_scaled"
#   row, col - local scratch variables; callers do not pass them
#
# Writes the global `offset` as a side effect.
function canvas_overlay_highlight_ticks_x(canvas, limits, row, col) {
    col = limits["canvas_width_y"] - 1;
    while (col <= limits["canvas_width"]) {
        offset = limits["offset_scaled"];
        row = 0 + offset;
        canvas[row, col] = char_pad green("-") char_pad;
        col++;
    }
}
184 | ||
# Draw the horizontal zero line: a green "-" in every column from
# limits["canvas_width_y"] - 1 through limits["canvas_width"], in the row
# given by limits["offset_scaled"].
#
#   canvas   - canvas[row, col] array to draw into
#   limits   - reads "canvas_width_y", "canvas_width", "offset_scaled"
#   row, col - local scratch variables; callers do not pass them
#
# Writes the global `offset` as a side effect.
function canvas_overlay_highlight_zero_row(canvas, limits, row, col) {
    col = limits["canvas_width_y"] - 1;
    while (col <= limits["canvas_width"]) {
        offset = limits["offset_scaled"];
        row = 0 + offset;
        canvas[row, col] = char_pad green("-") char_pad;
        col = col + 1;
    }
}
195 | ||
# Draw the vertical axis: a green "|" in column limits["canvas_width_y"] for
# every row 0..limits["canvas_height"], then a green "+" at both ends.
#
#   canvas   - canvas[row, col] array to draw into
#   limits   - reads "canvas_width_y", "canvas_height"
#   row, col - local scratch variables; callers do not pass them
#
# TODO: Refactor color/character configs to ease composition
function canvas_overlay_highlight_zero_col(canvas, limits, row, col) {
    row = 0;
    while (row <= limits["canvas_height"]) {
        col = limits["canvas_width_y"];     # was also -1. Why?
        canvas[row, col] = green("|");
        row++;
    }
    # End caps at the top and bottom of the axis.
    canvas[limits["canvas_height"], limits["canvas_width_y"]] = green("+");
    canvas[0, limits["canvas_width_y"]] = green("+");
}
# Mark the cell at row limits["offset_scaled"], column
# limits["canvas_width_y"] with a green "+".
#
#   canvas - canvas[row, col] array to draw into
#   limits - reads "offset_scaled", "canvas_width_y"
function canvas_overlay_highlight_zero(canvas, limits) {
    canvas[limits["offset_scaled"] + 0, limits["canvas_width_y"] + 0] = green("+");
}
210 | ||
# Plot the scaled data: for every x a point marker at its row, plus a stem of
# orange "|" cells connecting the marker to the row limits["offset_scaled"].
#
#   canvas - canvas[row, col] array to draw into
#   data   - data[x] = row of the point for that x
#   limits - reads "canvas_width_y", "offset_scaled"
#   x_data, x_canvas, y, yi, yj - local scratch variables; callers do not
#                                 pass them
#
# The canvas column for x is x + limits["canvas_width_y"] + 1. Cells for
# x == 0 are written without the char_pad wrapping.
#
# TODO: Would be nice to scale width of all cells to the widest
# TODO: This special case for 0 is kind of a kludge - can we do better?
function canvas_overlay_data(canvas, data, limits, x_data, x_canvas, y, yi, yj) {
    for (x_data in data) {
        y = data[x_data];
        x_canvas = x_data + limits["canvas_width_y"] + 1;

        # Point marker.
        if (x_data == 0) {
            canvas[y, x_canvas] = char_point;
        } else {
            canvas[y, x_canvas] = char_pad char_point char_pad;
        }

        if (y > limits["offset_scaled"]) {
            # Stem from just below the marker down to the offset row.
            yi = y - 1;
            while (yi >= limits["offset_scaled"]) {
                if (x_data == 0) {
                    canvas[yi, x_canvas] = orange("|");
                } else {
                    canvas[yi, x_canvas] = char_pad orange("|") char_pad;
                }
                yi--;
            }
        } else if (y < limits["offset_scaled"]) {
            # Stem from the offset row down to just above the marker.
            yj = limits["offset_scaled"];
            while (yj > y) {
                if (x_data == 0) {
                    canvas[yj, x_canvas] = orange("|");
                } else {
                    canvas[yj, x_canvas] = char_pad orange("|") char_pad;
                }
                yj--;
            }
        }
    }
}
236 | ||
# Write the y-axis labels into the label columns at the left of the canvas:
# the global y_orig_max on the top row, the global y_orig_min on row 0, and
# a "0" tick on the row limits["offset_scaled"].
#
#   canvas - canvas[row, col] array to draw into
#   limits - reads "canvas_width_y", "canvas_height", "offset_scaled"
#   y_lab_fmt, y_max_str, i, y_min_str - local scratch variables; callers do
#                                        not pass them
#
# Each label is right-aligned in limits["canvas_width_y"] - 1 cells followed
# by one space, one character per canvas cell starting at column 0.
#
# The zero tick is written FIRST so that the min / max labels overwrite it
# when the zero row coincides with the top or bottom row; written after the
# max label it turned a top-row "0 " into "00". y_min_str is now a local
# instead of leaking into the global namespace.
function canvas_overlay_y_lab(canvas, limits, y_lab_fmt, y_max_str, i, y_min_str) {
    y_lab_fmt = "%" (limits["canvas_width_y"] - 1) "d ";
    y_max_str = sprintf(y_lab_fmt, y_orig_max);
    y_min_str = sprintf(y_lab_fmt, y_orig_min);

    canvas[0 + limits["offset_scaled"], 0 + limits["canvas_width_y"] - 1] = 0;

    for (i = 1; i <= length(y_max_str); i++) {
        canvas[limits["canvas_height"], i - 1] = substr(y_max_str, i, 1);
    }
    for (i = 1; i <= length(y_min_str); i++) {
        canvas[0, i - 1] = substr(y_min_str, i, 1);
    }
}
250 | ||
# Print the canvas to stdout, highest row first, one output line per row.
#
#   canvas   - canvas[row, col] array to print
#   limits   - reads "canvas_height" (top row) and "canvas_width" (last column)
#   row, col - local scratch variables; callers do not pass them
function canvas_print(canvas, limits, row, col) {
    row = limits["canvas_height"];
    while (row >= 0) {
        col = 0;
        while (col <= limits["canvas_width"]) {
            printf("%s", canvas[row, col]);
            col++;
        }
        printf("\n");
        row--;
    }
}
259 | ||
END {
    # The label area must fit the wider of the two extreme values.
    y_orig_min_width = width(y_orig_min);
    y_orig_max_width = width(y_orig_max);
    y_width = (y_orig_max_width >= y_orig_min_width) \
        ? y_orig_max_width \
        : y_orig_min_width;

    # Split the canvas into the label area and the plot area.
    limits["canvas_width_y"] = y_width + 1;
    limits["canvas_width_x"] = limits["canvas_width"] - limits["canvas_width_y"];

    # Raw data -> binned along x -> scaled to canvas rows.
    data_scaled_x_to_width(data_0, data_1, limits);
    data_scaled_y_to_height(data_1, data_2, limits);

    # Compose the canvas; later overlays overwrite earlier ones.
    canvas_init(canvas, limits["canvas_width"], limits["canvas_height"]);
    canvas_overlay_highlight_zero_row(canvas, limits);
    canvas_overlay_highlight_zero_col(canvas, limits);
    canvas_overlay_highlight_zero(canvas, limits);
    canvas_overlay_y_lab(canvas, limits);
    canvas_overlay_data(canvas, data_2, limits);

    canvas_print(canvas, limits);
}
286 | ||
287 | # An even better way to think about scaling: ratios!!! Duh! :-D | |
288 | # | |
289 | # val_max / val_current = width / val_scaled | |
290 | # | |
291 | # val_max width | |
292 | # ----------- = ------------ | |
293 | # val_current val_scaled | |
294 | # | |
295 | # val_max * val_scaled = val_current * width | |
296 | # val_scaled = (val_current * width) / val_max | |
297 | # | |
298 | # | |
299 | # num_data_points width | |
300 | # ---------------- = ----- | |
301 | # x 1 | |
302 | # | |
303 | # width * x = num_data_points | |
304 | # x = num_data_points / width | |
305 | # | |
306 | # But that is what I already tried, and it is awkward to scale up when | |
307 | # thinking in these terms, so it is much better to first route each data | |
308 | # point to an appropriate bin and then aggregate each bin: | |
309 | # 1. Route: bins[scale(datum)] | |
310 | # 2. Aggregate: for bin in bins: for val in bin: aggregate(val) |