| 1 | #! /usr/bin/awk -f |
| 2 | # |
| 3 | # Plot Simply |
| 4 | # |
| 5 | # TODO: Incremental plotting (new canvas for each data line) |
| 6 | # TODO: Support manual set of max y |
| 7 | |
# ANSI colour helpers: each wraps str in an SGR "set colour" sequence
# followed by the SGR reset sequence and returns the resulting string.

# SGR 33.
function orange(str,    on, off) {
    on = "\033[33m";
    off = "\033[0m";
    return on str off;
}

# SGR 1;33 (bold + 33).
function orange_bright(str,    on, off) {
    on = "\033[1;33m";
    off = "\033[0m";
    return on str off;
}

# SGR 32.
function green(str,    on, off) {
    on = "\033[32m";
    off = "\033[0m";
    return on str off;
}

# SGR 30.
function grey_dark(str,    on, off) {
    on = "\033[30m";
    off = "\033[0m";
    return on str off;
}

# SGR 37.
function grey_light(str,    on, off) {
    on = "\033[37m";
    off = "\033[0m";
    return on str off;
}
| 13 | |
# abs(n): n itself when n >= 0, otherwise its negation.
function abs(n) {
    if (n >= 0)
        return n;
    return -n;
}
# round(n_float): nearest integer, with halves rounded away from zero.
#
# int() truncates toward zero, so the usual "int(n + 0.5)" trick only works
# for non-negative input (it turns -2.7 into -2).  Negative values are
# therefore rounded on their magnitude and negated afterwards; subtracting
# from 0 (instead of unary minus) avoids producing a negative zero.
function round(n_float) {
    if (n_float < 0)
        return 0 - int(0.5 - n_float);
    return int(n_float + 0.5);
}
# log_10(n): base-10 logarithm of n, computed as the quotient of the
# natural logarithms of n and of 10.
function log_10(n,    ln_n, ln_10) {
    ln_n = log(n);
    ln_10 = log(10);
    return ln_n / ln_10;
}
| 17 | |
# width(n): number of characters needed to print n with "%d", including
# the leading "-" of a negative value.
#
# The y-axis labels are rendered with a "%d" conversion, so measuring that
# exact rendering is always right.  A logarithm-based estimate is not: it
# needs a special case for 0 (log(0) == -inf), over-counts values such as
# 5 or 50, and goes wrong for magnitudes below 1.
#
# w is a local scratch variable, not an argument.
function width(n,    w) {
    w = length(sprintf("%d", n));
    return w;
}
| 26 | |
# bins_append_item_to_bin(bin, val): store val in the next free slot of
# the given bin.
#
# Works on the global arrays bin_item_count (items stored so far per bin)
# and bins (indexed by [bin, slot], slots counted from 0).
# bin_i is a local scratch variable, not an argument.
function bins_append_item_to_bin(bin, val,    bin_i) {
    # "+ 0" makes a never-seen bin start at numeric slot 0.
    bin_i = bin_item_count[bin] + 0;
    bin_item_count[bin] = bin_i + 1;
    bins[bin, bin_i] = val;
}
| 31 | |
BEGIN {
    # Options are read from the variables w, h, p, a, b, g and df; any
    # option that is unset (or otherwise false) falls back to its default.

    # Canvas size.
    if (w) limits["canvas_width"] = w;
    else   limits["canvas_width"] = 70;

    if (h) limits["canvas_height"] = h;
    else   limits["canvas_height"] = 20;

    # Character drawn for a data point.
    if (p) char_point = orange_bright(p);
    else   char_point = orange_bright(".");

    # Padding placed on both sides of a cell.
    # FIXME: non-blank pad is broken after adding the y labels
    if (a) char_pad = grey_dark(a);
    else   char_pad = grey_dark("");

    # Character drawn in an empty cell.
    if (b) char_blank = grey_dark(b);
    else   char_blank = grey_dark("|");

    # Aggregation method name and the input field that holds the data.
    if (g) aggregation = g;
    else   aggregation = "mean";

    if (df) data_field = df;
    else    data_field = 1;

    # Both extremes start at 0, so 0 is always inside the plotted range.
    y_orig_min = 0;
    y_orig_max = 0;
}
| 45 | |
# Per-record rule: remember the value of the selected field, keyed by
# record number, and keep the running minimum / maximum up to date.
{
    # "+ 0" forces a numeric value.  Without it a field that does not look
    # like a number (header line, empty line, ...) is compared as a string
    # and can end up stored in y_orig_min / y_orig_max.
    y_orig = $data_field + 0;
    if (y_orig > y_orig_max) y_orig_max = y_orig;
    if (y_orig < y_orig_min) y_orig_min = y_orig;
    data_0[NR] = y_orig;
}
| 52 | |
# aggregate_mean(array): arithmetic mean of all values stored in array.
#
# Returns 0 for an empty array so that callers never divide by zero.
# key, total and count are local scratch variables, not arguments.
function aggregate_mean(array,    key, total, count) {
    total = 0;
    count = 0;
    for (key in array) {
        total += array[key];
        count++;
    }
    return count > 0 ? total / count : 0;
}
| 55 | |
# aggregate(array): reduce array to a single value using the method named
# by the global variable aggregation.
#
# Only "mean" is known; any other name is reported on stderr and the
# program exits with status 2.
function aggregate(array) {
    if (aggregation != "mean") {
        printf("Unknown aggregation: %s\n", aggregation) > "/dev/stderr";
        exit(2);
    }
    return aggregate_mean(array);
}
| 64 | |
# data_scaled_x_to_width(data_in, data_out, limits)
#
# Maps the data points onto the columns available for plotting and
# averages the points that share a column.
#
#   data_in   values keyed by position; the keys are expected to be
#             1..length(data_in) (the END rule passes an array keyed by
#             record number)
#   data_out  receives one mean value per non-empty bin, keyed by bin
#   limits    limits["canvas_width_x"] is read as the number of columns;
#             limits["x_min"] and limits["x_max"] are set to the smallest
#             and largest bin in use
#
# Bins are numbered 0 .. canvas_width_x - 1.  The plotting code draws bin b
# at column b + canvas_width_y + 1 and prints columns up to canvas_width, so
# a bin equal to canvas_width_x would fall just outside the printed canvas.
#
# The globals x_orig_min, x_orig_max, x_bin_min and x_bin_max are still
# assigned, as before; this function does not read the first two itself.
#
# Everything after "limits" is a local scratch variable, not an argument.
function data_scaled_x_to_width(data_in, data_out, limits, \
                                n_items, n_bins, x_orig, x_num, x_bin, \
                                bin_sums, bin_item_counts) {
    n_items = length(data_in);          # loop invariant, computed once
    n_bins = int(limits["canvas_width_x"]);
    if (n_bins < 1)
        n_bins = 1;                     # always keep at least one bin

    x_orig_min = 0;
    x_orig_max = 0;
    x_bin_min = 0;
    x_bin_max = 0;

    # Route every item to its bin, accumulating a sum and a count per bin.
    for (x_orig in data_in) {
        # for-in yields the key as a string; compare and scale it as a number.
        x_num = x_orig + 0;
        if (x_num > x_orig_max) x_orig_max = x_num;
        if (x_num < x_orig_min) x_orig_min = x_num;

        # Keys 1..n_items map linearly onto bins 0..n_bins-1.
        x_bin = int((x_num - 1) * n_bins / n_items);
        # Clamp keys outside 1..n_items onto the edge bins.
        if (x_bin <= 0) x_bin = 0;
        if (x_bin > n_bins - 1) x_bin = n_bins - 1;

        # x_bin is never negative, so x_bin_min stays 0.
        if (x_bin > x_bin_max) x_bin_max = x_bin;

        bin_sums[x_bin] += data_in[x_orig];
        bin_item_counts[x_bin]++;
    }

    # Aggregate: one mean per non-empty bin.
    for (x_bin in bin_item_counts)
        data_out[x_bin] = bin_sums[x_bin] / bin_item_counts[x_bin];

    limits["x_min"] = x_bin_min;
    limits["x_max"] = x_bin_max;
}
| 116 | |
# data_scaled_y_to_height(data_in, data_out, limits)
#
# Converts every value of data_in into a canvas row number.
#
#   data_in   values to scale, keyed by x position
#   data_out  receives the row number for each key of data_in
#   limits    limits["canvas_height"] is read; limits["y_min"],
#             limits["y_max"] and limits["offset_scaled"] are set
#
# The globals y_orig_min / y_orig_max supply the value range.  When the
# minimum is negative every value is first shifted up by its magnitude so
# that the shifted range starts at 0; the scaled shift is stored as
# limits["offset_scaled"].
#
# Intermediate results are kept in globals (offset_orig, data_in_offsetted,
# y_orig_offseted_min, y_orig_offseted_max, y_orig_offsetted, y_scaled_min,
# y_scaled_max, range_orig, offset_scaled).
# key and scaled are local scratch variables, not arguments.
#
# TODO: Is there a better, closed-form way to get the offset?
# TODO: Is there better way to map canvas to value ranges altogether?
function data_scaled_y_to_height(data_in, data_out, limits,    key, scaled) {
    # Shift needed to lift the smallest value up to 0.
    offset_orig = 0;
    if (y_orig_min < 0)
        offset_orig = -1 * y_orig_min;

    for (key in data_in)
        data_in_offsetted[key] = data_in[key] + offset_orig;

    y_orig_offseted_min = y_orig_min + offset_orig;
    y_orig_offseted_max = y_orig_max + offset_orig;

    # Scale the shifted values to the canvas height.
    y_scaled_min = 0;
    y_scaled_max = 0;
    for (key in data_in_offsetted) {
        y_orig_offsetted = data_in_offsetted[key];
        if (y_orig_offseted_max > 0)
            scaled = round((y_orig_offsetted * limits["canvas_height"]) / y_orig_offseted_max);
        else
            scaled = 0;     # degenerate range: everything lands on row 0

        if (scaled > y_scaled_max) y_scaled_max = scaled;
        if (scaled < y_scaled_min) y_scaled_min = scaled;
        data_out[key] = scaled;
    }

    # Save limits.
    limits["y_min"] = y_scaled_min;
    limits["y_max"] = y_scaled_max;

    range_orig = y_orig_max - y_orig_min;
    if (range_orig > 0)
        offset_scaled = round(offset_orig * limits["canvas_height"] / range_orig);
    else
        offset_scaled = 0;
    limits["offset_scaled"] = offset_scaled;
}
| 166 | |
# canvas_init(canvas, n_cols, n_rows)
#
# Fills rows 0..n_rows and columns 0..n_cols of canvas (both bounds
# inclusive) with the blank cell: char_pad char_blank char_pad, taken from
# the global variables of those names.
#
# The size parameters are deliberately not called "width" / "height": this
# file defines a function named width(), and POSIX awk does not allow a
# parameter to share its name with a function.
#
# row, col and cell are local scratch variables, not arguments.
function canvas_init(canvas, n_cols, n_rows,    row, col, cell) {
    cell = char_pad char_blank char_pad;
    for (row = 0; row <= n_rows; row++) {
        for (col = 0; col <= n_cols; col++) {
            canvas[row, col] = cell;
        }
    }
}
| 174 | |
# canvas_overlay_highlight_ticks_x(canvas, limits)
#
# Writes a green "-" (wrapped in char_pad) into every cell of the row
# limits["offset_scaled"], from column limits["canvas_width_y"] - 1 up to
# and including column limits["canvas_width"].
#
# The row offset is kept in the global variable offset.
# row and col are local scratch variables, not arguments.
function canvas_overlay_highlight_ticks_x(canvas, limits,    row, col) {
    col = limits["canvas_width_y"] - 1;
    while (col <= limits["canvas_width"]) {
        offset = limits["offset_scaled"];
        row = 0 + offset;
        canvas[row, col] = char_pad green("-") char_pad;
        col++;
    }
}
| 184 | |
# canvas_overlay_highlight_zero_row(canvas, limits)
#
# Draws the horizontal line through the row limits["offset_scaled"]:
# a green "-" (wrapped in char_pad) in every column from
# limits["canvas_width_y"] - 1 through limits["canvas_width"].
#
# The row offset is kept in the global variable offset.
# row, col, first_col and dash are local scratch variables, not arguments.
function canvas_overlay_highlight_zero_row(canvas, limits,    row, col, first_col, dash) {
    dash = char_pad green("-") char_pad;
    first_col = limits["canvas_width_y"] - 1;
    for (col = first_col; col <= limits["canvas_width"]; col++) {
        offset = limits["offset_scaled"];
        row = 0 + offset;
        canvas[row, col] = dash;
    }
}
| 195 | |
# canvas_overlay_highlight_zero_col(canvas, limits)
#
# Draws the vertical axis: a green "|" in column limits["canvas_width_y"]
# for every row 0..limits["canvas_height"], then a green "+" at the top
# and bottom end of that column.
#
# row and col are local scratch variables, not arguments.
# TODO: Refactor color/character configs to ease composition
function canvas_overlay_highlight_zero_col(canvas, limits,    row, col) {
    col = limits["canvas_width_y"];
    row = 0;
    while (row <= limits["canvas_height"]) {
        canvas[row, col] = green("|");
        row++;
    }
    # The end caps are written last so they replace the "|" at both ends.
    canvas[limits["canvas_height"], limits["canvas_width_y"]] = green("+");
    canvas[0, limits["canvas_width_y"]] = green("+");
}
# canvas_overlay_highlight_zero(canvas, limits)
#
# Marks the cell at row limits["offset_scaled"], column
# limits["canvas_width_y"] with a green "+".
#
# row and col are local scratch variables, not arguments.
function canvas_overlay_highlight_zero(canvas, limits,    row, col) {
    row = 0 + limits["offset_scaled"];
    col = 0 + limits["canvas_width_y"];
    canvas[row, col] = green("+");
}
| 210 | |
# canvas_overlay_data(canvas, data, limits)
#
# Draws every data point and the stem connecting it to the row
# limits["offset_scaled"].
#
#   data    row number per x position
#   limits  limits["canvas_width_y"] and limits["offset_scaled"] are read
#
# A point for x position x is drawn in column x + canvas_width_y + 1.
# Cells are wrapped in char_pad except for x position 0.
#
# Everything after "limits" is a local scratch variable, not an argument.
# TODO: Would be nice to scale width of all cells to the widest
# TODO: This special case for 0 is kind of a kludge - can we do better?
function canvas_overlay_data(canvas, data, limits, \
                             key, col, y, row, base, point_cell, stem_cell) {
    for (key in data) {
        col = key + limits["canvas_width_y"] + 1;
        y = data[key];
        base = limits["offset_scaled"];

        if (key == 0) {
            point_cell = char_point;
            stem_cell = orange("|");
        } else {
            point_cell = char_pad char_point char_pad;
            stem_cell = char_pad orange("|") char_pad;
        }

        canvas[y, col] = point_cell;

        if (y > base) {
            # Point above the base row: fill downwards, base row included.
            row = y - 1;
            while (row >= base) {
                canvas[row, col] = stem_cell;
                row--;
            }
        } else if (y < base) {
            # Point below the base row: fill from the base row down to
            # just above the point.
            row = base;
            while (row > y) {
                canvas[row, col] = stem_cell;
                row--;
            }
        }
    }
}
| 236 | |
# canvas_overlay_y_lab(canvas, limits)
#
# Writes the y-axis labels into the leftmost columns of the canvas, one
# character per cell:
#   - y_orig_max on the top row (limits["canvas_height"]),
#   - y_orig_min on row 0,
#   - a "0" marker next to the axis on the row limits["offset_scaled"].
#
# Both labels are right-aligned in limits["canvas_width_y"] - 1 characters
# and followed by one space.
#
# The "0" marker is written first on purpose.  When its row is also the top
# or bottom row, the label of that row already shows the 0, and the label's
# trailing space must win over the marker; otherwise the row reads "00".
#
# y_lab_fmt, y_max_str and i are local scratch variables, not arguments.
# y_min_str is left in a global variable.
function canvas_overlay_y_lab(canvas, limits,    y_lab_fmt, y_max_str, i) {
    y_lab_fmt = "%" (limits["canvas_width_y"] - 1) "d ";
    y_max_str = sprintf(y_lab_fmt, y_orig_max);
    y_min_str = sprintf(y_lab_fmt, y_orig_min);

    canvas[0 + limits["offset_scaled"], 0 + limits["canvas_width_y"] - 1] = 0;

    for (i = 1; i <= length(y_max_str); i++) {
        canvas[limits["canvas_height"], i - 1] = substr(y_max_str, i, 1);
    }
    for (i = 1; i <= length(y_min_str); i++) {
        canvas[0, i - 1] = substr(y_min_str, i, 1);
    }
}
| 250 | |
# canvas_print(canvas, limits)
#
# Prints the canvas to standard output, highest row first.  Each row is
# the concatenation of its cells in columns 0..limits["canvas_width"],
# followed by a newline.
#
# row, col and line are local scratch variables, not arguments.
function canvas_print(canvas, limits,    row, col, line) {
    row = limits["canvas_height"];
    while (row >= 0) {
        line = "";
        for (col = 0; col <= limits["canvas_width"]; col++)
            line = line canvas[row, col];
        printf("%s\n", line);
        row--;
    }
}
| 259 | |
END {
    # The label column must fit the wider of the two extreme y values.
    y_orig_min_width = width(y_orig_min);
    y_orig_max_width = width(y_orig_max);
    y_width = (y_orig_max_width >= y_orig_min_width) \
        ? y_orig_max_width \
        : y_orig_min_width;

    # Split the canvas into the label area and the plotting area.
    limits["canvas_width_y"] = y_width + 1;
    limits["canvas_width_x"] = limits["canvas_width"] - limits["canvas_width_y"];

    # Scale the raw data: first along x (binning), then along y.
    data_scaled_x_to_width(data_0, data_1, limits);
    data_scaled_y_to_height(data_1, data_2, limits);

    # Compose the picture layer by layer; later layers overwrite earlier ones.
    canvas_init(canvas, limits["canvas_width"], limits["canvas_height"]);
    canvas_overlay_highlight_zero_row(canvas, limits);
    canvas_overlay_highlight_zero_col(canvas, limits);
    canvas_overlay_highlight_zero(canvas, limits);
    canvas_overlay_y_lab(canvas, limits);
    canvas_overlay_data(canvas, data_2, limits);

    canvas_print(canvas, limits);
}
| 286 | |
| 287 | # An even better way to think about scaling: ratios!!! Duh! :-D |
| 288 | # |
| 289 | # val_max / val_current = width / val_scaled |
| 290 | # |
| 291 | # val_max width |
| 292 | # ----------- = ------------ |
| 293 | # val_current val_scaled |
| 294 | # |
| 295 | # val_max * val_scaled = val_current * width |
| 296 | # val_scaled = (val_current * width) / val_max |
| 297 | # |
| 298 | # |
| 299 | # num_data_points width |
| 300 | # ---------------- = ----- |
| 301 | # x 1 |
| 302 | # |
| 303 | # width * x = num_data_points |
| 304 | # x = num_data_points / width |
| 305 | # |
| 306 | # But that is what I already tried, and it is awkward to scale up when |
| 307 | # thinking in these terms, so it is much better to first route each data |
| 308 | # point to an appropriate bin and then aggregate each bin: |
| 309 | # 1. Route: bins[scale(datum)] |
| 310 | # 2. Aggregate: for bin in bins: for val in bin: aggregate(val) |