Commit | Line | Data |
---|---|---|
0b60ba94 SK |
1 | #! /bin/sh |
2 | # | |
3 | # Dependencies: | |
4 | # - curl | |
5 | # - awk | |
6 | # - hxpipe (packaged in html-xml-utils on Debian and Ubuntu) | |
7 | ||
d5c6a8b1 SK |
# Scrape settings.
#
# Dated snapshots of the settings are kept below; the older ones are
# commented out and only the most recent set is live.
#   URI               page that curl downloads
#   TARGET_PATH       element path (as built by the awk program) of the
#                     table cells that hold the location/count pairs
#   DEFAULT_LOCATION  location reported when no argument is given

# 2020-03-16 morning-afternoon
#URI='https://health.ny.gov/diseases/communicable/coronavirus/'
#TARGET_PATH='/html/body/div/div/div/div/div/div/table/tr/td'
#DEFAULT_LOCATION='New York City:'

# 2020-03-16 late evening 22:30
#URI='https://health.ny.gov/diseases/communicable/coronavirus/'
#TARGET_PATH='/html/body/div/div/div/div/div/div/tr/td'
#DEFAULT_LOCATION='New York City:'

# 2020-03-17 late afternoon 16:35 (live settings)
URI='https://coronavirus.health.ny.gov/county-county-breakdown-positive-cases'
TARGET_PATH='/html/body/div/div/main/div/div/div/div/div/div/div/div/table/tbody/tr/td'
DEFAULT_LOCATION='New York City'
22 | ||
0b60ba94 SK |
# Newline-separated list of known location names, one per line, with a
# leading and a trailing newline. It is only interpolated into the usage
# text; the script does not validate its argument against it.
VALID_LOCATIONS='
Albany
Allegany
Broome
Clinton
Delaware
Dutchess
Erie
Greene
Herkimer
Monroe
Montgomery
Nassau
New York City
Onondaga
Ontario
Orange
Putnam
Rensselaer
Rockland
Saratoga
Schenectady
Suffolk
Sullivan
Tioga
Tompkins
Ulster
Westchester
Wyoming
'
0b60ba94 SK |
53 | |
# Print the command synopsis, the accepted options, the list of known
# locations and the default location on stdout, then terminate the script
# with exit status 1. Takes no arguments; reads VALID_LOCATIONS and
# DEFAULT_LOCATION.
usage() {
    printf 'Usage: %s [OPTION | LOCATION]\n\nOPTION = -h | --all \nLOCATION =%s\nDefault LOCATION:\n %s\n' \
        "$0" "$VALID_LOCATIONS" "$DEFAULT_LOCATION"
    exit 1
}
63 | ||
# Parse the single optional command-line argument.
#   -h        print the usage text and exit
#   --all     report every location found on the page
#   (none)    report DEFAULT_LOCATION
#   -<other>  unknown option: complain on stderr and show the usage text
#   <other>   report the named location
# Both result variables are reset first so that values inherited from the
# environment cannot leak into the awk invocation further down.
opt_all=''
target_location=''
case "${1-}" in
    '-h'   ) usage;;
    '--all') opt_all=1;;
    ''     ) target_location="$DEFAULT_LOCATION";;
    -*     ) printf '%s: unknown option: %s\n' "$0" "$1" >&2
             usage >&2;;
    *      ) target_location="$1";;
esac
70 | ||
2162fd84 | 71 | curl "$URI" \ |
0b60ba94 | 72 | | hxpipe \ |
d5c6a8b1 | 73 | | awk \ |
a8677ce2 | 74 | -v target_location="$target_location" \ |
d5c6a8b1 | 75 | -v target_path="$TARGET_PATH" \ |
a8677ce2 | 76 | -v opt_all="$opt_all" \ |
d5c6a8b1 | 77 | ' |
0b60ba94 SK |
# Per-line rules. Each input line is one token; its first character tells
# which kind of token it is.

# "(" / ")" lines open or close an element: keep XmlPath in step.
/^[()]/ {
    update_node()
    next
}

# "A" lines of type CDATA carry an attribute of the element about to open.
/^A/ && $2 == "CDATA" {
    update_node_attributes()
    next
}

# Only text lines ("-") can carry cell content. Skipping everything else
# keeps a stray non-text line inside a target cell from being mistaken for
# a location or a count.
!/^-/ {
    next
}

# Text line: remember its content (everything after the leading "-").
{
    XmlPayload = substr($0, 2)
}

# Text inside a target cell. Cells come in pairs: the first text seen is
# the location name, the next one is its case count.
XmlPath == target_path {
    if (location == "") {
        location = XmlPayload
        # Trim padding so the name matches target_location exactly.
        sub(/^[ \t]+/, "", location)
        sub(/[ \t]+$/, "", location)
    } else {
        cell_count = XmlPayload
        # Drop thousands separators; "%d" would otherwise stop at the
        # first comma and report 1 for "1,234".
        gsub(/,/, "", cell_count)
        case_counts[location] = cell_count
        location = ""
    }
    next
}
a8677ce2 SK |
# Report the result once all input has been read.
#   opt_all set:  one "COUNT|LOCATION" line per location found
#   otherwise:    the count recorded for target_location
# A location that was never seen is reported as an error on stderr
# instead of being printed as 0.
END {
    if (opt_all) {
        for (location in case_counts)
            printf("%d|%s\n", case_counts[location], location)
    } else if (target_location in case_counts) {
        printf("%d\n", case_counts[target_location])
    } else {
        # Indexing case_counts directly would create the key and print a
        # misleading 0 for a misspelt location or a failed scrape.
        printf("ERROR: no case count found for location \"%s\"\n", target_location) > "/dev/stderr"
        exit 1
    }
}
111 | ||
0b60ba94 SK |
# Join path[1] .. path[depth] into one string in which every element is
# preceded by "/", e.g. ("html", "body") with depth 2 gives "/html/body".
# Returns "" when depth is less than 1.
# joined and level are locals.
function path_to_string(path, depth,    joined, level) {
    joined = ""
    level = 1
    while (level <= depth) {
        joined = joined "/" path[level]
        level++
    }
    return joined
}
119 | ||
# Track element nesting from a "(name" or ")name" token held in $1.
#
# Globals written:
#   _depth, _path[]       nesting depth and the element name at each level
#   XmlPath               "/"-joined path of the innermost open element
#   XmlAttr[path, key]    attributes of the elements that are currently open
#   XmlPayload            cleared whenever an element closes
#   _hxpipe_curr_attrs[]  pending attributes gathered by
#                         update_node_attributes(); consumed here
# paren, name, key and parts are locals.
function update_node(    paren, name, key, parts) {
    paren = substr($1, 1, 1)
    name = substr($1, 2)
    if (paren == "(") {
        _path[++_depth] = name
        XmlPath = path_to_string(_path, _depth)
        for (key in _hxpipe_curr_attrs)
            XmlAttr[XmlPath, key] = _hxpipe_curr_attrs[key]
        # The pending attributes belong to this element only. Forget them
        # so that a following element without attributes of its own does
        # not inherit them.
        delete _hxpipe_curr_attrs
    } else if (paren == ")") {
        delete _hxpipe_curr_attrs
        XmlPayload = ""
        # Drop the attributes recorded for the element that is closing.
        for (key in XmlAttr) {
            split(key, parts, SUBSEP)
            if (parts[1] == XmlPath) delete XmlAttr[key]
        }
        # Never let an unbalanced close push the depth below zero.
        if (_depth > 0) _depth--
        XmlPath = path_to_string(_path, _depth)
    } else {
        printf("ERROR in input line %d - not a parenthesis: \"%s\"\n", NR, paren) > "/dev/stderr"
        exit 1
    }
}
147 | ||
# Record one attribute from an "Aname TYPE value" token held in $0.
# The name is $1 without its leading "A"; the value is everything after
# the type field and may itself contain spaces or be empty. The pair is
# stored in _hxpipe_curr_attrs[] until update_node() attaches it to the
# next element that opens.
# key and val are locals.
function update_node_attributes(    key, val) {
    key = substr($1, 2)
    val = $0
    # Strip the first two fields with a fixed pattern. Building the regex
    # from $1 would misfire on names containing regex metacharacters, and
    # requiring a blank after the type would leave the whole line in place
    # when the value is empty.
    sub(/^[^ ]+ +[^ ]+ */, "", val)
    _hxpipe_curr_attrs[key] = val
}
a8677ce2 SK |
# End of the awk program. The shell pipeline continues below: align the
# "|"-separated columns, then sort numerically on the leading count.
' \
| column -t -s '|' \
| sort -n -k 1