Support dumping out all the NY data
[covid-19-scrapers.git] / fetch-case-count-usa-ny
CommitLineData
0b60ba94
SK
1#! /bin/sh
2#
# Dependencies:
# - curl
# - awk
# - hxpipe (packaged in html-xml-utils on Debian and Ubuntu)
# - column and sort (used at the end of the pipeline to format and order the output)
7
d5c6a8b1
SK
8# 2020-03-16 morning-afternoon
9#URI='https://health.ny.gov/diseases/communicable/coronavirus/'
10#TARGET_PATH='/html/body/div/div/div/div/div/div/table/tr/td'
11#DEFAULT_LOCATION='New York City:'
12
13# 2020-03-16 late evening 22:30
14#URI='https://health.ny.gov/diseases/communicable/coronavirus/'
15#TARGET_PATH='/html/body/div/div/div/div/div/div/tr/td'
16#DEFAULT_LOCATION='New York City:'
17
18# 2020-03-17 late afternoon 16:35
# URI: the page that curl downloads and that is scraped for case counts.
19URI='https://coronavirus.health.ny.gov/county-county-breakdown-positive-cases'
# TARGET_PATH: slash-joined chain of element names, from the document root
# down to the table cell, handed to awk as target_path. Only text found at
# exactly this path is read as a location name or a case count.
20TARGET_PATH='/html/body/div/div/main/div/div/div/div/div/div/div/div/table/tbody/tr/td'
# DEFAULT_LOCATION: the location reported when the script gets no argument.
21DEFAULT_LOCATION='New York City'
22
0b60ba94
SK
# VALID_LOCATIONS: newline-separated list of location names shown in the
# usage text. Within this script it is only printed by usage(); the LOCATION
# argument is not checked against it.
23VALID_LOCATIONS='
24 Albany
2162fd84 25 Allegany
0b60ba94 26 Broome
2162fd84 27 Clinton
0b60ba94
SK
28 Delaware
29 Dutchess
30 Erie
31 Greene
32 Herkimer
33 Monroe
34 Montgomery
35 Nassau
2162fd84 36 New York City
2162fd84
SK
37 Onondaga
38 Ontario
0b60ba94
SK
39 Orange
40 Putnam
2162fd84 41 Rensselaer
0b60ba94
SK
42 Rockland
43 Saratoga
44 Schenectady
45 Suffolk
2162fd84 46 Sullivan
0b60ba94
SK
47 Tioga
48 Tompkins
49 Ulster
50 Westchester
2162fd84 51 Wyoming
0b60ba94 52 '
0b60ba94
SK
53
# usage: print the command synopsis, the accepted options, the list of
# locations and the default location to stdout, then terminate the script
# with exit status 1.
usage() {
    # One printf call; the three %s slots are filled, in order, with the
    # script name, the location list and the default location.
    printf 'Usage: %s [OPTION | LOCATION]\n\nOPTION = -h | --all \nLOCATION =%s\nDefault LOCATION:\n %s\n' \
        "$0" "$VALID_LOCATIONS" "$DEFAULT_LOCATION"
    exit 1
}
63
# Interpret the single optional command-line argument.
#
# Both result variables are reset first: they are later handed to awk, and
# without the reset an exported environment variable of the same name
# (e.g. opt_all=1) would silently change what the script reports.
opt_all=''
target_location=''

case "$1" in
    # Explicit request for help.
    '-h'|'--help') usage;;
    # Dump every location instead of a single one.
    '--all') opt_all=1;;
    # No argument: fall back to the default location.
    '') target_location="$DEFAULT_LOCATION";;
    # Anything else that looks like an option is a typo; refuse it rather
    # than treating it as a location name.
    -*)
        printf 'Unknown option: %s\n' "$1" >&2
        usage
        ;;
    # Otherwise the argument is the location to report.
    *) target_location="$1";;
esac
70
# Main pipeline: download URI with curl, let hxpipe turn the markup into a
# line-oriented form (the awk rules below key on lines starting with "(",
# ")", "A" and "-"), and have awk collect the text found at TARGET_PATH.
# The awk program receives its settings through -v:
#   target_location  location whose count is printed in single-location mode
#   target_path      element path of the table cells to read
#   opt_all          non-empty to print every location
# The awk output is finally passed through column and sort.
# NOTE(review): curl runs without -f/-s, so presumably an HTTP error page
# would be fed to hxpipe like regular content - confirm.
2162fd84 71curl "$URI" \
0b60ba94 72| hxpipe \
d5c6a8b1 73| awk \
a8677ce2 74 -v target_location="$target_location" \
d5c6a8b1 75 -v target_path="$TARGET_PATH" \
a8677ce2 76 -v opt_all="$opt_all" \
d5c6a8b1 77 '
0b60ba94
SK
# Element open "(" and close ")" lines: keep XmlPath in sync with the
# element the following lines belong to.
/^[()]/ {
    update_node()
    next
}

# Attribute lines of type CDATA: remember them for the next opened element.
/^A/ && $2 == "CDATA" {
    update_node_attributes()
    next
}

# Text lines: everything after the leading "-" is the text payload.
# Cells at target_path are consumed in pairs: the first text is taken as the
# location name, the second as the case count stored for that location.
# Pairing happens only here, so a non-text line that reaches this point
# while XmlPath is the cell path can no longer be mistaken for a cell value
# by re-using the previous payload.
/^-/ {
    XmlPayload = substr($0, 2)
    if (XmlPath == target_path) {
        if (!location) {
            location = XmlPayload
        } else {
            case_counts[location] = XmlPayload
            location = ""
        }
    }
    next
}

# Report the result.
#   opt_all set : one "count|location" line per collected location.
#   otherwise   : the count of target_location on a line of its own.
# If target_location was never collected (misspelt name, or the page layout
# no longer matches target_path) an error is written to stderr and nothing
# is printed on stdout, instead of a misleading "0".
END {
    if (opt_all) {
        for (location in case_counts)
            printf("%d|%s\n", case_counts[location], location)
    } else if (target_location in case_counts) {
        printf("%d\n", case_counts[target_location])
    } else {
        printf("ERROR: no case count found for location \"%s\"\n", target_location) > "/dev/stderr"
        exit 1
    }
}
111
0b60ba94
SK
# path_to_string: join the first `depth` entries of the array `path`
# (indexed from 1) into one string, each entry preceded by "/".
# Returns the empty string when depth is 0 or less.
# `joined` and `level` are locals.
function path_to_string(path, depth,    joined, level) {
    joined = ""
    level = 0
    while (level < depth) {
        level++
        joined = joined "/" path[level]
    }
    return joined
}
119
# update_node: process the current "(name" or ")name" line and keep the
# element-stack globals up to date.
#
# Globals written:
#   _depth, _path        stack of currently open element names
#   XmlPath              "/"-joined path of the innermost open element
#   XmlAttr[path, key]   attributes of the elements that are currently open
#   XmlPayload           reset to "" when an element closes
#   _hxpipe_curr_attrs   pending attributes, emptied once they are consumed
#
# On "(": push the element, attach the pending attributes to its path and
# then clear them. Without clearing, every element opened before the next
# close would be given the same attributes as its ancestor.
# On ")": forget the attributes recorded for the closing path and pop it.
# Any other first character is reported on stderr and aborts with status 1.
#
# paren, name, key, path and k are locals; k is listed so that the scratch
# array used by split() does not leak into the global namespace.
function update_node(    paren, name, key, path, k) {
    paren = substr($1, 1, 1)
    name = substr($1, 2)
    if (paren == "(") {
        _depth++
        _path[_depth] = name
        XmlPath = path_to_string(_path, _depth)
        for (key in _hxpipe_curr_attrs) {
            XmlAttr[XmlPath, key] = _hxpipe_curr_attrs[key]
        }
        # The pending attributes now belong to this element only.
        delete _hxpipe_curr_attrs
    } else if (paren == ")") {
        # Drop pending attributes that were never attached to an opened
        # element, so they cannot be attributed to a later one.
        delete _hxpipe_curr_attrs
        XmlPayload = ""
        for (key in XmlAttr) {
            # Keys are "path SUBSEP attribute"; compare the path part only.
            split(key, k, SUBSEP)
            path = k[1]
            if (path == XmlPath) delete XmlAttr[key]
        }
        _depth--
        XmlPath = path_to_string(_path, _depth)
    } else {
        printf("ERROR in input line %d - not a parenthesis: \"%s\"\n", NR, paren) > "/dev/stderr"
        exit 1
    }
}
147
# update_node_attributes: record the attribute carried by the current line
# in the global _hxpipe_curr_attrs.
# The attribute name is the first field without its leading character; the
# value is the whole line with the first field, the second field and the
# blanks following each of them removed from the front.
# NOTE(review): the first field is used unescaped inside the regular
# expression, so a name containing regex metacharacters is matched as a
# pattern rather than literally.
# attr_name, attr_value, gap and lead_re are locals.
function update_node_attributes(    attr_name, attr_value, gap, lead_re) {
    gap = " +"
    lead_re = "^" $1 gap $2 gap
    attr_name = substr($1, 2)
    attr_value = $0
    sub(lead_re, "", attr_value)
    _hxpipe_curr_attrs[attr_name] = attr_value
}
a8677ce2
SK
155 ' \
156| column -ts\| \
157| sort -n -k 1
This page took 0.028441 seconds and 4 git commands to generate.