e346935c14551c49381e5b8679d448a028e0a65f
[covid-19-scrapers.git] / fetch-case-count-usa-ny
1 #! /bin/sh
2 #
3 # Dependencies:
4 # - curl
5 # - awk
6 # - hxpipe (packaged in html-xml-utils on Debian and Ubuntu)
7
# Scrape target settings.
#   URI              - page that is downloaded
#   TARGET_PATH      - element path of the table cells that are inspected
#   DEFAULT_LOCATION - cell text looked up when no LOCATION argument is given
# Earlier settings stay below, commented out and labelled with the date
# they were written.

# 2020-03-16 morning-afternoon
#URI='https://health.ny.gov/diseases/communicable/coronavirus/'
#TARGET_PATH='/html/body/div/div/div/div/div/div/table/tr/td'
#DEFAULT_LOCATION='New York City:'

# 2020-03-16 late evening 22:30
#URI='https://health.ny.gov/diseases/communicable/coronavirus/'
#TARGET_PATH='/html/body/div/div/div/div/div/div/tr/td'
#DEFAULT_LOCATION='New York City:'

# 2020-03-17 late afternoon 16:35 (the settings in effect)
URI="https://coronavirus.health.ny.gov/county-county-breakdown-positive-cases"
TARGET_PATH="/html/body/div/div/main/div/div/div/div/div/div/div/div/table/tbody/tr/td"
DEFAULT_LOCATION="New York City"
22
# Newline-separated list of LOCATION values shown by usage().
# The value begins and ends with a newline, which usage() relies on for
# its layout.
VALID_LOCATIONS="
Albany
Allegany
Broome
Clinton
Delaware
Dutchess
Erie
Greene
Herkimer
Monroe
Montgomery
Nassau
New York City
New York State (Outside of NYC)
Onondaga
Ontario
Orange
Putnam
Rensselaer
Rockland
Saratoga
Schenectady
Suffolk
Sullivan
Tioga
Tompkins
Total Positive Cases (Statewide)
Ulster
Westchester
Wyoming
"
55
# Print the help text on stdout and terminate the script with status 1.
# Globals: VALID_LOCATIONS (read), DEFAULT_LOCATION (read)
usage() {
  # A single format string produces, in order: the synopsis, a blank line,
  # the list of known locations and the default location.
  printf 'Usage: %s [LOCATION]\n\nLOCATION =%s\nDefault LOCATION:\n %s\n' \
    "$0" "$VALID_LOCATIONS" "$DEFAULT_LOCATION"
  exit 1
}
64
# Argument handling: "-h" shows the help text (usage never returns);
# otherwise the first argument is the location, and a missing or empty
# argument selects DEFAULT_LOCATION.
if [ "$1" = '-h' ]; then
  usage
fi
location="${1:-$DEFAULT_LOCATION}"
70
#######################################
# Scan an hxpipe event stream for a table cell whose text equals LOCATION
# and print the text of the next text node found at the same element path.
# Arguments: $1 - cell text to look for (exact string match)
#            $2 - element path of the cells to inspect, e.g. /html/body/td
# Input:     hxpipe output on stdin
# Outputs:   one line on stdout per match
#######################################
extract_case_count() {
  awk \
    -v location="$1" \
    -v target_path="$2" \
    '
    # Element open "(" or close ")": keep XmlPath in step with the tree.
    /^[()]/ {
      update_node()
      next
    }

    # Attribute line: remembered until the next element is opened.
    /^A/ && $2 == "CDATA" {
      update_node_attributes()
      next
    }

    # Text node. Matching is done only here, so a line of any other kind
    # (which leaves XmlPayload stale or empty) can neither be printed as
    # the value nor use up the pending match.
    /^-/ {
      XmlPayload = substr($0, 2)
      if (XmlPath == target_path) {
        if (XmlPayload == location) {
          found = 1
        } else if (found) {
          print XmlPayload
          found = 0
        }
      }
      next
    }

    # Join path[1..depth] into "/a/b/c"; gives "" when depth is below 1.
    function path_to_string(path, depth,    p, i) {
      p = ""
      for (i = 1; i <= depth; i++) {
        p = p "/" path[i]
      }
      return p
    }

    # Remove every element of arr using only POSIX awk features.
    function clear_array(arr) {
      split("", arr)
    }

    # Apply an open or close event found in $1 to the globals
    # _depth, _path, XmlPath, XmlAttr and XmlPayload.
    function update_node(    paren, name, key, parts) {
      paren = substr($1, 1, 1)
      name = substr($1, 2)
      if (paren == "(") {
        _depth++
        _path[_depth] = name
        XmlPath = path_to_string(_path, _depth)
        for (key in _hxpipe_curr_attrs) {
          XmlAttr[XmlPath, key] = _hxpipe_curr_attrs[key]
        }
        # The pending attributes belong to this element only; drop them
        # so that elements nested inside it do not receive them as well.
        clear_array(_hxpipe_curr_attrs)
      } else if (paren == ")") {
        clear_array(_hxpipe_curr_attrs)
        XmlPayload = ""
        # Forget the attributes stored for the element being closed.
        for (key in XmlAttr) {
          split(key, parts, SUBSEP)
          if (parts[1] == XmlPath) delete XmlAttr[key]
        }
        _depth--
        XmlPath = path_to_string(_path, _depth)
      } else {
        printf("ERROR in input line %d - not a parenthesis: \"%s\"\n", NR, paren) > "/dev/stderr"
        exit 1
      }
    }

    # Remember the attribute on the current line ("Aname TYPE value").
    function update_node_attributes(    key, val) {
      key = substr($1, 2)
      val = $0
      # Strip the first two fields with a fixed pattern; the attribute
      # name is input data and must not be interpreted as a regex.
      sub(/^[^ \t]+[ \t]+[^ \t]+[ \t]+/, "", val)
      _hxpipe_curr_attrs[key] = val
    }
    '
}

# --fail: an HTTP error page is not handed to the parser.
# --silent --show-error: no progress meter on stderr, errors still shown.
curl --fail --silent --show-error "$URI" \
  | hxpipe \
  | extract_case_count "$location" "$TARGET_PATH"
This page took 0.057622 seconds and 3 git commands to generate.