Update NY usage example
[covid-19-scrapers.git] / fetch-case-count-usa-ny
#!/bin/sh
#
# Print the number of positive cases that the New York State health
# department page lists for one location, or for every listed location.
#
# Dependencies:
#   - curl
#   - awk
#   - hxpipe (packaged in html-xml-utils on Debian and Ubuntu)
#   - column and sort (used to format the --all listing)
#
# The page has moved and changed structure more than once. The settings
# that used to work are kept below, commented out, next to the date on
# which each of them was valid.

# 2020-03-16 morning-afternoon
#URI='https://health.ny.gov/diseases/communicable/coronavirus/'
#TARGET_PATH='/html/body/div/div/div/div/div/div/table/tr/td'
#DEFAULT_LOCATION='New York City:'

# 2020-03-16 late evening 22:30
#URI='https://health.ny.gov/diseases/communicable/coronavirus/'
#TARGET_PATH='/html/body/div/div/div/div/div/div/tr/td'
#DEFAULT_LOCATION='New York City:'

# 2020-03-17 late afternoon 16:35
# URI:              page to download.
# TARGET_PATH:      element path of the table cells holding the data.
# DEFAULT_LOCATION: location reported when no argument is given.
URI='https://coronavirus.health.ny.gov/county-county-breakdown-positive-cases'
TARGET_PATH='/html/body/div/div/main/div/div/div/div/div/div/div/div/table/tbody/tr/td'
DEFAULT_LOCATION='New York City'

# Locations shown in the usage text, one per line. The value deliberately
# starts and ends with a newline because usage() prints it verbatim.
VALID_LOCATIONS='
Albany
Allegany
Broome
Clinton
Delaware
Dutchess
Erie
Greene
Herkimer
Monroe
Montgomery
Nassau
New York City
Onondaga
Ontario
Orange
Putnam
Rensselaer
Rockland
Saratoga
Schenectady
Suffolk
Sullivan
Tioga
Tompkins
Ulster
Westchester
Wyoming
'
53
# usage
#
# Print the command-line synopsis, the list of known locations and the
# default location to stdout, then terminate the script with status 1.
#
# Globals read: VALID_LOCATIONS, DEFAULT_LOCATION
usage() {
  # A single printf emits the whole message; the arguments fill the three
  # %s placeholders in order (program name, location list, default).
  printf 'Usage: %s [OPTION | LOCATION]\n\nOPTION = -h | --all \nLOCATION =%s\nDefault LOCATION:\n %s\n' \
    "$0" \
    "$VALID_LOCATIONS" \
    "$DEFAULT_LOCATION"
  exit 1
}
63
# parse_args [ARG]
#
# Interpret the first command-line argument and set two globals:
#   opt_all          "1" when --all was given, empty otherwise
#   target_location  location to report; empty when --all was given
#
# Both globals are reset first so that values exported in the caller's
# environment cannot change what the script does. Arguments after the
# first are ignored.
parse_args() {
  opt_all=
  target_location=

  case "${1-}" in
    '-h')
      usage
      ;;
    '--all')
      opt_all=1
      ;;
    '')
      target_location="$DEFAULT_LOCATION"
      ;;
    -*)
      # No location starts with a dash, so this is a mistyped option.
      # Show the usage text instead of looking it up as a location.
      usage
      ;;
    *)
      target_location="$1"
      ;;
  esac
}

parse_args "$@"
70
# extract_case_counts TARGET_PATH LOCATION OPT_ALL
#
# Read hxpipe output on stdin and print case counts on stdout.
#
#   TARGET_PATH  slash-separated element path of the table cells to read.
#                Text found at that path is consumed in pairs: the first
#                text is a location name, the second is its count.
#   LOCATION     location whose count is printed when OPT_ALL is empty.
#                A location that was not found is printed as 0.
#   OPT_ALL      when non-empty and non-zero, print one "COUNT|LOCATION"
#                line per location instead (in no particular order).
#
# Returns non-zero, with a message on stderr and nothing on stdout, when
# no location/count pair was found at TARGET_PATH at all.
extract_case_counts() {
  awk \
    -v target_path="$1" \
    -v target_location="$2" \
    -v opt_all="$3" \
    '
    # Element open/close lines keep XmlPath in sync with the document tree.
    /^[()]/ {
        update_node()
        next
    }

    # CDATA attribute lines are collected for the element that opens next.
    /^A/ && $2 == "CDATA" {
        update_node_attributes()
        next
    }

    # Text lines. Only these may advance the location/count pairing, so
    # that other line types seen inside a cell cannot shift it.
    /^-/ {
        XmlPayload = substr($0, 2)
        if (XmlPath == target_path)
            record_cell(XmlPayload)
        next
    }

    END {
        if (pairs == 0) {
            printf("ERROR: no case counts found at path \"%s\" - download failed or page layout changed\n", target_path) > "/dev/stderr"
            exit 1
        }
        if (opt_all) {
            for (loc in case_counts)
                printf("%d|%s\n", case_counts[loc], loc)
        } else {
            printf("%d\n", case_counts[target_location])
        }
    }

    # Consume one cell text: the first of each pair names the location,
    # the second is its count.
    function record_cell(text) {
        if (location == "") {
            location = text
            return
        }
        # Drop thousands separators; "%d" would otherwise stop at the
        # first comma and print 1 for "1,234".
        gsub(/,/, "", text)
        case_counts[location] = text
        pairs++
        location = ""
    }

    # Join the first depth entries of path into "/a/b/c".
    function path_to_string(path, depth,    p, i) {
        p = ""
        for (i = 1; i <= depth; i++) {
            p = p "/" path[i]
        }
        return p
    }

    # Handle a "(name" or ")name" line: push or pop one path component
    # and maintain XmlPath, XmlPayload and XmlAttr accordingly.
    function update_node(    paren, name, key, parts) {
        paren = substr($1, 1, 1)
        name = substr($1, 2, length($1) - 1)
        if (paren == "(") {
            _depth++
            _path[_depth] = name
            XmlPath = path_to_string(_path, _depth)
            for (key in _hxpipe_curr_attrs) {
                XmlAttr[XmlPath, key] = _hxpipe_curr_attrs[key]
            }
            # The pending attributes belong to this element only; clear
            # them now so that child elements do not inherit them.
            split("", _hxpipe_curr_attrs)
        } else if (paren == ")") {
            split("", _hxpipe_curr_attrs)
            XmlPayload = ""
            # Forget the attributes recorded for the element being closed.
            for (key in XmlAttr) {
                split(key, parts, SUBSEP)
                if (parts[1] == XmlPath) delete XmlAttr[key]
            }
            _depth--
            XmlPath = path_to_string(_path, _depth)
        } else {
            printf("ERROR in input line %d - not a parenthesis: \"%s\"\n", NR, paren) > "/dev/stderr"
            exit 1
        }
    }

    # Handle an "Aname CDATA value" line: remember the value under its
    # attribute name until the next element opens.
    function update_node_attributes(    key, val) {
        key = substr($1, 2, length($1))
        val = $0
        # Strip the first two fields with a fixed pattern, so that the
        # attribute name is never interpreted as a regular expression.
        sub(/^[^ ]+ +[^ ]+ +/, "", val)
        _hxpipe_curr_attrs[key] = val
    }
    '
}

# Download the page, convert it to the line-oriented hxpipe format,
# extract the counts and present them as an aligned, numerically sorted
# table. curl flags: -f fails on HTTP errors instead of passing the error
# page on, -sS hides the progress meter but still reports errors.
curl -fsS "$URI" \
  | hxpipe \
  | extract_case_counts "$TARGET_PATH" "$target_location" "$opt_all" \
  | column -ts\| \
  | sort -n -k 1
This page took 0.057955 seconds and 5 git commands to generate.