Update NY URI, location and path
[covid-19-scrapers.git] / fetch-case-count-usa-ny
#! /bin/sh
#
# Dependencies:
# - curl
# - awk
# - hxpipe (packaged in html-xml-utils on Debian and Ubuntu)

# Location names, one per line, that usage() prints as the accepted values
# for LOCATION. The list is informational: it is only ever printed, so a
# name missing from it can still be passed on the command line.
VALID_LOCATIONS='
Albany
Allegany
Broome
Clinton
Delaware
Dutchess
Erie
Greene
Herkimer
Monroe
Montgomery
Nassau
New York City
New York State (Outside of NYC)
Onondaga
Ontario
Orange
Putnam
Rensselaer
Rockland
Saratoga
Schenectady
Suffolk
Sullivan
Tioga
Tompkins
Total Positive Cases (Statewide)
Ulster
Westchester
Wyoming
'
readonly VALID_LOCATIONS

# Location used when no LOCATION argument is given.
DEFAULT_LOCATION='New York City'
readonly DEFAULT_LOCATION

#######################################
# Print the command-line help and terminate the script.
# Globals:   VALID_LOCATIONS (read), DEFAULT_LOCATION (read)
# Arguments: $1 - optional exit status; defaults to 1 when omitted
# Outputs:   help text on stdout
# Returns:   never returns; exits with the requested status
#######################################
usage() {
  printf 'Usage: %s [LOCATION]\n' "$0"
  printf '\n'
  # VALID_LOCATIONS starts with a newline, so the names are listed below
  # the "LOCATION =" heading, one per line.
  printf 'LOCATION ='
  printf '%s\n' "$VALID_LOCATIONS"
  printf 'Default LOCATION:\n %s\n' "$DEFAULT_LOCATION"
  exit "${1:-1}"
}

#######################################
# Select the location to look up from the command-line arguments.
# Globals:   location (written), DEFAULT_LOCATION (read)
# Arguments: $1 - optional: -h or --help, or a location name
# Returns:   0 after setting location; does not return when help is requested
#######################################
choose_location() {
  case "${1-}" in
    # Help was asked for explicitly, so request exit status 0. A usage()
    # that takes no status argument ignores it and keeps its own status.
    -h | --help) usage 0 ;;
    # No argument, or an empty one: fall back to the default.
    '') location="$DEFAULT_LOCATION" ;;
    *) location="$1" ;;
  esac
}

choose_location "$@"

# Address of the page that is fetched. Each entry is dated; the superseded
# address is kept commented out for reference.

# 2020-03-16
#URI='https://health.ny.gov/diseases/communicable/coronavirus/'

# 2020-03-17
readonly URI='https://coronavirus.health.ny.gov/county-county-breakdown-positive-cases'

#######################################
# Filter that reads hxpipe output on stdin and prints the text of the table
# cell that follows the cell whose text is exactly LOCATION.
# Arguments: $1 - location name, compared verbatim against the cell text
# Outputs:   the text of the following cell on stdout; diagnostics on stderr
# Returns:   0 when at least one value was printed, 1 otherwise
#######################################
extract_count() {
  awk -v location="$1" '
    BEGIN {
        # The cell path has changed repeatedly; earlier values are kept,
        # dated, for reference.

        # 2020-03-16 morning-afternoon
        #target_path = "/html/body/div/div/div/div/div/div/table/tr/td"

        # 2020-03-16 late evening 22:30
        #target_path = "/html/body/div/div/div/div/div/div/tr/td"

        # 2020-03-17 late afternoon 16:35
        target_path = "/html/body/div/div/main/div/div/div/div/div/div/div/div/table/tbody/tr/td"
    }

    # "(name" opens an element and ")name" closes it.
    /^[()]/ {
        update_node()
        next
    }

    # "Aname CDATA value": an attribute of the element opened next.
    /^A/ && $2 == "CDATA" {
        update_node_attributes()
        next
    }

    # "-text": character data of the current element. Matching happens only
    # here, so other line types can never be compared against, or printed
    # from, a stale payload.
    /^-/ {
        XmlPayload = substr($0, 2)
        if (XmlPath == target_path) {
            if (XmlPayload == location) {
                # Location cell seen; the next text in a target cell is
                # the value to report.
                found = 1
            } else if (found) {
                print XmlPayload
                found = 0
                printed++
            }
        }
        next
    }

    END {
        # Report a miss instead of exiting 0 with no output. Skipped when
        # update_node already reported an error; the status given to that
        # earlier exit is kept.
        if (!_failed && !printed) {
            printf("ERROR: no value found for location \"%s\"\n", location) > "/dev/stderr"
            exit 1
        }
    }

    # Join path[1..depth] into "/a/b/c"; returns "" when depth < 1.
    function path_to_string(path, depth,    p, i) {
        p = ""
        for (i = 1; i <= depth; i++) {
            p = p "/" path[i]
        }
        return p
    }

    # Maintain _path, _depth, XmlPath and XmlAttr for the "(" or ")" record
    # in $1.
    function update_node(    paren, name, key, k) {
        paren = substr($1, 1, 1)
        name = substr($1, 2)
        if (paren == "(") {
            _depth++
            _path[_depth] = name
            XmlPath = path_to_string(_path, _depth)
            for (key in _hxpipe_curr_attrs) {
                XmlAttr[XmlPath, key] = _hxpipe_curr_attrs[key]
            }
            # The pending attributes belong to this element only; clear
            # them so they do not leak into elements opened inside it.
            delete _hxpipe_curr_attrs
        } else if (paren == ")") {
            delete _hxpipe_curr_attrs
            XmlPayload = ""
            # Forget the attributes recorded for the path being closed.
            for (key in XmlAttr) {
                split(key, k, SUBSEP)
                if (k[1] == XmlPath) delete XmlAttr[key]
            }
            # Never let the depth go negative on an unbalanced close.
            if (_depth > 0) _depth--
            XmlPath = path_to_string(_path, _depth)
        } else {
            printf("ERROR in input line %d - not a parenthesis: \"%s\"\n", NR, paren) > "/dev/stderr"
            _failed = 1
            exit 1
        }
    }

    # Remember the attribute in the current "Aname TYPE value" record until
    # the element it belongs to is opened.
    function update_node_attributes(    key, val) {
        key = substr($1, 2)
        val = $0
        # Strip the name and type fields positionally; the attribute name
        # is never used as a regular expression.
        sub(/^[^ ]+ +[^ ]+ */, "", val)
        _hxpipe_curr_attrs[key] = val
    }
  '
}

# --fail keeps HTTP error pages out of the parser, --silent/--show-error drop
# the progress meter but keep error messages, --location follows redirects.
# Plain sh reports only the status of the last pipeline stage, which is why
# extract_count itself fails when it finds nothing to print.
curl --fail --silent --show-error --location "$URI" \
  | hxpipe \
  | extract_count "$location"

This page took 0.052251 seconds and 4 git commands to generate.