Update NY URI, location and path
[covid-19-scrapers.git] / fetch-case-count-usa-ny
CommitLineData
0b60ba94
SK
1#! /bin/sh
2#
3# Dependencies:
4# - curl
5# - awk
6# - hxpipe (packaged in html-xml-utils on Debian and Ubuntu)
7
# Location names printed in the usage text, one per line. The list is
# informational only: nothing in the visible script checks the LOCATION
# argument against it.
# NOTE(review): presumably each name must equal the text of a table cell on
# the scraped page, since the lookup below is an exact string comparison -
# confirm against the live page.
8VALID_LOCATIONS='
9 Albany
2162fd84 10 Allegany
0b60ba94 11 Broome
2162fd84 12 Clinton
0b60ba94
SK
13 Delaware
14 Dutchess
15 Erie
16 Greene
17 Herkimer
18 Monroe
19 Montgomery
20 Nassau
2162fd84 21 New York City
acd2abc3 22 New York State (Outside of NYC)
2162fd84
SK
23 Onondaga
24 Ontario
0b60ba94
SK
25 Orange
26 Putnam
2162fd84 27 Rensselaer
0b60ba94
SK
28 Rockland
29 Saratoga
30 Schenectady
31 Suffolk
2162fd84 32 Sullivan
0b60ba94
SK
33 Tioga
34 Tompkins
acd2abc3 35 Total Positive Cases (Statewide)
0b60ba94
SK
36 Ulster
37 Westchester
2162fd84 38 Wyoming
0b60ba94 39 '
# Location looked up when the script is run without an argument.
2162fd84 40DEFAULT_LOCATION='New York City'
0b60ba94
SK
41
# Print the command-line help text and terminate the script.
#
# Reads the globals VALID_LOCATIONS and DEFAULT_LOCATION. Always exits with
# status 1, including when help was requested explicitly with -h.
usage() {
    # A single printf emits the whole message; the three %s slots receive the
    # script name, the location list and the default location, in that order.
    printf 'Usage: %s [LOCATION]\n\nLOCATION =%s\nDefault LOCATION:\n %s\n' \
        "$0" "$VALID_LOCATIONS" "$DEFAULT_LOCATION"
    exit 1
}
50
# Argument handling: -h prints the help text (usage does not return); an
# empty or missing first argument selects DEFAULT_LOCATION; any other value
# is taken verbatim as the location to look up.
if [ "$1" = '-h' ]; then
    usage
fi
location="${1:-$DEFAULT_LOCATION}"
56
2162fd84
SK
57# 2020-03-16
58#URI='https://health.ny.gov/diseases/communicable/coronavirus/'
59
60# 2020-03-17
61URI='https://coronavirus.health.ny.gov/county-county-breakdown-positive-cases'
62
# Download the page at URI, convert the HTML with hxpipe into one record per
# line, and let the awk program below print the text of the table cell that
# follows the cell whose text equals the requested location.
63curl "$URI" \
0b60ba94
SK
64| hxpipe \
65| awk -v location="$location" '
BEGIN {
    # Path of the table cells that hold the location names and their
    # counts. Earlier page layouts are kept, commented out, for reference.

    # 2020-03-16 morning-afternoon
    #target_path = "/html/body/div/div/div/div/div/div/table/tr/td"

    # 2020-03-16 late evening 22:30
    #target_path = "/html/body/div/div/div/div/div/div/tr/td"

    # 2020-03-17 late afternoon 16:35
    target_path = "/html/body/div/div/main/div/div/div/div/div/div/div/div/table/tbody/tr/td"
}

# Element open/close records: keep XmlPath in step with the element stack.
/^[()]/ {
    update_node()
    next
}

# CDATA attribute records: remember the attribute for the next element.
/^A/ && $2 == "CDATA" {
    update_node_attributes()
    next
}

# Text records. The location lookup is done only here: every other record
# type (empty elements, comments, attributes of other types, ...) leaves
# XmlPayload stale, and matching on such a record could print an empty value
# and clear the found flag before the real cell text arrives.
/^-/ {
    # Everything after the leading "-" marker is the text itself.
    XmlPayload = substr($0, 2)
    if (XmlPath == target_path) {
        if (XmlPayload == location) {
            # The requested location was seen; the next text found at the
            # same path is the value to report.
            found = 1
        } else if (found) {
            print XmlPayload
            found = 0
        }
    }
}
101
# Join the entries path[1] .. path[depth] into one string in which every
# entry is preceded by "/". A depth below 1 yields the empty string.
#
# path  - array indexed from 1
# depth - number of leading entries to include
# p, i  - locals (awk idiom: extra parameters after the gap)
function path_to_string(path, depth,    p, i) {
    p = ""
    i = 0
    while (++i <= depth) {
        p = p "/" path[i]
    }
    return p
}
109
# Update the element stack from the current "(" or ")" record.
#
# Reads $1, which is the marker character immediately followed by the
# element name.
#   "(" - push the element, rebuild XmlPath and store the attributes
#         collected in _hxpipe_curr_attrs as XmlAttr[XmlPath, name].
#   ")" - drop the XmlAttr entries stored for the closing path, clear
#         XmlPayload, pop the element and rebuild XmlPath.
#   any other marker - report the record on stderr and exit with status 1.
#
# Globals written: _depth, _path, XmlPath, XmlAttr, XmlPayload and
# _hxpipe_curr_attrs. The names after the gap in the parameter list are
# locals; callers pass no arguments.
function update_node(    paren, name, key, parts) {
    paren = substr($1, 1, 1)
    name = substr($1, 2)
    if (paren == "(") {
        _depth++
        _path[_depth] = name
        XmlPath = path_to_string(_path, _depth)
        for (key in _hxpipe_curr_attrs) {
            XmlAttr[XmlPath, key] = _hxpipe_curr_attrs[key]
        }
        # The pending attributes belong to this element only. Forget them
        # here so that elements opened later, before any element closes, do
        # not receive them as well.
        delete _hxpipe_curr_attrs
    } else if (paren == ")") {
        delete _hxpipe_curr_attrs
        XmlPayload = ""
        for (key in XmlAttr) {
            # parts is a declared local, so splitting the composite key does
            # not create a global array as a side effect.
            split(key, parts, SUBSEP)
            if (parts[1] == XmlPath) delete XmlAttr[key]
        }
        _depth--
        XmlPath = path_to_string(_path, _depth)
    } else {
        printf("ERROR in input line %d - not a parenthesis: \"%s\"\n", NR, paren) > "/dev/stderr"
        exit 1
    }
}
137
# Record one attribute from the current "A" record in _hxpipe_curr_attrs.
#
# The record has the form "A<name> <type> <value>". The attribute name is $1
# without its leading "A"; the value is whatever follows the first two
# fields and may itself contain spaces, so it is cut out of $0 rather than
# taken from a single field.
#
# The prefix is removed with a constant pattern. Building the pattern from
# $1 and $2 would treat characters of the attribute name such as "+", "."
# or "[" as regular-expression operators, which can leave the prefix in the
# stored value or abort awk with a syntax error.
#
# key and val are locals; callers pass no arguments.
function update_node_attributes(    key, val) {
    key = substr($1, 2)
    val = $0
    # First field, separator, second field, then any spaces before the value.
    sub(/^[^ ]+ +[^ ]+ */, "", val)
    _hxpipe_curr_attrs[key] = val
}
145 '
This page took 0.02602 seconds and 4 git commands to generate.