Sort valid NY locations
[covid-19-scrapers.git] / fetch-case-count-usa-ny
CommitLineData
0b60ba94
SK
1#! /bin/sh
2#
3# Dependencies:
4# - curl
5# - awk
6# - hxpipe (packaged in html-xml-utils on Debian and Ubuntu)
7
8VALID_LOCATIONS='
9 Albany
10 Broome
11 Delaware
12 Dutchess
13 Erie
14 Greene
15 Herkimer
16 Monroe
17 Montgomery
18 Nassau
acd2abc3
SK
19 New York City:
20 New York State (Outside of NYC)
0b60ba94
SK
21 Orange
22 Putnam
23 Rockland
24 Saratoga
25 Schenectady
26 Suffolk
27 Tioga
28 Tompkins
acd2abc3 29 Total Positive Cases (Statewide)
0b60ba94
SK
30 Ulster
31 Westchester
0b60ba94
SK
32 '
33DEFAULT_LOCATION='New York City:'
34
# usage - print the help text and terminate the script.
#
# Writes the invocation synopsis, the list held in $VALID_LOCATIONS and the
# value of $DEFAULT_LOCATION to standard output, then exits with status 1.
usage() {
	# One printf call emits the whole message; the format string is the
	# concatenation of the individual lines of the help text.
	printf "Usage: %s [LOCATION]\n\nLOCATION =%s\nDefault LOCATION:\n %s\n" \
		"$0" "$VALID_LOCATIONS" "$DEFAULT_LOCATION"
	exit 1
}
43
# Pick the location to look up from the first command line argument:
#   -h       -> show the help text (usage terminates the script)
#   (empty)  -> fall back to $DEFAULT_LOCATION
#   other    -> use the argument verbatim
if [ "$1" = '-h' ]; then
	usage
elif [ -z "$1" ]; then
	location="$DEFAULT_LOCATION"
else
	location="$1"
fi
49
# Download the status page, convert the HTML into the line-oriented hxpipe
# event stream and hand that stream to the awk program below.
#   --fail        produce no output on an HTTP error instead of feeding the
#                 error page to the parser
#   --silent      suppress the progress meter on stderr
#   --show-error  still report real transfer errors
#   --location    follow redirects
curl --fail --silent --show-error --location \
	'https://health.ny.gov/diseases/communicable/coronavirus/' \
| hxpipe \
| awk -v location="$location" '
# target_path is the element path that the matching rules compare XmlPath
# against. History of the page layout:
#   2020-03-16 morning-afternoon: /html/body/div/div/div/div/div/div/table/tr/td
#   2020-03-16 late evening 22:30: /html/body/div/div/div/div/div/div/tr/td
BEGIN { target_path = "/html/body/div/div/div/div/div/div/tr/td" }
60
0b60ba94
SK
# Element open "(name" and close ")name" records: keep XmlPath in sync.
/^[()]/ {
	update_node()
	next
}

# Attribute records of the form "Aname CDATA value": remember them until
# update_node() consumes them.
/^A/ && $2 == "CDATA" {
	update_node_attributes()
	next
}

# Text records "-text". Matching is done here, and only here, so that records
# of any other kind can never be mistaken for a cell value: a stale or empty
# XmlPayload is never printed and the found flag is never consumed by a
# non-text record.
/^-/ {
	XmlPayload = substr($0, 2)
	if (XmlPath == target_path) {
		if (XmlPayload == location) {
			# Label cell found; the next text at target_path is the value.
			found = 1
		} else if (found) {
			print XmlPayload
			found = 0
		}
	}
}
85
# path_to_string - join the first depth entries of path into "/a/b/c".
#
#   path   array of element names indexed from 1
#   depth  number of leading entries to include
#
# Returns the joined string, or "" when depth is less than 1.
# joined and level are local scratch variables.
function path_to_string(path, depth,    joined, level) {
	joined = ""
	level = 0
	while (++level <= depth)
		joined = joined "/" path[level]
	return joined
}
93
# update_node - process one element open or close record held in $1.
#
# Globals maintained:
#   _depth, _path[]        stack of currently open element names
#   XmlPath                "/"-joined path of the currently open element
#   XmlAttr[path, name]    attributes of the elements that are still open
#   XmlPayload             reset to "" whenever an element is closed
#   _hxpipe_curr_attrs[]   attributes collected since the last element record
#
# On "(name": push the element, recompute XmlPath and attach the pending
# attributes to it. The pending set is then cleared so that a nested element
# without attributes does not inherit the ones of its parent.
# On ")name": drop the attributes of the element being closed, pop it and
# recompute XmlPath.
# Any other first character is reported on stderr and aborts with status 1.
#
# paren, name, key and parts are local scratch variables.
function update_node(    paren, name, key, parts) {
	paren = substr($1, 1, 1)
	name = substr($1, 2)
	if (paren == "(") {
		_depth++
		_path[_depth] = name
		XmlPath = path_to_string(_path, _depth)
		for (key in _hxpipe_curr_attrs) {
			XmlAttr[XmlPath, key] = _hxpipe_curr_attrs[key]
		}
		# The pending attributes now belong to this element only.
		delete _hxpipe_curr_attrs
	} else if (paren == ")") {
		delete _hxpipe_curr_attrs
		XmlPayload = ""
		for (key in XmlAttr) {
			# Keys are path SUBSEP attribute-name; compare the path part.
			split(key, parts, SUBSEP)
			if (parts[1] == XmlPath) delete XmlAttr[key]
		}
		_depth--
		XmlPath = path_to_string(_path, _depth)
	} else {
		printf("ERROR in input line %d - not a parenthesis: \"%s\"\n", NR, paren) > "/dev/stderr"
		exit 1
	}
}
121
# update_node_attributes - record one attribute from the current record.
#
# The record has the form "Aname TYPE value". The name is $1 without the
# leading "A"; the value is everything after the second field and may contain
# blanks or be empty. The result is stored in _hxpipe_curr_attrs[name].
#
# The first two fields are stripped with a fixed pattern instead of a pattern
# built from $1, so attribute names that contain regular expression
# metacharacters (for example "[class]") are handled, and a record that ends
# right after the type field yields an empty value.
#
# key and val are local scratch variables.
function update_node_attributes(    key, val) {
	key = substr($1, 2)
	val = $0
	sub(/^[^ \t]+[ \t]+[^ \t]+[ \t]*/, "", val)
	_hxpipe_curr_attrs[key] = val
}
129 '
This page took 0.03806 seconds and 4 git commands to generate.