Implement fetcher and scraper of ny.gov COVID-19 data
[khome.git] / home / bin / fetch-covid-19-cases-usa-ny
1 #! /bin/sh
2 #
3 # Dependencies:
4 # - curl
5 # - awk
6 # - hxpipe (packaged in html-xml-utils on Debian and Ubuntu)
7
# Row label to look up in the table; the first argument overrides the
# default when it is present and non-empty.
location="${1:-New York City:}"
12
# Download the ny.gov coronavirus page, flatten it with hxpipe into one
# record per markup event, and let awk print the text of the table cell
# that follows the cell whose text equals $location.
#
# curl flags: --fail makes HTTP errors produce no body (so an error page is
# never scraped as data), --silent/--show-error hide the progress meter but
# keep real error messages, --location follows redirects.
curl --fail --silent --show-error --location \
    'https://health.ny.gov/diseases/communicable/coronavirus/' \
| hxpipe \
| awk -v location="$location" '
BEGIN {
    # Path of the table cells that hold the labels and the counts.
    CellPath = "/html/body/div/div/div/div/div/div/table/tr/td"
}

# Element open/close records: keep XmlPath in sync with the nesting.
/^[()]/ {
    update_node()
    next
}

# CDATA attribute records: remember them for the element that follows.
/^A/ && $2 == "CDATA" {
    update_node_attributes()
    next
}

# Text records. All matching happens here so that other record types seen
# while inside a cell can never print a stale or empty payload.
/^-/ {
    XmlPayload = substr($0, 2)
    if (XmlPath != CellPath)
        next
    if (XmlPayload == location) {
        # Label cell found; the next text seen in a cell is the value.
        found = 1
    } else if (found) {
        print XmlPayload
        found = 0
    }
    next
}
40
# path_to_string: join the first depth entries of path into "/a/b/c".
#
#   path   array of element names indexed from 1
#   depth  number of leading entries to include
#
# Returns the joined string, or "" when depth is less than 1.
# joined and level are locals.
function path_to_string(path, depth,    joined, level) {
    joined = ""
    level = 0
    while (++level <= depth)
        joined = joined "/" path[level]
    return joined
}
48
# update_node: track element nesting from the current "(name" or ")name"
# record (read from $1).
#
# On "(name": pushes name onto _path, recomputes XmlPath, and stores the
#   attributes collected in _hxpipe_curr_attrs as XmlAttr[XmlPath, key].
#   The collected attributes are then cleared so that they are not copied
#   again onto a nested element that has no attributes of its own.
# On ")name": clears XmlPayload and any pending attributes, removes the
#   XmlAttr entries of the element being closed, pops _path and recomputes
#   XmlPath.
# Any other leading character is reported on stderr and awk exits with 1.
#
# Takes no arguments; paren, name, key, path and parts are locals.
function update_node(    paren, name, key, path, parts) {
    paren = substr($1, 1, 1)
    name = substr($1, 2)
    if (paren == "(") {
        _depth++
        _path[_depth] = name
        XmlPath = path_to_string(_path, _depth)
        for (key in _hxpipe_curr_attrs)
            XmlAttr[XmlPath, key] = _hxpipe_curr_attrs[key]
        # Consumed by this element; do not let them leak into children.
        delete _hxpipe_curr_attrs
    } else if (paren == ")") {
        delete _hxpipe_curr_attrs
        XmlPayload = ""
        for (key in XmlAttr) {
            split(key, parts, SUBSEP)
            path = parts[1]
            if (path == XmlPath) delete XmlAttr[key]
        }
        # Never go below the root, even on an unbalanced close record.
        if (_depth > 0) {
            delete _path[_depth]
            _depth--
        }
        XmlPath = path_to_string(_path, _depth)
    } else {
        printf("ERROR in input line %d - not a parenthesis: \"%s\"\n", NR, paren) > "/dev/stderr"
        exit 1
    }
}
76
# update_node_attributes: record the attribute described by the current
# "Aname TYPE value" record in _hxpipe_curr_attrs[name].
#
# The name is $1 without its leading "A". The value is everything after
# the first two space-separated words, so it may itself contain spaces.
# The prefix is stripped with a fixed pattern rather than one built from
# $1 and $2, so attribute names containing regex metacharacters are safe,
# and a record with an empty value yields "".
#
# Takes no arguments; key and val are locals.
function update_node_attributes(    key, val) {
    key = substr($1, 2)
    val = $0
    sub(/^[^ ]+ +[^ ]+ */, "", val)
    _hxpipe_curr_attrs[key] = val
}
84 '
This page took 0.079307 seconds and 4 git commands to generate.