Implement fetcher and scraper of ny.gov COVID-19 data
author Siraaj Khandkar <siraaj@khandkar.net>
Mon, 16 Mar 2020 12:44:21 +0000 (08:44 -0400)
committer Siraaj Khandkar <siraaj@khandkar.net>
Mon, 16 Mar 2020 12:44:21 +0000 (08:44 -0400)
home/bin/fetch-covid-19-cases-usa-ny [new file with mode: 0755]

diff --git a/home/bin/fetch-covid-19-cases-usa-ny b/home/bin/fetch-covid-19-cases-usa-ny
new file mode 100755 (executable)
index 0000000..603ce7f
--- /dev/null
@@ -0,0 +1,84 @@
+#! /bin/sh
+#
+# Dependencies:
+#     - curl
+#     - awk
+#     - hxpipe (packaged in html-xml-utils on Debian and Ubuntu)
+
+case "$1" in
+    '') location='New York City:';;
+     *) location="$1";;
+esac
+
+curl 'https://health.ny.gov/diseases/communicable/coronavirus/' \
+| hxpipe \
+| awk -v location="$location" '
+    /^[\(\)]/ {
+        update_node()
+        next
+    }
+
+    /^A/ && $2 == "CDATA" {
+        update_node_attributes()
+        next
+    }
+
+    /^-/ {
+        XmlPayload = substr($0, 2, length($0))
+    }
+
+    XmlPath == "/html/body/div/div/div/div/div/div/table/tr/td" && XmlPayload == location {
+        found = 1;
+        next
+    }
+
+    XmlPath == "/html/body/div/div/div/div/div/div/table/tr/td" && found {
+        print XmlPayload;
+        found = 0;
+        next;
+    }
+
+    # Join path[1] .. path[depth] into one string, putting a "/" in front of
+    # every element, e.g. ("html", "body") with depth 2 gives "/html/body".
+    # A depth of zero (or less) gives the empty string.
+    function path_to_string(path, depth,    joined, level) {
+        joined = ""
+        level = 0
+        while (++level <= depth) {
+            joined = joined "/" path[level]
+        }
+        return joined
+    }
+
+    # Track element nesting for one "(name" or ")name" input line (read
+    # from $1).
+    #
+    # Globals maintained:
+    #   _depth, _path        stack of the currently open element names
+    #   XmlPath              "/"-joined path of the innermost open element
+    #   XmlAttr[path, name]  attributes recorded for elements on open and
+    #                        removed again when an element at that path closes
+    #   XmlPayload           reset to "" whenever an element closes
+    #   _hxpipe_curr_attrs   attributes collected since the last tag line
+    #
+    # Any other leading character is reported on stderr and aborts with
+    # exit status 1.
+    function update_node(    paren, name, key, parts) {
+        paren = substr($1, 1, 1)
+        name = substr($1, 2)
+        if (paren == "(") {
+            _depth++
+            _path[_depth] = name
+            XmlPath = path_to_string(_path, _depth)
+            for (key in _hxpipe_curr_attrs) {
+                XmlAttr[XmlPath, key] = _hxpipe_curr_attrs[key]
+            }
+            # The pending attributes belong to this element only. Drop them
+            # so that a child opened before any close tag does not inherit
+            # them.
+            delete _hxpipe_curr_attrs
+        } else if (paren == ")") {
+            delete _hxpipe_curr_attrs
+            XmlPayload = ""
+            for (key in XmlAttr) {
+                # Keys are path SUBSEP attribute-name; compare the path part.
+                split(key, parts, SUBSEP)
+                if (parts[1] == XmlPath) delete XmlAttr[key]
+            }
+            # A stray close tag must not push the depth below the root.
+            if (_depth > 0) _depth--
+            XmlPath = path_to_string(_path, _depth)
+        } else {
+            printf("ERROR in input line %d - not a parenthesis: \"%s\"\n", NR, paren) > "/dev/stderr"
+            exit 1
+        }
+    }
+
+    # Record one attribute line of the form "Aname TYPE value" (the current
+    # $0) in _hxpipe_curr_attrs[name]. The value is everything after the
+    # first two whitespace-separated fields and may itself contain spaces.
+    #
+    # The prefix is stripped with a fixed pattern instead of a regex built
+    # from $1, so attribute names containing regex metacharacters can
+    # neither break the match nor abort awk with a regex syntax error.
+    function update_node_attributes(    key, val) {
+        key = substr($1, 2)
+        val = $0
+        sub(/^[^ \t]+[ \t]+[^ \t]+[ \t]*/, "", val)
+        _hxpipe_curr_attrs[key] = val
+    }
+    '
This page took 0.030566 seconds and 4 git commands to generate.