#! /bin/sh
#
# fetch-covid-19-cases-usa-ny
#
# Fetch the ny.gov coronavirus page and print the text of the table cell that
# immediately follows the cell whose text equals LOCATION (presumably the
# reported case count for that location).
#
# Usage:
#   fetch-covid-19-cases-usa-ny [LOCATION]
#
#   LOCATION  Exact text of the label cell to look for, including any
#             trailing punctuation. Defaults to "New York City:".
#
# Exit status:
#   0  at least one value was printed
#   1  nothing matched (fetch failed, page layout changed, unknown LOCATION)
#      or the hxpipe stream was malformed
#
# Dependencies:
#   - curl
#   - awk
#   - hxpipe (packaged in html-xml-utils on Debian and Ubuntu)
#
# Provenance (reconstructed from a gitweb commitdiff_plain export):
#   From:    Siraaj Khandkar
#   Date:    Mon, 16 Mar 2020 12:44:21 +0000 (-0400)
#   Subject: Implement fetcher and scraper of ny.gov COVID-19 data
#   Commit:  2c77b18f80ab8be7340e87d65e22dd4ac83ee2c0 (khome.git)
#   URL:     https://git.xandkar.net/?a=commitdiff_plain;h=2c77b18f80ab8be7340e87d65e22dd4ac83ee2c0;p=khome.git
#   Path:    home/bin/fetch-covid-19-cases-usa-ny (new file, mode 100755)

# An unset or empty first argument selects the default location.
location="${1:-New York City:}"

# --fail:       treat HTTP errors as failures instead of parsing an error page
# --silent:     no progress meter on stderr
# --show-error: but still report real errors
# --location:   follow redirects
#
# POSIX sh has no portable pipefail, so a failed fetch is detected by the awk
# END rule below (nothing printed => exit 1).
curl --fail --silent --show-error --location \
    'https://health.ny.gov/diseases/communicable/coronavirus/' \
| hxpipe \
| awk -v location="$location" '
    # NOTE: this program lives inside a single-quoted shell string, so it
    # must not contain any single-quote characters, comments included.

    BEGIN {
        # Element path of the table cells holding both labels and values.
        CellPath = "/html/body/div/div/div/div/div/div/table/tr/td"
    }

    # "(name" opens an element, ")name" closes one.
    /^[()]/ {
        update_node()
        next
    }

    # "Aname TYPE value": an attribute of the element that is opened next.
    # Every attribute type is recorded, so no attribute line can ever be
    # mistaken for text by the rule below.
    /^A/ {
        update_node_attributes()
        next
    }

    # "-text": character data. Matching is done only here, so XmlPayload is
    # always the text of the current line and never a stale value.
    /^-/ {
        XmlPayload = substr($0, 2)
        if (XmlPath != CellPath) next

        if (XmlPayload == location) {
            # Label cell found; the next text seen in a cell is its value.
            found = 1
        } else if (found) {
            print XmlPayload
            found = 0
            printed = 1
        }
        next
    }

    END {
        # Keep the failure status set by update_node().
        if (failed) exit 1
        if (!printed) {
            printf("ERROR: no value found for location \"%s\"\n", location) > "/dev/stderr"
            exit 1
        }
    }

    # Join path[1..depth] into "/a/b/c". Returns "" when depth < 1.
    # p and i are locals.
    function path_to_string(path, depth,    p, i) {
        p = ""
        for (i = 1; i <= depth; i++) {
            p = p "/" path[i]
        }
        return p
    }

    # Maintain the element stack for the "(" or ")" line in $1.
    # Updates the globals _depth, _path, XmlPath, XmlAttr and XmlPayload.
    # paren, name, key and parts are locals.
    function update_node(    paren, name, key, parts) {
        paren = substr($1, 1, 1)
        name = substr($1, 2)
        if (paren == "(") {
            _depth++
            _path[_depth] = name
            XmlPath = path_to_string(_path, _depth)
            for (key in _hxpipe_curr_attrs) {
                XmlAttr[XmlPath, key] = _hxpipe_curr_attrs[key]
            }
            # The pending attributes belong to this element only; drop them
            # so they are not copied onto elements opened later.
            delete _hxpipe_curr_attrs
        } else if (paren == ")") {
            delete _hxpipe_curr_attrs
            XmlPayload = ""
            # Forget the attributes of the element being closed.
            for (key in XmlAttr) {
                split(key, parts, SUBSEP)
                if (parts[1] == XmlPath) delete XmlAttr[key]
            }
            # Never let unbalanced input drive the depth below zero.
            if (_depth > 0) _depth--
            XmlPath = path_to_string(_path, _depth)
        } else {
            printf("ERROR in input line %d - not a parenthesis: \"%s\"\n", NR, paren) > "/dev/stderr"
            failed = 1
            exit 1
        }
    }

    # Remember the attribute on the current "Aname TYPE value" line until the
    # next element is opened. key and val are locals.
    function update_node_attributes(    key, val) {
        key = substr($1, 2)
        val = $0
        # Strip the name and type fields with a fixed pattern: no regex is
        # built from input, an empty value is handled, and leading spaces
        # inside the value survive.
        sub(/^[^ ]+ +[^ ]+ ?/, "", val)
        _hxpipe_curr_attrs[key] = val
    }
    '