#! /bin/sh
#
# Fetch the page at $URI (a county-by-county table of positive cases),
# find the table cell whose text equals LOCATION and print the text of the
# cell that follows it.
#
# Usage: SCRIPT [LOCATION]
#   -h        print the help text (including the known locations) and exit
#   LOCATION  row label to look up; defaults to $DEFAULT_LOCATION
#
# Exit status: 0 when a value was printed, non-zero otherwise.
#
# Dependencies:
#   - curl
#   - awk
#   - hxpipe (packaged in html-xml-utils on Debian and Ubuntu)

# Row labels the table is known to contain, one per line. Only used for the
# help text: the lookup itself accepts any string.
VALID_LOCATIONS='
    Albany
    Allegany
    Broome
    Clinton
    Delaware
    Dutchess
    Erie
    Greene
    Herkimer
    Monroe
    Montgomery
    Nassau
    New York City
    New York State (Outside of NYC)
    Onondaga
    Ontario
    Orange
    Putnam
    Rensselaer
    Rockland
    Saratoga
    Schenectady
    Suffolk
    Sullivan
    Tioga
    Tompkins
    Total Positive Cases (Statewide)
    Ulster
    Westchester
    Wyoming
'
DEFAULT_LOCATION='New York City'

# Print the help text on stdout and exit with status 1.
usage() {
    printf "Usage: %s [LOCATION]\n" "$0"
    printf '\n'
    printf 'LOCATION ='
    printf '%s\n' "$VALID_LOCATIONS"
    printf "Default LOCATION:\n %s\n" "$DEFAULT_LOCATION"
    exit 1
}

# ${1-} keeps the script usable under "set -u" when no argument is given.
case "${1-}" in
    '-h') usage;;
    ''  ) location="$DEFAULT_LOCATION";;
    *   ) location="$1";;
esac

# Fail early, with a clear message, when a required tool is missing.
for tool in curl hxpipe awk; do
    command -v "$tool" >/dev/null 2>&1 || {
        printf '%s: required command not found: %s\n' "$0" "$tool" >&2
        exit 127
    }
done

# 2020-03-16
#URI='https://health.ny.gov/diseases/communicable/coronavirus/'
# 2020-03-17
URI='https://coronavirus.health.ny.gov/county-county-breakdown-positive-cases'

# curl flags:
#   -f   treat HTTP errors as failures instead of piping the error page on
#   -sS  no progress meter, but still report errors on stderr
#   -L   follow redirects
# The exit status of the pipeline is the exit status of awk, which is
# non-zero when no value could be extracted (including when curl failed and
# produced no input).
curl -fsSL "$URI" \
| hxpipe \
| awk -v location="$location" '
    BEGIN {
        # Element path of the table cells, as built by path_to_string().
        # It has to follow the layout of the page:
        # 2020-03-16 morning-afternoon
        #target_path = "/html/body/div/div/div/div/div/div/table/tr/td"
        # 2020-03-16 late evening 22:30
        #target_path = "/html/body/div/div/div/div/div/div/tr/td"
        # 2020-03-17 late afternoon 16:35
        target_path = "/html/body/div/div/main/div/div/div/div/div/div/div/div/table/tbody/tr/td"
    }

    # "(name" opens an element, ")name" closes it.
    /^[()]/ {
        update_node()
        next
    }

    # "Aname CDATA value": an attribute of the element that opens next.
    /^A/ && $2 == "CDATA" {
        update_node_attributes()
        next
    }

    # "-text": character data of the current element. Matching is done
    # here, and only here, so that other record types cannot be mistaken
    # for the value cell through a stale or empty XmlPayload.
    /^-/ {
        XmlPayload = substr($0, 2)
        if (XmlPath == target_path) {
            if (XmlPayload == location) {
                # Label cell found: the next cell text is the value.
                found = 1
            } else if (found) {
                print XmlPayload
                found = 0
                matches++
            }
        }
        next
    }

    END {
        # Keep the failure status of a parse error raised in update_node().
        if (fatal) exit 1
        if (!matches) {
            printf("ERROR: no value found for location \"%s\"\n", location) > "/dev/stderr"
            exit 1
        }
    }

    # Join path[1..depth] into a string of the form "/a/b/c".
    # p and i are locals.
    function path_to_string(path, depth,    p, i) {
        p = ""
        for (i = 1; i <= depth; i++) {
            p = p "/" path[i]
        }
        return p
    }

    # Maintain the globals _path, _depth, XmlPath and XmlAttr for the "("
    # or ")" record in $0. All parameters are locals.
    function update_node(    paren, name, key, parts) {
        paren = substr($1, 1, 1)
        name = substr($1, 2)
        if (paren == "(") {
            _depth++
            _path[_depth] = name
            XmlPath = path_to_string(_path, _depth)
            for (key in _hxpipe_curr_attrs) {
                XmlAttr[XmlPath, key] = _hxpipe_curr_attrs[key]
            }
            # The buffered attributes belong to this element only. Without
            # clearing them here they would also be recorded for every
            # descendant that has no attributes of its own.
            split("", _hxpipe_curr_attrs)
        } else if (paren == ")") {
            split("", _hxpipe_curr_attrs)
            XmlPayload = ""
            # Forget the attributes recorded for the element being closed.
            for (key in XmlAttr) {
                split(key, parts, SUBSEP)
                if (parts[1] == XmlPath) delete XmlAttr[key]
            }
            _depth--
            XmlPath = path_to_string(_path, _depth)
        } else {
            printf("ERROR in input line %d - not a parenthesis: \"%s\"\n", NR, paren) > "/dev/stderr"
            fatal = 1
            exit 1
        }
    }

    # Buffer the attribute in $0 ("Aname CDATA value") in
    # _hxpipe_curr_attrs until the element it belongs to is opened.
    # The prefix is removed by position and with a constant pattern, so
    # that regex metacharacters in the attribute name cannot break the
    # extraction. All parameters are locals.
    function update_node_attributes(    key, val) {
        key = substr($1, 2)
        val = substr($0, length($1) + 1)
        sub(/^ +CDATA */, "", val)
        _hxpipe_curr_attrs[key] = val
    }
    '