#! /bin/sh
#
# Print positive coronavirus case counts for New York State locations, taken
# from the table published at $URI.
#
# Usage:
#   <script>             count for DEFAULT_LOCATION
#   <script> LOCATION    count for the named location
#   <script> --all       every location with its count, sorted by count
#   <script> -h          usage text
#
# Exit status is non-zero when the page cannot be fetched or parsed, or when
# the requested location is not present in the table.
#
# Dependencies:
#   - curl
#   - awk
#   - hxpipe (packaged in html-xml-utils on Debian and Ubuntu)
#   - column
#   - sort

# History of page layouts this script has had to follow:
#
# 2020-03-16 morning-afternoon
#URI='https://health.ny.gov/diseases/communicable/coronavirus/'
#TARGET_PATH='/html/body/div/div/div/div/div/div/table/tr/td'
#DEFAULT_LOCATION='New York City:'

# 2020-03-16 late evening 22:30
#URI='https://health.ny.gov/diseases/communicable/coronavirus/'
#TARGET_PATH='/html/body/div/div/div/div/div/div/tr/td'
#DEFAULT_LOCATION='New York City:'

# 2020-03-17 late afternoon 16:35
URI='https://coronavirus.health.ny.gov/county-county-breakdown-positive-cases'
# Element path (as built by the awk parser below) of the table cells to read.
TARGET_PATH='/html/body/div/div/main/div/div/div/div/div/div/div/div/table/tbody/tr/td'
DEFAULT_LOCATION='New York City'

# Shown in the usage text only. Lookups are checked against the fetched table,
# not against this list, so locations added to the page later still work.
VALID_LOCATIONS='
 Albany
 Allegany
 Broome
 Clinton
 Delaware
 Dutchess
 Erie
 Greene
 Herkimer
 Monroe
 Montgomery
 Nassau
 New York City
 Onondaga
 Ontario
 Orange
 Putnam
 Rensselaer
 Rockland
 Saratoga
 Schenectady
 Suffolk
 Sullivan
 Tioga
 Tompkins
 Ulster
 Westchester
 Wyoming
'

# Print the usage text on stdout and terminate the script with status 1.
usage() {
    printf "Usage: %s [OPTION | LOCATION]\n" "$0"
    printf '\n'
    printf 'OPTION = -h | --all \n'
    printf 'LOCATION ='
    printf '%s\n' "$VALID_LOCATIONS"
    printf "Default LOCATION:\n %s\n" "$DEFAULT_LOCATION"
    exit 1
}

# Extract case counts from hxpipe output.
#
# Arguments:
#   $1 - location to report (ignored when $3 is set)
#   $2 - element path of the table cells, e.g. /html/body/table/tr/td
#   $3 - non-empty/non-zero to report every location
# Input:   hxpipe records on stdin
# Outputs: "COUNT" for a single location, or one "COUNT|LOCATION" line per
#          location (in no particular order) in all-locations mode
# Returns: 0 on success; 1, with a message on stderr, when the requested
#          location (or, in all-locations mode, any data at all) is missing
parse_case_counts() {
    awk \
        -v target_location="$1" \
        -v target_path="$2" \
        -v opt_all="$3" \
        '
        # Start-tag and end-tag records keep XmlPath up to date.
        /^[()]/ {
            update_node()
            next
        }

        # Attribute records are buffered until the start tag that follows.
        /^A/ && $2 == "CDATA" {
            update_node_attributes()
            next
        }

        # Text records. Only these may supply cell content: any other record
        # type seen inside a cell would otherwise re-use the previous payload
        # and knock the location/count pairing out of step.
        /^-/ {
            XmlPayload = substr($0, 2)
            if (XmlPath == target_path)
                record_cell(XmlPayload)
            next
        }

        END {
            if (_fatal)
                exit 1

            if (opt_all) {
                rows = 0
                for (loc in case_counts) {
                    printf("%d|%s\n", case_counts[loc], loc)
                    rows++
                }
                if (rows == 0) {
                    print "ERROR: no case counts found in input" > "/dev/stderr"
                    exit 1
                }
            } else if (target_location in case_counts) {
                printf("%d\n", case_counts[target_location])
            } else {
                # A bare "0" here would be indistinguishable from a real count.
                printf("ERROR: no case count found for location \"%s\"\n",
                       target_location) > "/dev/stderr"
                exit 1
            }
        }

        # Cells at the target path alternate: a location, then its count.
        function record_cell(text) {
            if (!_location) {
                _location = text
            } else {
                # Drop thousands separators; %d would stop at the comma and
                # turn "1,234" into 1.
                gsub(/,/, "", text)
                case_counts[_location] = text
                _location = ""
            }
        }

        # Join path[1..depth] into "/a/b/c".
        function path_to_string(path, depth,    p, i) {
            p = ""
            for (i = 1; i <= depth; i++)
                p = p "/" path[i]
            return p
        }

        # Handle a "(name" or ")name" record: push or pop the element and
        # recompute XmlPath.
        function update_node(    paren, name, key, parts) {
            paren = substr($1, 1, 1)
            name = substr($1, 2)
            if (paren == "(") {
                _depth++
                _path[_depth] = name
                XmlPath = path_to_string(_path, _depth)
                for (key in _hxpipe_curr_attrs)
                    XmlAttr[XmlPath, key] = _hxpipe_curr_attrs[key]
                # The buffered attributes belong to this element only; clear
                # them so they are not attached to the next start tag as well.
                delete _hxpipe_curr_attrs
            } else if (paren == ")") {
                delete _hxpipe_curr_attrs
                XmlPayload = ""
                # Forget the attributes of the element being closed.
                for (key in XmlAttr) {
                    split(key, parts, SUBSEP)
                    if (parts[1] == XmlPath)
                        delete XmlAttr[key]
                }
                _depth--
                XmlPath = path_to_string(_path, _depth)
            } else {
                printf("ERROR in input line %d - not a parenthesis: \"%s\"\n",
                       NR, paren) > "/dev/stderr"
                _fatal = 1
                exit 1
            }
        }

        # Handle an "Aname CDATA value" record: remember the value, which is
        # everything after the first two fields, for the next start tag.
        function update_node_attributes(    key, val, s) {
            key = substr($1, 2, length($1))
            val = $0
            s = " +"
            sub("^" $1 s $2 s, "", val)
            _hxpipe_curr_attrs[key] = val
        }
        '
}

# Parse the command line, fetch and parse the page, print the report.
# Returns non-zero when no report could be produced.
main() {
    # Initialised explicitly so values inherited from the environment cannot
    # change the mode.
    opt_all=''
    target_location=''

    case "${1-}" in
        '-h')    usage;;
        '--all') opt_all=1;;
        '')      target_location="$DEFAULT_LOCATION";;
        *)       target_location="$1";;
    esac

    # Captured rather than piped straight into column/sort: the status of a
    # command substitution is that of the last pipeline stage (the parser), so
    # a parse failure can be propagated without relying on pipefail.
    # curl: -f produces no body on HTTP errors, -sS hides the progress meter
    # but keeps error messages.
    report=$(
        curl -fsS "$URI" \
        | hxpipe \
        | parse_case_counts "$target_location" "$TARGET_PATH" "$opt_all"
    ) || return 1

    printf '%s\n' "$report" \
    | column -t -s '|' \
    | sort -n -k 1
}

main "$@"