X-Git-Url: https://git.xandkar.net/?p=covid-19-scrapers.git;a=blobdiff_plain;f=fetch-case-count-usa-ny;h=16cf859969241d777c71a4ebb3a1932fb90953a6;hp=b3bd2515ce594e911679abb8cfe821947e30bf87;hb=a8677ce2ff31aed05e1adf8bc142b459f662b52a;hpb=cb4ba2007be59c08a5d0458ed629d20ffae69c1f diff --git a/fetch-case-count-usa-ny b/fetch-case-count-usa-ny index b3bd251..16cf859 100755 --- a/fetch-case-count-usa-ny +++ b/fetch-case-count-usa-ny @@ -5,9 +5,26 @@ # - awk # - hxpipe (packaged in html-xml-utils on Debian and Ubuntu) +# 2020-03-16 morning-afternoon +#URI='https://health.ny.gov/diseases/communicable/coronavirus/' +#TARGET_PATH='/html/body/div/div/div/div/div/div/table/tr/td' +#DEFAULT_LOCATION='New York City:' + +# 2020-03-16 late evening 22:30 +#URI='https://health.ny.gov/diseases/communicable/coronavirus/' +#TARGET_PATH='/html/body/div/div/div/div/div/div/tr/td' +#DEFAULT_LOCATION='New York City:' + +# 2020-03-17 late afternoon 16:35 +URI='https://coronavirus.health.ny.gov/county-county-breakdown-positive-cases' +TARGET_PATH='/html/body/div/div/main/div/div/div/div/div/div/div/div/table/tbody/tr/td' +DEFAULT_LOCATION='New York City' + VALID_LOCATIONS=' Albany + Allegany Broome + Clinton Delaware Dutchess Erie @@ -16,25 +33,28 @@ VALID_LOCATIONS=' Monroe Montgomery Nassau + New York City + Onondaga + Ontario Orange Putnam + Rensselaer Rockland Saratoga Schenectady Suffolk + Sullivan Tioga Tompkins Ulster Westchester - New York State (Outside of NYC) - New York City: - Total Positive Cases (Statewide) + Wyoming ' -DEFAULT_LOCATION='New York City:' usage() { - printf "Usage: %s [LOCATION]\n" "$0" + printf "Usage: %s [OPTION | LOCATION]\n" "$0" printf '\n' + printf 'OPTION = -h | --all \n' printf 'LOCATION =' printf '%s\n' "$VALID_LOCATIONS" printf "Default LOCATION:\n %s\n" "$DEFAULT_LOCATION" @@ -43,17 +63,18 @@ usage() { case "$1" in '-h') usage;; - '' ) location="$DEFAULT_LOCATION";; - * ) location="$1";; + '--all') opt_all=1;; + '' ) target_location="$DEFAULT_LOCATION";; + * ) target_location="$1";; esac -curl 'https://health.ny.gov/diseases/communicable/coronavirus/' \ +curl "$URI" \ | hxpipe \ -| awk -v location="$location" ' - BEGIN { - target_path = "/html/body/div/div/div/div/div/div/table/tr/td" - } - +| awk \ + -v target_location="$target_location" \ + -v target_path="$TARGET_PATH" \ + -v opt_all="$opt_all" \ + ' /^[\(\)]/ { update_node() next @@ -68,17 +89,26 @@ curl 'https://health.ny.gov/diseases/communicable/coronavirus/' \ XmlPayload = substr($0, 2, length($0)) } - XmlPath == target_path && XmlPayload == location { - found = 1; + XmlPath == target_path && !location { + location = XmlPayload; next } - XmlPath == target_path && found { - print XmlPayload; - found = 0; + XmlPath == target_path && location { + case_counts[location] = XmlPayload; + location = ""; next; } + END { + if (opt_all) { + for (location in case_counts) + printf("%d|%s\n", case_counts[location], location) + } else { + printf("%d\n", case_counts[target_location]) + } + } + function path_to_string(path, depth, p, i) { p = "" for (i = 1; i <= depth; i++) { @@ -122,4 +152,6 @@ curl 'https://health.ny.gov/diseases/communicable/coronavirus/' \ sub("^" $1 s $2 s, "", val) _hxpipe_curr_attrs[key] = val } - ' + ' \ +| column -ts\| \ +| sort -n -k 1