X-Git-Url: https://git.xandkar.net/?p=covid-19-scrapers.git;a=blobdiff_plain;f=fetch-case-count-usa-ny;h=16cf859969241d777c71a4ebb3a1932fb90953a6;hp=808d7be4b60acebcae2c0bf2a1d789ef8dfe0287;hb=851726db1ae32895ef5a499bc59da443be33c3c0;hpb=2162fd84b7bf9f87e6318e139e9f2a687659e26e diff --git a/fetch-case-count-usa-ny b/fetch-case-count-usa-ny index 808d7be..16cf859 100755 --- a/fetch-case-count-usa-ny +++ b/fetch-case-count-usa-ny @@ -5,6 +5,21 @@ # - awk # - hxpipe (packaged in html-xml-utils on Debian and Ubuntu) +# 2020-03-16 morning-afternoon +#URI='https://health.ny.gov/diseases/communicable/coronavirus/' +#TARGET_PATH='/html/body/div/div/div/div/div/div/table/tr/td' +#DEFAULT_LOCATION='New York City:' + +# 2020-03-16 late evening 22:30 +#URI='https://health.ny.gov/diseases/communicable/coronavirus/' +#TARGET_PATH='/html/body/div/div/div/div/div/div/tr/td' +#DEFAULT_LOCATION='New York City:' + +# 2020-03-17 late afternoon 16:35 +URI='https://coronavirus.health.ny.gov/county-county-breakdown-positive-cases' +TARGET_PATH='/html/body/div/div/main/div/div/div/div/div/div/div/div/table/tbody/tr/td' +DEFAULT_LOCATION='New York City' + VALID_LOCATIONS=' Albany Allegany @@ -19,7 +34,6 @@ VALID_LOCATIONS=' Montgomery Nassau New York City - New York State (Outside of NYC) Onondaga Ontario Orange @@ -32,16 +46,15 @@ VALID_LOCATIONS=' Sullivan Tioga Tompkins - Total Positive Cases (Statewide) Ulster Westchester Wyoming ' -DEFAULT_LOCATION='New York City' usage() { - printf "Usage: %s [LOCATION]\n" "$0" + printf "Usage: %s [OPTION | LOCATION]\n" "$0" printf '\n' + printf 'OPTION = -h | --all \n' printf 'LOCATION =' printf '%s\n' "$VALID_LOCATIONS" printf "Default LOCATION:\n %s\n" "$DEFAULT_LOCATION" @@ -50,30 +63,18 @@ usage() { case "$1" in '-h') usage;; - '' ) location="$DEFAULT_LOCATION";; - * ) location="$1";; + '--all') opt_all=1;; + '' ) target_location="$DEFAULT_LOCATION";; + * ) target_location="$1";; esac -# 2020-03-16 -#URI='https://health.ny.gov/diseases/communicable/coronavirus/' - -# 2020-03-17 -URI='https://coronavirus.health.ny.gov/county-county-breakdown-positive-cases' - curl "$URI" \ | hxpipe \ -| awk -v location="$location" ' - BEGIN { - # 2020-03-16 morning-afternoon - #target_path = "/html/body/div/div/div/div/div/div/table/tr/td" - - # 2020-03-16 late evening 22:30 - #target_path = "/html/body/div/div/div/div/div/div/tr/td" - - # 2020-03-17 late afternoon 16:35 - target_path = "/html/body/div/div/main/div/div/div/div/div/div/div/div/table/tbody/tr/td" - } - +| awk \ + -v target_location="$target_location" \ + -v target_path="$TARGET_PATH" \ + -v opt_all="$opt_all" \ + ' /^[\(\)]/ { update_node() next @@ -88,17 +89,26 @@ curl "$URI" \ XmlPayload = substr($0, 2, length($0)) } - XmlPath == target_path && XmlPayload == location { - found = 1; + XmlPath == target_path && !location { + location = XmlPayload; next } - XmlPath == target_path && found { - print XmlPayload; - found = 0; + XmlPath == target_path && location { + case_counts[location] = XmlPayload; + location = ""; next; } + END { + if (opt_all) { + for (location in case_counts) + printf("%d|%s\n", case_counts[location], location) + } else { + printf("%d\n", case_counts[target_location]) + } + } + function path_to_string(path, depth, p, i) { p = "" for (i = 1; i <= depth; i++) { @@ -142,4 +152,6 @@ curl "$URI" \ sub("^" $1 s $2 s, "", val) _hxpipe_curr_attrs[key] = val } - ' + ' \ +| column -ts\| \ +| sort -n -k 1