| 1 | #! /bin/sh |
| 2 | # |
| 3 | # Dependencies: |
| 4 | # - curl |
| 5 | # - awk |
| 6 | # - hxpipe (packaged in html-xml-utils on Debian and Ubuntu) |
| 7 | |
# LOCATION used when the script is started without an argument.
DEFAULT_LOCATION='Boston:'

# LOCATION values shown by usage(), one per line.
VALID_LOCATIONS='
Boston:
Massachusetts:
'
| 14 | |
# usage [STATUS]
#
# Print the command-line synopsis, the LOCATION values held in
# $VALID_LOCATIONS and the value of $DEFAULT_LOCATION on standard output,
# then terminate the script.
#
# STATUS is the exit status to terminate with. It defaults to 1, so a bare
# `usage` call keeps its previous behaviour; a caller that prints help on
# request can pass 0.
usage() {
    printf "Usage: %s [LOCATION]\n" "$0"
    printf '\n'
    printf 'LOCATION ='
    printf '%s\n' "$VALID_LOCATIONS"
    printf "Default LOCATION:\n %s\n" "$DEFAULT_LOCATION"
    exit "${1:-1}"
}
| 23 | |
# Resolve the LOCATION to report on from the first command-line argument.
#   -h, --help  print the usage text and exit
#   (none)      fall back to $DEFAULT_LOCATION
#   LOCATION    label to look up; the trailing ":" may be omitted
case "$1" in
    -h|--help) usage 0;;   # help was asked for explicitly: request status 0
    '')        location="$DEFAULT_LOCATION";;
    *:)        location="$1";;
    # The lookup compares against labels that end in ":", so a bare name
    # could never match; add the colon on behalf of the user.
    *)         location="$1:";;
esac

# A LOCATION that is not listed used to produce empty output with no
# explanation. Say so on stderr, but still attempt the lookup.
location_known=no
for candidate in $VALID_LOCATIONS; do
    if [ "$candidate" = "$location" ]; then
        location_known=yes
    fi
done
if [ "$location_known" = no ]; then
    printf '%s: warning: LOCATION "%s" is not one of the listed locations\n' \
        "$0" "$location" >&2
fi
| 29 | |
| 30 | curl 'https://www.boston.gov/news/coronavirus-disease-covid-19-boston' \ |
| 31 | | hxpipe \ |
| 32 | | awk ' |
| 33 | /^[\(\)]/ { |
| 34 | update_node() |
| 35 | next |
| 36 | } |
| 37 | |
| 38 | /^A/ && $2 == "CDATA" { |
| 39 | update_node_attributes() |
| 40 | next |
| 41 | } |
| 42 | |
| 43 | /^-/ { |
| 44 | XmlPayload = substr($0, 2, length($0)) |
| 45 | } |
| 46 | |
| 47 | XmlPath == "/html/body/div/div/input/header/div/div/div/section/article/div/div/div/div/div/div/div/div/address" \ |
| 48 | && XmlPayload ~ /^[A-Z][a-z]+: +[0-9]+/ { |
| 49 | print XmlPayload |
| 50 | } |
| 51 | |
# path_to_string(path, depth)
#
# Join path[1] .. path[depth] into a single string in which every component
# is preceded by "/". Returns "" when depth is less than 1.
# joined and level are locals; callers pass only path and depth.
function path_to_string(path, depth,    joined, level) {
    joined = ""
    level = 0
    while (level < depth) {
        level++
        joined = joined "/" path[level]
    }
    return joined
}
| 59 | |
# update_node()
#
# Handle the element line held in $1: "(name" opens an element and ")name"
# closes one.
#   open:  push name onto _path, refresh XmlPath and file the attributes
#          collected in _hxpipe_curr_attrs under XmlAttr[XmlPath, attribute].
#   close: clear XmlPayload, drop the XmlAttr entries stored for the path
#          being left, pop _path and refresh XmlPath.
# Any other leading character is reported on stderr and ends the program
# with status 1.
#
# Globals written: _depth, _path, XmlPath, XmlAttr, XmlPayload,
# _hxpipe_curr_attrs. The parameters are locals; callers pass no arguments.
function update_node(    paren, name, key, parts) {
    paren = substr($1, 1, 1)
    name = substr($1, 2)
    if (paren == "(") {
        _depth++
        _path[_depth] = name
        XmlPath = path_to_string(_path, _depth)
        for (key in _hxpipe_curr_attrs) {
            XmlAttr[XmlPath, key] = _hxpipe_curr_attrs[key]
        }
        # The pending attributes now belong to this element. Empty the
        # buffer so they are not filed again under the next element that
        # opens before any element has closed. split("", a) clears an
        # array in every awk implementation.
        split("", _hxpipe_curr_attrs)
    } else if (paren == ")") {
        split("", _hxpipe_curr_attrs)
        XmlPayload = ""
        for (key in XmlAttr) {
            # Keys are XmlPath SUBSEP attribute; parts[1] is the path.
            split(key, parts, SUBSEP)
            if (parts[1] == XmlPath) delete XmlAttr[key]
        }
        # Ignore a stray close at the root instead of going negative.
        if (_depth > 0) _depth--
        XmlPath = path_to_string(_path, _depth)
    } else {
        printf("ERROR in input line %d - not a parenthesis: \"%s\"\n", NR, paren) > "/dev/stderr"
        exit 1
    }
}
| 87 | |
# update_node_attributes()
#
# Record the attribute line in $0, which has the form "Aname TYPE value",
# as _hxpipe_curr_attrs[name] = value.
#
# The value is cut out by length rather than by using $1 as a regular
# expression. Attribute names such as (click), [class] or *ngIf contain
# regex metacharacters; used as a pattern they fail to match the line, and
# the whole line would be stored as the value.
# key and val are locals; callers pass no arguments.
function update_node_attributes(    key, val) {
    key = substr($1, 2)
    val = substr($0, length($1) + 1)     # drop "Aname"
    sub(/^[ \t]+/, "", val)
    val = substr(val, length($2) + 1)    # drop the type word
    sub(/^[ \t]+/, "", val)
    _hxpipe_curr_attrs[key] = val
}
| 95 | ' \ |
| 96 | | awk -v location="$location" ' |
| 97 | $1 == location { |
| 98 | weird_space_character = " "; |
| 99 | n = split($2, cases, weird_space_character); |
| 100 | print cases[1] |
| 101 | }' |