+- [-] Output formats:
+ - [x] text long
+ - [x] text short
+ - [ ] HTML
+ - [ ] JSON
+- [-] Peer discovery
+ - [-] parse peer refs from peer timelines
+ - [x] mentions from timeline messages
+ - [x] @<source.nick source.url>
+ - [x] @<source.url>
+ - [ ] "following" from timeline comments: # following = <nick> <uri>
+ 1. split file lines in 2 groups: comments and messages
+ 2. dispatch messages parsing as usual
+ 3. dispatch comments parsing for:
+ - # following = <nick> <uri>
+ - what else?
+ - [ ] Parse User-Agent web access logs.
+ - [-] Update peer ref file(s)
+ - [x] peers-all
+ - [x] peers-mentioned
+ - [ ] peers-followed (by others, parsed from comments)
+ - [ ] peers-up (no net errors)
+ - [ ] peers-down (net errors)
+ - [ ] peers-valid (up and parsed at least 1 message)
+ - [ ] redirects?
+ Rough sketch from late 2019:
+ let read file =
+ ...
+ let write file peers =
+ ...
+ let fetch peer =
+ (* Fetch could mean either or both of:
+ * - fetch peer's we-are-twtxt.txt
+ * - fetch peer's twtxt.txt and extract mentioned peer URIs
+ * *)
+ ...
+ let test peers =
+ ...
+ let rec discover peers_old =
+ let peers_all =
+ Set.fold peers_old ~init:peers_old ~f:(fun peers p ->
+ match fetch p with
+ | Error _ ->
+ (* TODO: Should p be moved to down set here? *)
+ log_warning ...;
+ peers
+ | Ok peers_fetched ->
+ Set.union peers peers_fetched
+ )
+ in
+ if Set.is_empty (Set.diff peers_all peers_old) then
+ peers_all
+ else
+ discover peers_all
+ let rec loop interval peers_old =
+ let peers_all = discover peers_old in
+ let (peers_up, peers_down) = test peers_all in
+ write "peers-all.txt" peers_all;
+ write "peers-up.txt" peers_up;
+ write "peers-down.txt" peers_down;
+ sleep interval;
+ loop interval peers_all
+ let () =
+ loop (Sys.argv.(1)) (read "peers-all.txt")
+
+Backlog
+-------
+- [ ] Batch download jobs by domain:
+ - at most 1 worker per domain
+ - more than 1 domain per worker is OK
+- [ ] Remove mention link noise in read view.
+ in short view: just abbreviate @<nick uri> to @nick
+ in long view: abbreviate like above AND list the full versions after the text
+- [ ] Crawl only valid objects
+ REQUIRES: peers-valid ref file update
+- [ ] Reduce log noise
+- [ ] Parallelize crawling by file
+- [ ] Parallelize reading by file
+- [ ] Support date without time in timestamps
+- [ ] Associate cached object with nick.
+- [ ] Crawl downloaded web access logs
+- [ ] download-command hook to grab the access logs
+
+ (define (parse log-line)
+ (match (regexp-match #px"([^/]+)/([^ ]+) +\\(\\+([a-z]+://[^;]+); *@([^\\)]+)\\)" log-line)
+ [(list _ client version uri nick) (cons nick uri)]
+ [_ #f]))
+
+ (list->set (filter-map parse (file->lines "logs/combined-access.log")))
+
+ (filter (λ (p) (equal? 'file (file-or-directory-type p))) (directory-list logs-dir))
+
+- [ ] user-agent file as CLI option - need to run at least the crawler as another user
+- [ ] Support fetching rsync URIs
+- [ ] Check for peer duplicates:
+ - [ ] same nick for N>1 URIs
+ - [ ] same URI for N>1 nicks
+- [ ] Background polling and incremental timeline updates.
+ We can mark which messages have already been printed and print new ones as
+ they come in.
+ REQUIRES: polling
+- [ ] Polling mode/command, where tt periodically polls peer timelines
+- [ ] nick tiebreaker(s)
+ - [ ] some sort of a hash of URI?
+ - [ ] angry-purple-tiger kind of thingie?
+ - [ ] P2P nick registration?
+ - [ ] Peers vote by claiming to have seen a nick->uri mapping?
+ The inherent race condition would be a feature, since all user name
+ registrations are races.
+ REQUIRES: blockchain
+- [ ] stats
+ - [ ] download times per peer
+- [ ] Support redirects
+ - should permanent redirects update the peer ref somehow?
+- [ ] optional text wrap
+- [ ] write
+- [ ] peer refs set operations (perhaps better done externally?)
+- [ ] timeline as a result of a query (peer ref set op + filter expressions)
+- [ ] config files
+- [ ] highlight mentions
+- [ ] filter on mentions
+- [ ] highlight hashtags
+- [ ] filter on hashtags
+- [ ] hashtags as channels? initial hashtag special?
+- [ ] query language
+- [ ] console logger colors by level ('error)
+- [ ] file logger ('debug)