(∀ (α β) (U (cons 'ok α)
(cons 'error β))))
+(struct Hist
+ ([freq : Nonnegative-Integer]
+ [last : Nonnegative-Integer])
+ #:transparent)
+
+(define-type Nick-Hist
+ (Immutable-HashTable Url (Immutable-HashTable (Option String) Hist)))
+
(struct User
([uri : Url]
[nick : (Option String)]))
[comment : (Option String)])
#:transparent)
-(struct Resp
- ([status-line : String]
- [headers : (Listof Bytes)]
- [body-input : Input-Port])
- #:transparent)
-
(: prog Prog)
(define prog
(Prog "tt" (info:#%info-lookup 'version)))
[n2 (Peer-nick p2)]
[p (cond
; TODO Try to pick from nicks db: preferred, otherwise seen
- [(and (not n1) (not n2)) p1]
- [(and n1 n2 ) p1]
+ [(and (not n1) (not n2)) p1] ; TODO update with most-common nick
+ [(and n1 n2 ) p1] ; TODO compare which is more-common
[(and n1 (not n2)) p1]
[(and (not n1) n2) p2]
[else
(module+ test
(check-equal? (str->lines "abc\ndef\n\nghi") '("abc" "def" "ghi")))
+; TODO Should return 2 things: 1) msgs; 2) metadata parsed from comments
+; TODO Update peer nick based on metadata?
(: str->msgs (-> Peer String (Listof Msg)))
(define (str->msgs peer str)
(filter-map (λ (line) (str->msg peer line))
(-> Url (Listof (U Bytes String)) Input-Port
(U 'skipped-cached 'downloaded-new))) ; TODO 'ok|'error ?
(define (uri-download-from-port u headers body-input)
+ ; TODO Update message db from here? or where?
+ ; - 1st try can just be an in-memory set that gets written-to
+ ; and read-from disk as a whole.
(define u-str (url->string u))
(log-debug "uri-download-from-port ~v into ~v" u-str cached-object-path)
(define cached-object-path (url->cache-object-path u))
(channel-put timeout-chan '(error . timeout)))))
(define result-thread
(thread (λ ()
- ; XXX We timeout getting a response, but body download could
- ; also take a long time and we might want to time that out as
- ; well, but then we may end-up with partially downloaded
- ; objects. But that could happen anyway if the server drops the
- ; connection for whatever reason.
- ;
- ; Maybe that is OK once we start treating the
- ; downloaded object as an addition to the stored set of
- ; messages, rather than the final set of messages.
-
- ; TODO message db
- ; - 1st try can just be an in-memory set that gets written-to
- ; and read-from disk as a whole.
(define result
(with-handlers
; TODO Maybe name each known errno? (exn:fail:network:errno-errno e)
(http-sendrecv/url
u
#:headers (list (format "User-Agent: ~a" user-agent-str))))
- `(ok . ,(Resp status-line headers body-input))))
+ (log-debug "headers: ~v" headers)
+ (log-debug "status-line: ~v" status-line)
+ (define status
+ (string->number (second (string-split (bytes->string/utf-8 status-line)))))
+ (log-debug "status: ~v" status)
+ (let ([result
+ ; TODO Handle redirects.
+ ; TODO Should a redirect update a peer URL?
+ (match status
+ [200
+ `(ok . ,(uri-download-from-port u headers body-input))]
+ [_
+ `(error . (http-not-ok . ,status))])])
+ (close-input-port body-input)
+ result)))
(channel-put result-chan result))))
- (define result
- (sync timeout-chan
- result-chan))
+ (define result (sync timeout-chan result-chan))
(kill-thread result-thread)
(kill-thread timeout-thread)
- (match result
- [(cons 'error _)
- result]
- [(cons 'ok (Resp status-line headers body-input))
- (log-debug "headers: ~v" headers)
- (log-debug "status-line: ~v" status-line)
- (define status
- (string->number (second (string-split (bytes->string/utf-8 status-line)))))
- (log-debug "status: ~v" status)
- ; TODO Handle redirects. Should be within same timeout as req and body.
- (let ([result
- (match status
- [200
- `(ok . ,(uri-download-from-port u headers body-input))]
- [_
- `(error . (http . ,status))])])
- (close-input-port body-input)
- result)]))
+ result)
(: timeline-print (-> Out-Format (Listof Msg) Void))
(define (timeline-print out-format timeline)
(current-logger logger)
log-writer))
-; TODO Track timestamps of when a nick was seen.
-; TODO Track number of times a nick was seen.
-(: peers->nicks-by-url (-> (Listof Peer) (Immutable-HashTable Url (Setof String))))
-(define (peers->nicks-by-url peers)
+(: msgs->nick-hist (-> (Listof Msg) Nick-Hist))
+(define (msgs->nick-hist msgs)
(foldl
- (match-lambda**
- [((Peer #f _ _ _) tbl) tbl]
- [((Peer n u _ _) tbl) (hash-update tbl u (λ (nicks) (set-add nicks n)) (set))])
+ (λ (msg url->nick->hist)
+ (match-define (Msg curr _ from _ mentions) msg)
+ (foldl
+ (λ (peer url->nick->hist)
+ (match-define (Peer nick url _ _) peer)
+ (if nick
+ (hash-update url->nick->hist
+ url
+ (λ (nick->hist)
+ (hash-update nick->hist
+ nick
+ (match-lambda
+ [(Hist freq prev)
+ (Hist (+ 1 freq) (max prev curr))])
+ (Hist 0 0)))
+ (hash))
+ url->nick->hist))
+ url->nick->hist
+ (cons from mentions)))
(hash)
- peers))
+ msgs))
-(: update-nicks-history (-> (Listof Peer) Void))
-(define (update-nicks-history peers)
+(: update-nicks-history-files (-> Nick-Hist Void))
+(define (update-nicks-history-files nick-hist)
(hash-for-each
- (peers->nicks-by-url peers)
- (λ (url nicks-curr)
+ nick-hist
+ (λ (url nick->hist)
(define path (build-path tt-home-dir "nicks" "seen" (uri-encode (url->string url))))
- (define nicks-prev
- (if (file-exists? path)
- (list->set (file->lines path))
- (begin
- (make-parent-directory* path)
- (set))))
+ (make-parent-directory* path)
(display-lines-to-file
- (set->list (set-union nicks-prev nicks-curr))
+ (map (match-lambda
+ [(cons nick (Hist freq last))
+ (format "~a ~a ~a" nick freq last)])
+ (sort (hash->list nick->hist)
+ (match-lambda**
+ [((cons _ (Hist a _)) (cons _ (Hist b _)))
+ (> a b)])))
path
#:exists 'replace))))
+(: nick-hist-most-by (-> Nick-Hist Url (-> Hist Nonnegative-Integer) (Option String)))
+(define (nick-hist-most-by url->nick->hist url by)
+ (match (hash-ref url->nick->hist url #f)
+ [#f #f]
+ [nick->hist
+ (match (sort (hash->list nick->hist)
+ (λ (a b) (> (by (cdr a))
+ (by (cdr b)))))
+ ['() #f]
+ [(cons (cons nick _) _) nick])]))
+
+(: nick-hist-latest (-> Nick-Hist Url (Option String)))
+(define (nick-hist-latest nick-hist url)
+ (nick-hist-most-by nick-hist url Hist-last))
+
+(: nick-hist-common (-> Nick-Hist Url (Option String)))
+(define (nick-hist-common nick-hist url)
+ (nick-hist-most-by nick-hist url Hist-freq))
+
+(: peers-update-nick-to-common (-> Nick-Hist (Listof Peer) (Listof Peer)))
+(define (peers-update-nick-to-common nick-hist peers)
+ (map
+ (λ (p)
+ (match (nick-hist-common nick-hist (Peer-uri p))
+ [#f p]
+ [n (struct-copy Peer p [nick n])]))
+ peers))
+
+(module+ test
+ (let* ([url-str "http://foo"]
+ [url (string->url url-str)]
+ [nick1 "a"]
+ [nick2 "b"]
+ [nick3 "c"]
+ [ts-str-1 "2021-11-29T23:29:08-0500"]
+ [ts-str-2 "2021-11-29T23:30:00-0500"]
+ [ts-1 (rfc3339->epoch ts-str-1)]
+ [ts-2 (rfc3339->epoch ts-str-2)]
+ [msgs
+ (map (match-lambda
+ [(cons ts-str nick)
+ (str->msg (str->peer "test http://test")
+ (string-append ts-str " Hi @<" nick " " url-str ">"))])
+ (list (cons ts-str-2 nick1)
+ (cons ts-str-1 nick2)
+ (cons ts-str-1 nick2)
+ (cons ts-str-1 nick3)
+ (cons ts-str-1 nick3)
+ (cons ts-str-1 nick3)))]
+ [hist
+ (msgs->nick-hist msgs)])
+ (check-equal? (hash-ref (hash-ref hist url) nick1) (Hist 1 ts-2))
+ (check-equal? (hash-ref (hash-ref hist url) nick2) (Hist 2 ts-1))
+ (check-equal? (hash-ref (hash-ref hist url) nick3) (Hist 3 ts-1))
+ (check-equal? (nick-hist-common hist url) nick3)
+ (check-equal? (nick-hist-latest hist url) nick1)))
+
(: crawl (-> Void))
(define (crawl)
; TODO Test the non-io parts of crawling
(peers-cached)]
[cached-timeline
(peers->timeline peers-cached)]
+ [nick-hist
+ (msgs->nick-hist cached-timeline)]
[peers-mentioned-curr
(peers-mentioned cached-timeline)]
[peers-mentioned-prev
[peers-all-prev
(file->peers peers-all-file)]
[peers-mentioned
- (begin
- ; XXX Updating nicks before running peers-merge,
- ; since peers-merge is expected to refer to it in the future.
- (update-nicks-history (append peers-cached
- peers-mentioned-curr
- peers-mentioned-prev
- peers-all-prev))
- (peers-merge peers-mentioned-prev
- peers-mentioned-curr))]
+ (peers-merge peers-mentioned-prev
+ peers-mentioned-curr)]
[peers-all
- (peers-merge peers-mentioned
- peers-all-prev
- peers-cached)]
+ (peers-update-nick-to-common
+ nick-hist
+ (peers-merge peers-mentioned
+ peers-all-prev
+ peers-cached))]
[peers-discovered
(set->list (set-subtract (make-immutable-peers peers-all)
(make-immutable-peers peers-all-prev)))]
(match-lambda
[(Peer n _ u c) (list n u c)])
peers-discovered)))
+ (update-nicks-history-files nick-hist)
(peers->file peers-cached
peers-cached-file)
(peers->file peers-mentioned