X-Git-Url: https://git.xandkar.net/?a=blobdiff_plain;f=tt.rkt;h=6e227283e08694972a02baf4a703f3661e680076;hb=5fef9856f51272827b23cbf47f3b61d3a51abef0;hp=e002e71da561eb8b175244894e753b469f19cc30;hpb=49df5062e6de6465e08f29a72f05c7592c1b2e98;p=tt.git diff --git a/tt.rkt b/tt.rkt index e002e71..6e22728 100644 --- a/tt.rkt +++ b/tt.rkt @@ -32,7 +32,7 @@ [last : Nonnegative-Integer]) #:transparent) -(define-type Nick-Hist +(define-type Url-Nick-Hist (Immutable-HashTable Url (Immutable-HashTable (Option String) Hist))) (struct User @@ -198,6 +198,9 @@ (: tt-home-dir Path-String) (define tt-home-dir (build-path (expand-user-path "~") ".tt")) +(: pub-peers-dir Path-String) +(define pub-peers-dir (build-path tt-home-dir "peers")) + (: concurrent-filter-map (∀ (α β) (-> Natural (-> α β) (Listof α) (Listof β)))) (define (concurrent-filter-map num-workers f xs) ; TODO preserve order of elements OR communicate that reorder is expected @@ -523,6 +526,7 @@ (: peers->file (-> (Listof Peers) Path-String Void)) (define (peers->file peers path) + (make-parent-directory* path) (display-lines-to-file (map peer->str (sort peers @@ -740,8 +744,8 @@ [(cons p (cons 'error e)) (struct-copy Peer p [comment (format "~s" e)])]) results)) - (peers->file peers-ok (build-path tt-home-dir "peers-last-downloaded-ok")) - (peers->file peers-err (build-path tt-home-dir "peers-last-downloaded-err"))) + (peers->file peers-ok (build-path tt-home-dir "peers-last-downloaded-ok.txt")) + (peers->file peers-err (build-path tt-home-dir "peers-last-downloaded-err.txt"))) (: peers->timeline (-> (Listof Peer) (Listof Msg))) (define (peers->timeline peers) @@ -759,7 +763,7 @@ (define (paths->peers paths) (let* ([paths (match paths ['() - (let ([peer-refs-file (build-path tt-home-dir "peers")]) + (let ([peer-refs-file (build-path tt-home-dir "following.txt")]) (log-debug "No peer ref file paths provided, defaulting to ~v" (path->string peer-refs-file)) @@ -788,6 +792,20 @@ (define (peers-mentioned msgs) (append* (map Msg-mentions msgs))) +(: peers-filter-denied-domains (-> (Listof Peer) (Listof Peer))) +(define (peers-filter-denied-domains peers) + (define deny-file (build-path tt-home-dir "domains-deny.txt")) + (define denied-hosts + (list->set (map string-trim (filter-comments (file->lines deny-file))))) + (define denied-domain-patterns + (set-map denied-hosts (λ (h) (pregexp (string-append "\\." h "$"))))) + (filter + (λ (p) + (define host (url-host (Peer-uri p))) + (not (or (set-member? denied-hosts host) + (ormap (λ (d) (regexp-match? d host)) denied-domain-patterns)))) + peers)) + (: log-writer-stop (-> Thread Void)) (define (log-writer-stop log-writer) (log-message (current-logger) 'fatal 'stop "Exiting." #f) @@ -812,7 +830,7 @@ (current-logger logger) log-writer)) -(: msgs->nick-hist (-> (Listof Msg) Nick-Hist)) +(: msgs->nick-hist (-> (Listof Msg) Url-Nick-Hist)) (define (msgs->nick-hist msgs) (foldl (λ (msg url->nick->hist) @@ -837,13 +855,34 @@ (hash) msgs)) -(: update-nicks-history-files (-> Nick-Hist Void)) -(define (update-nicks-history-files nick-hist) +(: url-nick-hist->file (-> Url-Nick-Hist Path-String Void)) +(define (url-nick-hist->file unh filepath) + (define out (open-output-file filepath #:exists 'replace)) + (for-each + (match-lambda + [(cons url nick->hist) + (displayln (url->string url) out) + (for-each (match-lambda + [(cons nick (Hist freq last)) + (displayln (format " ~a ~a ~a" nick freq last) out)]) + (sort (hash->list nick->hist) + (match-lambda** + [((cons _ (Hist a _)) (cons _ (Hist b _))) + (> a b)])))]) + (sort + (hash->list unh) + (λ (a b) (stringdir (-> Url-Nick-Hist Path-String Void)) +(define (url-nick-hist->dir unh dirpath) (hash-for-each - nick-hist + unh (λ (url nick->hist) - (define path (build-path tt-home-dir "nicks" "seen" (uri-encode (url->string url)))) - (make-parent-directory* path) + (define filename (string-append (uri-encode (url->string url)) ".txt")) + (define filepath (build-path dirpath filename)) + (make-parent-directory* filepath) (display-lines-to-file (map (match-lambda [(cons nick (Hist freq last)) @@ -852,11 +891,17 @@ (match-lambda** [((cons _ (Hist a _)) (cons _ (Hist b _))) (> a b)]))) - path + filepath #:exists 'replace)))) -(: nick-hist-most-by (-> Nick-Hist Url (-> Hist Nonnegative-Integer) (Option String))) -(define (nick-hist-most-by url->nick->hist url by) +(: update-nicks-history-files (-> Url-Nick-Hist Void)) +(define (update-nicks-history-files unh) + (define nicks-dir (build-path tt-home-dir "nicks")) + (url-nick-hist->file unh (build-path nicks-dir "seen.txt")) + (url-nick-hist->dir unh (build-path nicks-dir "seen"))) + +(: url-nick-hist-most-by (-> Url-Nick-Hist Url (-> Hist Nonnegative-Integer) (Option String))) +(define (url-nick-hist-most-by url->nick->hist url by) (match (hash-ref url->nick->hist url #f) [#f #f] [nick->hist @@ -866,19 +911,19 @@ ['() #f] [(cons (cons nick _) _) nick])])) -(: nick-hist-latest (-> Nick-Hist Url (Option String))) -(define (nick-hist-latest nick-hist url) - (nick-hist-most-by nick-hist url Hist-last)) +(: url-nick-hist-latest (-> Url-Nick-Hist Url (Option String))) +(define (url-nick-hist-latest unh url) + (url-nick-hist-most-by unh url Hist-last)) -(: nick-hist-common (-> Nick-Hist Url (Option String))) -(define (nick-hist-common nick-hist url) - (nick-hist-most-by nick-hist url Hist-freq)) +(: url-nick-hist-common (-> Url-Nick-Hist Url (Option String))) +(define (url-nick-hist-common unh url) + (url-nick-hist-most-by unh url Hist-freq)) -(: peers-update-nick-to-common (-> Nick-Hist (Listof Peer) (Listof Peer))) -(define (peers-update-nick-to-common nick-hist peers) +(: peers-update-nick-to-common (-> Url-Nick-Hist (Listof Peer) (Listof Peer))) +(define (peers-update-nick-to-common unh peers) (map (λ (p) - (match (nick-hist-common nick-hist (Peer-uri p)) + (match (url-nick-hist-common unh (Peer-uri p)) [#f p] [n (struct-copy Peer p [nick n])])) peers)) @@ -909,25 +954,25 @@ (check-equal? (hash-ref (hash-ref hist url) nick1) (Hist 1 ts-2)) (check-equal? (hash-ref (hash-ref hist url) nick2) (Hist 2 ts-1)) (check-equal? (hash-ref (hash-ref hist url) nick3) (Hist 3 ts-1)) - (check-equal? (nick-hist-common hist url) nick3) - (check-equal? (nick-hist-latest hist url) nick1))) + (check-equal? (url-nick-hist-common hist url) nick3) + (check-equal? (url-nick-hist-latest hist url) nick1))) (: crawl (-> Void)) (define (crawl) ; TODO Test the non-io parts of crawling (let* ([peers-all-file - (build-path tt-home-dir "peers-all")] + (build-path pub-peers-dir "all.txt")] [peers-mentioned-file - (build-path tt-home-dir "peers-mentioned")] + (build-path pub-peers-dir "mentioned.txt")] [peers-parsed-file - (build-path tt-home-dir "peers-parsed")] + (build-path pub-peers-dir "downloaded-and-parsed.txt")] [peers-cached-file - (build-path tt-home-dir "peers-cached")] + (build-path pub-peers-dir "downloaded.txt")] [peers-cached (peers-cached)] [cached-timeline (peers->timeline peers-cached)] - [nick-hist + [url-nick-hist (msgs->nick-hist cached-timeline)] [peers-mentioned-curr (peers-mentioned cached-timeline)] @@ -940,7 +985,7 @@ peers-mentioned-curr)] [peers-all (peers-update-nick-to-common - nick-hist + url-nick-hist (peers-merge peers-mentioned peers-all-prev peers-cached))] @@ -960,7 +1005,7 @@ (match-lambda [(Peer n _ u c) (list n u c)]) peers-discovered))) - (update-nicks-history-files nick-hist) + (update-nicks-history-files url-nick-hist) (peers->file peers-cached peers-cached-file) (peers->file peers-mentioned @@ -991,20 +1036,21 @@ (: download (-> (Listof String) Positive-Integer Positive-Float Void)) (define (download file-paths num-workers timeout) - (let ([peers (paths->peers file-paths)]) + (let* ([peers-given (paths->peers file-paths)] + [peers-kept (peers-filter-denied-domains peers-given)] + [peers-denied (set-subtract peers-given peers-kept)]) + (log-info "Denied ~a peers" (length peers-denied)) (define-values (_res _cpu real-ms _gc) - (time-apply timeline-download (list num-workers timeout peers))) + (time-apply timeline-download (list num-workers timeout peers-kept))) (log-info "Downloaded timelines from ~a peers in ~a seconds." - (length peers) + (length peers-kept) (/ real-ms 1000.0)))) (: dispatch (-> String Void)) (define (dispatch command) (match command [(or "d" "download") - ; Initially, 15 was fastest out of the tried: 1, 5, 10, 20. Then I - ; started noticing significant slowdowns. Reducing to 5 seems to help. - (let ([num-workers 5] + (let ([num-workers 20] ; 20 was fastest out of the tried: 1, 5, 10, 20, 25, 30. [timeout 10.0]) (command-line #:program "tt download" @@ -1075,7 +1121,7 @@ #:args (command . args) (define log-writer (log-writer-start log-level)) (current-command-line-arguments (list->vector args)) - (set-user-agent-str (build-path tt-home-dir "me")) + (set-user-agent-str (build-path tt-home-dir "user.txt")) ; TODO dispatch should return status with which we should exit after cleanups (dispatch command) (log-writer-stop log-writer))))