X-Git-Url: https://git.xandkar.net/?a=blobdiff_plain;f=tt.rkt;h=19b61ec179af87900653f80ad63fb6474f32e29b;hb=9c34c974c9c5d324cd432499ae55b3e2b3f1b059;hp=640e5429e35ae05e6bdfd9e02b45749844a78e12;hpb=dbc2628006d6af8480978a293ab03882b43383c8;p=tt.git diff --git a/tt.rkt b/tt.rkt index 640e542..19b61ec 100644 --- a/tt.rkt +++ b/tt.rkt @@ -3,10 +3,9 @@ (require openssl/sha1) (require racket/date) (require - net/http-client + net/head net/uri-codec - net/url-string - net/url-structs) + net/url) (require (prefix-in info: "info.rkt")) @@ -24,23 +23,23 @@ (U 'old->new 'new->old)) -(struct msg +(struct Msg ([ts-epoch : Integer] [ts-orig : String] [nick : (Option String)] [uri : Url] [text : String] - [mentions : (Listof Peer)]) - #:type-name Msg) + [mentions : (Listof Peer)])) (struct Peer ([nick : (Option String)] - [uri : Url])) + [uri : Url]) + #:transparent) (: tt-home-dir Path-String) (define tt-home-dir (build-path (expand-user-path "~") ".tt")) -(: concurrent-filter-map (∀ (α β) (-> Natural (-> α β) (Listof α)))) +(: concurrent-filter-map (∀ (α β) (-> Natural (-> α β) (Listof α) (Listof β)))) (define (concurrent-filter-map num-workers f xs) ; TODO preserve order of elements OR communicate that reorder is expected ; TODO switch from mailboxes to channels @@ -91,25 +90,25 @@ [n (vector-length colors)]) (λ (out-format color-i msg) (let ([color (vector-ref colors (modulo color-i n))] - [nick (msg-nick msg)] - [uri (url->string (msg-uri msg))] - [text (msg-text msg)] - [mentions (msg-mentions msg)]) + [nick (Msg-nick msg)] + [uri (url->string (Msg-uri msg))] + [text (Msg-text msg)] + [mentions (Msg-mentions msg)]) (match out-format ['single-line (let ([nick (if nick nick uri)]) (printf "~a \033[1;37m<~a>\033[0m \033[0;~am~a\033[0m~n" (parameterize ([date-display-format 'iso-8601]) - (date->string (seconds->date [msg-ts-epoch msg]) #t)) + (date->string (seconds->date (Msg-ts-epoch msg)) #t)) nick color text))] ['multi-line (let ([nick (if nick (string-append nick " ") "")]) (printf "~a (~a)~n\033[1;37m<~a~a>\033[0m~n\033[0;~am~a\033[0m~n~n" (parameterize ([date-display-format 'rfc2822]) - (date->string (seconds->date [msg-ts-epoch msg]) #t)) - (msg-ts-orig msg) + (date->string (seconds->date (Msg-ts-epoch msg)) #t)) + (Msg-ts-orig msg) nick uri color text))]))))) (: rfc3339->epoch (-> String (Option Nonnegative-Integer))) @@ -167,7 +166,7 @@ [(list _wholething nick-uri) (str->peer nick-uri)])) (regexp-match* #px"@<[^\\s]+([\\s]+)?[^>]+>" text))]) - (msg ts-epoch ts-orig nick uri text mentions)) + (Msg ts-epoch ts-orig nick uri text mentions)) (begin (log-error "Msg rejected due to invalid timestamp: ~v, nick:~v, uri:~v" @@ -196,10 +195,10 @@ z)] [m (str->msg n u (string-append ts sep txt))]) (check-not-false m) - (check-equal? (msg-nick m) n) - (check-equal? (msg-uri m) u) - (check-equal? (msg-text m) txt) - (check-equal? (msg-ts-orig m) ts (format "Given: ~v" ts)) + (check-equal? (Msg-nick m) n) + (check-equal? (Msg-uri m) u) + (check-equal? (Msg-text m) txt) + (check-equal? (Msg-ts-orig m) ts (format "Given: ~v" ts)) ))) (let* ([ts "2020-11-18T22:22:09-0500"] @@ -208,26 +207,26 @@ [nick "foo"] [uri "bar"] [actual (str->msg nick uri (string-append ts tab text))] - [expected (msg 1605756129 ts nick uri text)]) + [expected (Msg 1605756129 ts nick uri text)]) (check-equal? - (msg-ts-epoch actual) - (msg-ts-epoch expected) + (Msg-ts-epoch actual) + (Msg-ts-epoch expected) "str->msg ts-epoch") (check-equal? - (msg-ts-orig actual) - (msg-ts-orig expected) + (Msg-ts-orig actual) + (Msg-ts-orig expected) "str->msg ts-orig") (check-equal? - (msg-nick actual) - (msg-nick expected) + (Msg-nick actual) + (Msg-nick expected) "str->msg nick") (check-equal? - (msg-uri actual) - (msg-uri expected) + (Msg-uri actual) + (Msg-uri expected) "str->msg uri") (check-equal? - (msg-text actual) - (msg-text expected) + (Msg-text actual) + (Msg-text expected) "str->msg text"))) (: str->lines (-> String (Listof String))) @@ -244,6 +243,8 @@ (: cache-dir Path-String) (define cache-dir (build-path tt-home-dir "cache")) +(define cache-object-dir (build-path cache-dir "objects")) + (: url->cache-file-path-v1 (-> Url Path-String)) (define (url->cache-file-path-v1 uri) (define (hash-sha1 str) : (-> String String) @@ -251,13 +252,19 @@ (define digest (sha1 in)) (close-input-port in) digest) - (build-path cache-dir (hash-sha1 (url->string uri)))) + (build-path cache-object-dir (hash-sha1 (url->string uri)))) (: url->cache-file-path-v2 (-> Url Path-String)) (define (url->cache-file-path-v2 uri) - (build-path cache-dir (uri-encode (url->string uri)))) + (build-path cache-object-dir (uri-encode (url->string uri)))) + +(define url->cache-object-path url->cache-file-path-v2) + +(define (url->cache-etag-path uri) + (build-path cache-dir "etags" (uri-encode (url->string uri)))) -(define url->cache-file-path url->cache-file-path-v2) +(define (url->cache-lmod-path uri) + (build-path cache-dir "lmods" (uri-encode (url->string uri)))) ; TODO Return Option (: uri-read-cached (-> Url String)) @@ -297,8 +304,49 @@ (filter-map str->peer (filter-comments (str->lines str)))) (: file->peers (-> Path-String (Listof Peer))) -(define (file->peers filename) - (str->peers (file->string filename))) +(define (file->peers file-path) + (if (file-exists? file-path) + (str->peers (file->string file-path)) + (begin + (log-error "File does not exist: ~v" (path->string file-path)) + '()))) + +(define re-rfc2822 + #px"^(Mon|Tue|Wed|Thu|Fri|Sat|Sun), ([0-9]{2}) (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) ([0-9]{4}) ([0-2][0-9]):([0-6][0-9]):([0-6][0-9]) GMT") + +(: b->n (-> Bytes (Option Number))) +(define (b->n b) + (string->number (bytes->string/utf-8 b))) + +(: mon->num (-> Bytes Natural)) +(define/match (mon->num mon) + [(#"Jan") 1] + [(#"Feb") 2] + [(#"Mar") 3] + [(#"Apr") 4] + [(#"May") 5] + [(#"Jun") 6] + [(#"Jul") 7] + [(#"Aug") 8] + [(#"Sep") 9] + [(#"Oct") 10] + [(#"Nov") 11] + [(#"Dec") 12]) + +(: rfc2822->epoch (-> Bytes (Option Nonnegative-Integer))) +(define (rfc2822->epoch timestamp) + (match (regexp-match re-rfc2822 timestamp) + [(list _ _ dd mo yyyy HH MM SS) + #:when (and dd mo yyyy HH MM SS) + (find-seconds (b->n SS) + (b->n MM) + (b->n HH) + (b->n dd) + (mon->num mo) + (b->n yyyy) + #f)] + [_ + #f])) (: user-agent String) (define user-agent @@ -315,43 +363,70 @@ (format "+~a" prog-uri))]) (format "~a/~a (~a)" prog-name prog-version user))) +(: header-get (-> (Listof Bytes) Bytes (Option Bytes))) +(define (header-get headers name) + (match (filter-map (curry extract-field name) headers) + [(list val) val] + [_ #f])) + (: uri-download (-> Url Void)) (define (uri-download u) - (define cache-file-path (url->cache-file-path u)) - (log-debug "uri-download ~v into ~v" u cache-file-path) - (match* ((url-scheme u) (url-host u) (url-port u)) - [(s h p) - #:when (and s h) - (define ssl? (string=? s "https")) - (define-values (status-line headers body-input) - ; TODO Timeout. Currently hangs on slow connections. - (http-sendrecv - h - (url->string (struct-copy url u [scheme #f] [host #f])) - #:ssl? ssl? - #:port (cond [p p] [ssl? 443] [else 80]) - #:headers (list (format "User-Agent: ~a" user-agent)) - )) - (log-debug "headers: ~v" headers) - (log-debug "status-line: ~v" status-line) - (define status - (string->number (second (string-split (bytes->string/utf-8 status-line))))) - (log-debug "status: ~v" status) - ; TODO Handle redirects - (if (= 200 status) - (begin - (make-parent-directory* cache-file-path) - (call-with-output-file cache-file-path - (curry copy-port body-input) - #:exists 'replace)) - (raise status))] - [(_ _ _) - (log-error "Invalid URI: ~v" u)])) + (define cached-object-path (url->cache-object-path u)) + (define cached-etag-path (url->cache-etag-path u)) + (define cached-lmod-path (url->cache-lmod-path u)) + (log-debug "uri-download ~v into ~v" u cached-object-path) + (define-values (status-line headers body-input) + ; TODO Timeout. Currently hangs on slow connections. + (http-sendrecv/url u #:headers (list (format "User-Agent: ~a" user-agent)))) + (log-debug "headers: ~v" headers) + (log-debug "status-line: ~v" status-line) + (define status + (string->number (second (string-split (bytes->string/utf-8 status-line))))) + (log-debug "status: ~v" status) + ; TODO Handle redirects + (match status + [200 + (let* ([etag (header-get headers #"ETag")] + [lmod (header-get headers #"Last-Modified")] + [lmod-curr (if lmod (rfc2822->epoch lmod) #f)] + [lmod-prev (if (file-exists? cached-lmod-path) + (rfc2822->epoch (file->bytes cached-lmod-path)) + #f)]) + (log-debug "lmod-curr:~v lmod-prev:~v" lmod-curr lmod-prev) + (unless (or (and etag + (file-exists? cached-etag-path) + (bytes=? etag (file->bytes cached-etag-path)) + (begin + (log-info "ETags match, skipping the rest of ~v" (url->string u)) + #t)) + (and lmod-curr + lmod-prev + (<= lmod-curr lmod-prev) + (begin + (log-info "Last-Modified <= current skipping the rest of ~v" (url->string u)) + #t))) + (begin + (log-info + "Downloading the rest of ~v. ETag: ~a, Last-Modified: ~v" + (url->string u) etag lmod) + (make-parent-directory* cached-object-path) + (make-parent-directory* cached-etag-path) + (make-parent-directory* cached-lmod-path) + (call-with-output-file cached-object-path + (curry copy-port body-input) + #:exists 'replace) + (when etag + (display-to-file etag cached-etag-path #:exists 'replace)) + (when lmod + (display-to-file lmod cached-lmod-path #:exists 'replace)))) + (close-input-port body-input))] + [_ + (raise status)])) (: timeline-print (-> Out-Format (Listof Msg) Void)) (define (timeline-print out-format timeline) (void (foldl (match-lambda** - [((and m (msg _ _ nick _ _ _)) (cons prev-nick i)) + [((and m (Msg _ _ nick _ _ _)) (cons prev-nick i)) (let ([i (if (equal? prev-nick nick) i (+ 1 i))]) (msg-print out-format i m) (cons nick i))]) @@ -394,15 +469,31 @@ ['old->new <] ['new->old >])) (sort (append* (filter-map peer->msgs peers)) - (λ (a b) (cmp (msg-ts-epoch a) (msg-ts-epoch b))))) + (λ (a b) (cmp (Msg-ts-epoch a) (Msg-ts-epoch b))))) + +(: paths->peers (-> (Listof String) (Listof Peer))) +(define (paths->peers paths) + (let* ([paths (match paths + ['() + (let ([peer-refs-file (build-path tt-home-dir "peers")]) + (log-debug + "No peer ref file paths provided, defaulting to ~v" + (path->string peer-refs-file)) + (list peer-refs-file))] + [paths + (log-debug "Peer ref file paths provided: ~v" paths) + (map string->path paths)])] + [peers (append* (map file->peers paths))]) + (log-info "Read-in ~a peers." (length peers)) + peers)) (: log-writer-stop (-> Thread Void)) (define (log-writer-stop log-writer) (log-message (current-logger) 'fatal 'stop "Exiting." #f) (thread-wait log-writer)) -(: logger-start (-> Log-Level Thread)) -(define (logger-start level) +(: log-writer-start (-> Log-Level Thread)) +(define (log-writer-start level) (let* ([logger (make-logger #f #f level #f)] [log-receiver @@ -438,7 +529,7 @@ "u, upload : Upload your twtxt file (alias to execute ~/.tt/upload)." "" #:args (command . args) - (define log-writer (logger-start log-level)) + (define log-writer (log-writer-start log-level)) (current-command-line-arguments (list->vector args)) (match command [(or "d" "download") @@ -452,11 +543,10 @@ [("-j" "--jobs") njobs "Number of concurrent jobs." (set! num-workers (string->number njobs))] - #:args (filename) + #:args file-paths (define-values (_res _cpu real-ms _gc) - (time-apply timeline-download (list num-workers (file->peers filename)))) - (log-info "Timeline downloaded in ~a seconds." (/ real-ms 1000.0)) - (log-writer-stop log-writer)))] + (time-apply timeline-download (list num-workers (paths->peers file-paths)))) + (log-info "Timeline downloaded in ~a seconds." (/ real-ms 1000.0))))] [(or "u" "upload") (command-line #:program @@ -482,6 +572,6 @@ [("-l" "--long") "Long output format" (set! out-format 'multi-line)] - #:args (filename) - (timeline-print out-format (timeline-read order (file->peers filename)))))] - )))) + #:args file-paths + (timeline-print out-format (timeline-read order (paths->peers file-paths)))))]) + (log-writer-stop log-writer))))