X-Git-Url: https://git.xandkar.net/?a=blobdiff_plain;f=tt.rkt;h=19397e2edade88f2a93ab26feefb636579bebbe3;hb=b8b29fbb27481dd8b1475f5714bfbc61d266dacb;hp=f69df52f1caf317be61f027003a9b2b4fa041794;hpb=1864c06e5f5541b5b716a55ef642f2af11f3f30a;p=tt.git diff --git a/tt.rkt b/tt.rkt index f69df52..19397e2 100644 --- a/tt.rkt +++ b/tt.rkt @@ -23,6 +23,10 @@ (U 'old->new 'new->old)) +(define-type Result + (∀ (α β) (U (cons 'ok α) + (cons 'error β)))) + (struct Msg ([ts-epoch : Integer] [ts-orig : String] @@ -32,14 +36,16 @@ [mentions : (Listof Peer)])) (struct Peer - ([nick : (Option String)] - [uri : Url]) + ([nick : (Option String)] + [uri : Url] + [comment : (Option String)]) #:transparent) (struct Resp ([status-line : String] [headers : (Listof Bytes)] - [body-input : Input-Port])) + [body-input : Input-Port]) + #:transparent) (: tt-home-dir Path-String) (define tt-home-dir (build-path (expand-user-path "~") ".tt")) @@ -147,7 +153,7 @@ local-time?)]) (+ ts-epoch tz-offset))] [_ - (log-error "Invalid timestamp: ~v" ts) + (log-debug "Invalid timestamp: ~v" ts) #f])))) (: str->msg (-> (Option String) Url String (Option Msg))) @@ -158,7 +164,7 @@ (with-handlers* ([exn:fail? (λ (e) - (log-error + (log-debug "Failed to parse msg: ~v, from: ~v, at: ~v, because: ~v" str-head nick (url->string uri) e) #f)]) @@ -174,7 +180,7 @@ (regexp-match* #px"@<[^\\s]+([\\s]+)?[^>]+>" text))]) (Msg ts-epoch ts-orig nick uri text mentions)) (begin - (log-error + (log-debug "Msg rejected due to invalid timestamp: ~v, nick:~v, uri:~v" str-head nick (url->string uri)) #f)))] @@ -276,8 +282,7 @@ (define (url->cache-lmod-path uri) (build-path cache-dir "lmods" (uri-encode (url->string uri)))) -; TODO Return Option -(: uri-read-cached (-> Url String)) +(: uri-read-cached (-> Url (Option String))) (define (uri-read-cached uri) (define path-v1 (url->cache-file-path-v1 uri)) (define path-v2 (url->cache-file-path-v2 uri)) @@ -286,28 +291,65 @@ (if (file-exists? path-v2) (file->string path-v2) (begin - (log-warning "Cache file not found for URI: ~a" (url->string uri)) - ""))) + (log-debug "Cache file not found for URI: ~a" (url->string uri)) + #f))) -(: uri? (-> String Boolean)) -(define (uri? str) - (regexp-match? #rx"^[a-z]+://.*" (string-downcase str))) +(: str->url (-> String (Option String))) +(define (str->url s) + (with-handlers* + ([exn:fail? (λ (e) #f)]) + (string->url s))) (: str->peer (String (Option Peer))) (define (str->peer str) (log-debug "Parsing peer string: ~v" str) - (with-handlers* - ([exn:fail? - (λ (e) - (log-error "Invalid URI in string: ~v, exn: ~v" str e) - #f)]) - (match (string-split str) - [(list u) #:when (uri? u) (Peer #f (string->url u))] - [(list n u) #:when (uri? u) (Peer n (string->url u))] - [_ - (log-error "Invalid peer string: ~v" str) - #f]))) + (match + (regexp-match + #px"(([^\\s\t]+)[\\s\t]+)?([a-zA-Z]+://[^\\s\t]*)[\\s\t]*(#\\s*(.*))?" + str) + [(list _wholething + _nick-with-space + nick + url + _comment-with-hash + comment) + (match (str->url url) + [#f + (log-error "Invalid URI in peer string: ~v" str) + #f] + [url (Peer nick url comment)])] + [_ + (log-debug "Invalid peer string: ~v" str) + #f])) +(module+ test + (check-equal? + (str->peer "foo http://bar/file.txt # some rando") + (Peer "foo" (str->url "http://bar/file.txt") "some rando")) + (check-equal? + (str->peer "http://bar/file.txt # some rando") + (Peer #f (str->url "http://bar/file.txt") "some rando")) + (check-equal? + (str->peer "http://bar/file.txt #") + (Peer #f (str->url "http://bar/file.txt") "")) + (check-equal? + (str->peer "http://bar/file.txt#") ; XXX URLs can have #s + (Peer #f (str->url "http://bar/file.txt#") #f)) + (check-equal? + (str->peer "http://bar/file.txt") + (Peer #f (str->url "http://bar/file.txt") #f)) + (check-equal? + (str->peer "foo http://bar/file.txt") + (Peer "foo" (str->url "http://bar/file.txt") #f)) + (check-equal? + (str->peer "foo bar # baz") + #f) + (check-equal? + (str->peer "foo bar://baz # quux") + (Peer "foo" (str->url "bar://baz") "quux")) + (check-equal? + (str->peer "foo bar//baz # quux") + #f)) (: filter-comments (-> (Listof String) (Listof String))) (define (filter-comments lines) @@ -321,8 +363,11 @@ (define (peers->file peers path) (display-lines-to-file (map (match-lambda - [(Peer n u) - (format "~a~a" (if n (format "~a " n) "") (url->string u))]) + [(Peer n u c) + (format "~a~a~a" + (if n (format "~a " n) "") + (url->string u) + (if c (format " # ~a" c) ""))]) peers) path #:exists 'replace)) @@ -382,8 +427,8 @@ [user (if (file-exists? user-peer-file) (match (first (file->peers user-peer-file)) - [(Peer #f u) (format "+~a" (url->string u) )] - [(Peer n u) (format "+~a; @~a" (url->string u) n)]) + [(Peer #f u _) (format "+~a" (url->string u) )] + [(Peer n u _) (format "+~a; @~a" (url->string u) n)]) (format "+~a" prog-uri))]) (format "~a/~a (~a)" prog-name prog-version user))) @@ -393,13 +438,59 @@ [(list val) val] [_ #f])) -(: uri-download (-> Positive-Float Url Void)) -(define (uri-download timeout u) +(: uri-download-from-port + (-> Url (Listof (U Bytes String)) Input-Port + (U 'skipped-cached 'downloaded-new))) ; TODO 'ok|'error ? +(define (uri-download-from-port u headers body-input) + (define u-str (url->string u)) + (log-debug "uri-download-from-port ~v into ~v" u-str cached-object-path) (define cached-object-path (url->cache-object-path u)) (define cached-etag-path (url->cache-etag-path u)) (define cached-lmod-path (url->cache-lmod-path u)) + (define etag (header-get headers #"ETag")) + (define lmod (header-get headers #"Last-Modified")) + (define lmod-curr (if lmod (rfc2822->epoch lmod) #f)) + (define lmod-prev (if (file-exists? cached-lmod-path) + (rfc2822->epoch (file->bytes cached-lmod-path)) + #f)) + (log-debug "lmod-curr:~v lmod-prev:~v" lmod-curr lmod-prev) + (define cached? + (or (and etag + (file-exists? cached-etag-path) + (bytes=? etag (file->bytes cached-etag-path)) + (begin + (log-debug "ETags match, skipping the rest of ~v" u-str) + #t)) + (and lmod-curr + lmod-prev + (<= lmod-curr lmod-prev) + (begin + (log-debug "Last-Modified <= current skipping the rest of ~v" u-str) + #t)))) + (if (not cached?) + (begin + (log-debug + "Downloading the rest of ~v. ETag: ~a, Last-Modified: ~v" + u-str etag lmod) + (make-parent-directory* cached-object-path) + (make-parent-directory* cached-etag-path) + (make-parent-directory* cached-lmod-path) + (call-with-output-file cached-object-path + (curry copy-port body-input) + #:exists 'replace) + (when etag + (display-to-file etag cached-etag-path #:exists 'replace)) + (when lmod + (display-to-file lmod cached-lmod-path #:exists 'replace)) + 'downloaded-new) + 'skipped-cached)) + +(: uri-download + (-> Positive-Float Url + (Result (U 'skipped-cached 'downloaded-new) + Any))) ; TODO Maybe more-precise error type? +(define (uri-download timeout u) (define u-str (url->string u)) - (log-debug "uri-download ~v into ~v" u-str cached-object-path) (define timeout-chan (make-channel)) (define result-chan (make-channel)) (define timeout-thread @@ -407,7 +498,7 @@ ; Doing this instead of sync/timeout to distinguish error values, ; rather than just have #f to work with. (sleep timeout) - (channel-put timeout-chan (cons 'error 'timeout))))) + (channel-put timeout-chan '(error . timeout))))) (define result-thread (thread (λ () ; XXX We timeout getting a response, but body download could @@ -425,12 +516,16 @@ ; and read-from disk as a whole. (define result (with-handlers - ([exn:fail? (λ (e) (cons 'error (cons 'net-error e)))]) + ; TODO Maybe name each known errno? (exn:fail:network:errno-errno e) + ([exn:fail:network? + (λ (e) `(error . (net-error . ,e)))] + [exn? + (λ (e) `(error . (other . ,e)))]) (define-values (status-line headers body-input) (http-sendrecv/url u #:headers (list (format "User-Agent: ~a" user-agent)))) - (cons 'ok (Resp status-line headers body-input)))) + `(ok . ,(Resp status-line headers body-input)))) (channel-put result-chan result)))) (define result (sync timeout-chan @@ -438,55 +533,23 @@ (kill-thread result-thread) (kill-thread timeout-thread) (match result - [(cons 'error 'timeout) - (log-error "Download failed: timeout. URL:~v" u-str)] - [(cons 'error (cons 'net-error e)) - (log-error "Download failed. Network error. URL:~v EXN:~v" u-str e)] + [(cons 'error _) + result] [(cons 'ok (Resp status-line headers body-input)) (log-debug "headers: ~v" headers) (log-debug "status-line: ~v" status-line) (define status (string->number (second (string-split (bytes->string/utf-8 status-line))))) (log-debug "status: ~v" status) - ; TODO Handle redirects - (match status - [200 - (let* ([etag (header-get headers #"ETag")] - [lmod (header-get headers #"Last-Modified")] - [lmod-curr (if lmod (rfc2822->epoch lmod) #f)] - [lmod-prev (if (file-exists? cached-lmod-path) - (rfc2822->epoch (file->bytes cached-lmod-path)) - #f)]) - (log-debug "lmod-curr:~v lmod-prev:~v" lmod-curr lmod-prev) - (unless (or (and etag - (file-exists? cached-etag-path) - (bytes=? etag (file->bytes cached-etag-path)) - (begin - (log-info "ETags match, skipping the rest of ~v" u-str) - #t)) - (and lmod-curr - lmod-prev - (<= lmod-curr lmod-prev) - (begin - (log-info "Last-Modified <= current skipping the rest of ~v" u-str) - #t))) - (begin - (log-info - "Downloading the rest of ~v. ETag: ~a, Last-Modified: ~v" - u-str etag lmod) - (make-parent-directory* cached-object-path) - (make-parent-directory* cached-etag-path) - (make-parent-directory* cached-lmod-path) - (call-with-output-file cached-object-path - (curry copy-port body-input) - #:exists 'replace) - (when etag - (display-to-file etag cached-etag-path #:exists 'replace)) - (when lmod - (display-to-file lmod cached-lmod-path #:exists 'replace)))))] - [_ - (log-error "HTTP error URL:~a status:~a" u-str status)]) - (close-input-port body-input)])) + ; TODO Handle redirects. Should be within same timeout as req and body. + (let ([result + (match status + [200 + `(ok . ,(uri-download-from-port u headers body-input))] + [_ + `(error . (http . ,status))])]) + (close-input-port body-input) + result)])) (: timeline-print (-> Out-Format (Listof Msg) Void)) (define (timeline-print out-format timeline) @@ -499,24 +562,51 @@ timeline))) (: peer->msgs (-> Peer (Listof Msg))) -(define (peer->msgs f) - (match-define (Peer nick uri) f) - (log-info "Reading peer nick:~v uri:~v" nick (url->string uri)) - (str->msgs nick uri (uri-read-cached uri))) - -(: peer-download (-> Positive-Float Peer Void)) +(define (peer->msgs peer) + (match-define (Peer nick uri _) peer) + (log-debug "Reading peer nick:~v uri:~v" nick (url->string uri)) + (define msgs-data (uri-read-cached uri)) + (if msgs-data + (str->msgs nick uri msgs-data) + '())) + +(: peer-download + (-> Positive-Float Peer + (Result (U 'skipped-cached 'downloaded-new) + Any))) (define (peer-download timeout peer) - (match-define (Peer nick uri) peer) + (match-define (Peer nick uri _) peer) (define u (url->string uri)) - (log-info "Download BEGIN uri:~a" u) - (define-values (_result _tm-cpu-ms tm-real-ms _tm-gc-ms) + (log-info "Download BEGIN URL:~a" u) + (define-values (results _tm-cpu-ms tm-real-ms _tm-gc-ms) (time-apply uri-download (list timeout uri))) - (log-info "Download END in ~a seconds, uri: ~a" (/ tm-real-ms 1000.0) u)) + (define result (car results)) + (log-info "Download END in ~a seconds, URL:~a, result:~s" + (/ tm-real-ms 1000.0) + u + result) + result) (: timeline-download (-> Integer Positive-Float (Listof Peer) Void)) (define (timeline-download num-workers timeout peers) - ; TODO No need for map - can just iter - (void (concurrent-filter-map num-workers (λ (p) (peer-download timeout p)) peers))) + (define results + (concurrent-filter-map num-workers + (λ (p) (cons p (peer-download timeout p))) + peers)) + (define peers-ok + (filter-map (match-lambda + [(cons p (cons 'ok _)) p] + [(cons _ (cons 'error e)) #f]) + results)) + (define peers-err + (filter-map (match-lambda + [(cons _ (cons 'ok _)) + #f] + [(cons p (cons 'error e)) + (struct-copy Peer p [comment (format "~s" e)])]) + results)) + (peers->file peers-ok (build-path tt-home-dir "peers-last-downloaded-ok")) + (peers->file peers-err (build-path tt-home-dir "peers-last-downloaded-err"))) (: uniq (∀ (α) (-> (Listof α) (Listof α)))) (define (uniq xs) @@ -556,7 +646,7 @@ (append* (map (λ (filename) (define path (build-path cache-object-dir filename)) (define size (/ (file-size path) 1000000.0)) - (log-info "BEGIN parsing ~a MB from file: ~v" + (log-debug "BEGIN parsing ~a MB from file: ~v" size (path->string path)) (define t0 (current-inexact-milliseconds)) @@ -566,12 +656,12 @@ (filter-comments (file->lines path)))) (define t1 (current-inexact-milliseconds)) - (log-info "END parsing ~a MB in ~a seconds from file: ~v." + (log-debug "END parsing ~a MB in ~a seconds from file: ~v." size (* 0.001 (- t1 t0)) (path->string path)) (when (empty? m) - (log-warning "No messages found in ~a" (path->string path))) + (log-debug "No messages found in ~a" (path->string path))) m) (directory-list cache-object-dir)))) (uniq (append* (map Msg-mentions msgs)))) @@ -615,7 +705,7 @@ "r, read : Read the timeline (offline operation)." "d, download : Download the timeline." ; TODO Add path dynamically - "u, upload : Upload your twtxt file (alias to execute ~/.tt/upload)." + "u, upload : Upload your twtxt file (alias to execute ~/.tt/hooks/upload)." "c, crawl : Discover new peers mentioned by known peers (offline operation)." "" #:args (command . args) @@ -649,7 +739,7 @@ #:program "tt upload" #:args () - (if (system (path->string (build-path tt-home-dir "upload"))) + (if (system (path->string (build-path tt-home-dir "hooks" "upload"))) (exit 0) (exit 1)))] [(or "r" "read") @@ -698,13 +788,15 @@ #:args () (let* ([peers-sort (λ (peers) (sort peers (match-lambda** - [((Peer n1 _) (Peer n2 _)) + [((Peer n1 _ _) (Peer n2 _ _)) (stringset peers-all-prev))] [peers-all - (peers-sort (set->list peers-all))]) + (peers-sort (set->list peers-all))] + [peers-parsed + (filter + (λ (p) (< 0 (length (peer->msgs p)))) + peers-all)]) (log-info "Known peers mentioned: ~a" (length peers-mentioned)) + (log-info "Known peers parsed ~a" (length peers-parsed)) (log-info "Known peers total: ~a" (length peers-all)) (log-info "Discovered ~a new peers:~n~a" (set-count peers-discovered) @@ -731,6 +828,8 @@ (set->list peers-discovered)))) (peers->file peers-mentioned peers-mentioned-file) + (peers->file peers-parsed + peers-parsed-file) (peers->file peers-all peers-all-file)))] [command