X-Git-Url: https://git.xandkar.net/?a=blobdiff_plain;ds=sidebyside;f=tt.rkt;h=bc8e8bc5c134b92f596b08b59bfc37ce3a8865a8;hb=refs%2Ftags%2F0.18.0;hp=d6218b8305d4c116014f189dc36e08f5a51ae822;hpb=d718efc4fa25667b6c42c97b8f0998ff3eb9e09c;p=tt.git diff --git a/tt.rkt b/tt.rkt index d6218b8..bc8e8bc 100644 --- a/tt.rkt +++ b/tt.rkt @@ -4,10 +4,8 @@ (require racket/date) (require net/head - net/http-client net/uri-codec - net/url-string - net/url-structs) + net/url) (require (prefix-in info: "info.rkt")) @@ -281,6 +279,10 @@ (log-warning "Cache file not found for URI: ~a" (url->string uri)) ""))) +(: uri? (-> String Boolean)) +(define (uri? str) + (regexp-match? #rx"^[a-z]+://.*" (string-downcase str))) + (: str->peer (String (Option Peer))) (define (str->peer str) (log-debug "Parsing peer string: ~v" str) @@ -290,8 +292,8 @@ (log-error "Invalid URI in string: ~v, exn: ~v" str e) #f)]) (match (string-split str) - [(list u) (Peer #f (string->url u))] - [(list n u) (Peer n (string->url u))] + [(list u) #:when (uri? u) (Peer #f (string->url u))] + [(list n u) #:when (uri? u) (Peer n (string->url u))] [_ (log-error "Invalid peer string: ~v" str) #f]))) @@ -305,14 +307,61 @@ (define (str->peers str) (filter-map str->peer (filter-comments (str->lines str)))) +(: peers->file (-> (Listof Peers) Path-String Void)) +(define (peers->file peers path) + (display-lines-to-file + (map (match-lambda + [(Peer n u) + (format "~a~a" (if n (format "~a " n) "") (url->string u))]) + peers) + path + #:exists 'replace)) + (: file->peers (-> Path-String (Listof Peer))) (define (file->peers file-path) (if (file-exists? file-path) (str->peers (file->string file-path)) (begin - (log-error "File does not exist: ~v" (path->string file-path)) + (log-warning "File does not exist: ~v" (path->string file-path)) '()))) +(define re-rfc2822 + #px"^(Mon|Tue|Wed|Thu|Fri|Sat|Sun), ([0-9]{2}) (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) ([0-9]{4}) ([0-2][0-9]):([0-6][0-9]):([0-6][0-9]) GMT") + +(: b->n (-> Bytes (Option Number))) +(define (b->n b) + (string->number (bytes->string/utf-8 b))) + +(: mon->num (-> Bytes Natural)) +(define/match (mon->num mon) + [(#"Jan") 1] + [(#"Feb") 2] + [(#"Mar") 3] + [(#"Apr") 4] + [(#"May") 5] + [(#"Jun") 6] + [(#"Jul") 7] + [(#"Aug") 8] + [(#"Sep") 9] + [(#"Oct") 10] + [(#"Nov") 11] + [(#"Dec") 12]) + +(: rfc2822->epoch (-> Bytes (Option Nonnegative-Integer))) +(define (rfc2822->epoch timestamp) + (match (regexp-match re-rfc2822 timestamp) + [(list _ _ dd mo yyyy HH MM SS) + #:when (and dd mo yyyy HH MM SS) + (find-seconds (b->n SS) + (b->n MM) + (b->n HH) + (b->n dd) + (mon->num mo) + (b->n yyyy) + #f)] + [_ + #f])) + (: user-agent String) (define user-agent (let* @@ -340,51 +389,53 @@ (define cached-etag-path (url->cache-etag-path u)) (define cached-lmod-path (url->cache-lmod-path u)) (log-debug "uri-download ~v into ~v" u cached-object-path) - (match* ((url-scheme u) (url-host u) (url-port u)) - [(s h p) - #:when (and s h) - (define ssl? (string=? s "https")) - (define-values (status-line headers body-input) - ; TODO Timeout. Currently hangs on slow connections. - (http-sendrecv - h - (url->string (struct-copy url u [scheme #f] [host #f])) - #:ssl? ssl? - #:port (cond [p p] [ssl? 443] [else 80]) - #:headers (list (format "User-Agent: ~a" user-agent)))) - (log-debug "headers: ~v" headers) - (log-debug "status-line: ~v" status-line) - (define status - (string->number (second (string-split (bytes->string/utf-8 status-line))))) - (log-debug "status: ~v" status) - ; TODO Handle redirects - (match status - [200 - (let ([etag (header-get headers #"ETag")] - [lmod (header-get headers #"Last-Modified")]) - (if (and etag - (file-exists? cached-etag-path) - (bytes=? etag (file->bytes cached-etag-path))) - (log-info "ETags match, skipping the rest of ~v" (url->string u)) - (begin - (log-info - "Downloading the rest of ~v. ETag: ~a, Last-Modified: ~v" - (url->string u) etag lmod) - (make-parent-directory* cached-object-path) - (make-parent-directory* cached-etag-path) - (make-parent-directory* cached-lmod-path) - (call-with-output-file cached-object-path - (curry copy-port body-input) - #:exists 'replace) - (when etag - (display-to-file etag cached-etag-path #:exists 'replace)) - (when lmod - (display-to-file etag cached-lmod-path #:exists 'replace)))) - (close-input-port body-input))] - [_ - (raise status)])] - [(_ _ _) - (log-error "Invalid URI: ~v" u)])) + (define-values (status-line headers body-input) + ; TODO Timeout. Currently hangs on slow connections. + (http-sendrecv/url u #:headers (list (format "User-Agent: ~a" user-agent)))) + (log-debug "headers: ~v" headers) + (log-debug "status-line: ~v" status-line) + (define status + (string->number (second (string-split (bytes->string/utf-8 status-line))))) + (log-debug "status: ~v" status) + ; TODO Handle redirects + (match status + [200 + (let* ([etag (header-get headers #"ETag")] + [lmod (header-get headers #"Last-Modified")] + [lmod-curr (if lmod (rfc2822->epoch lmod) #f)] + [lmod-prev (if (file-exists? cached-lmod-path) + (rfc2822->epoch (file->bytes cached-lmod-path)) + #f)]) + (log-debug "lmod-curr:~v lmod-prev:~v" lmod-curr lmod-prev) + (unless (or (and etag + (file-exists? cached-etag-path) + (bytes=? etag (file->bytes cached-etag-path)) + (begin + (log-info "ETags match, skipping the rest of ~v" (url->string u)) + #t)) + (and lmod-curr + lmod-prev + (<= lmod-curr lmod-prev) + (begin + (log-info "Last-Modified <= current skipping the rest of ~v" (url->string u)) + #t))) + (begin + (log-info + "Downloading the rest of ~v. ETag: ~a, Last-Modified: ~v" + (url->string u) etag lmod) + (make-parent-directory* cached-object-path) + (make-parent-directory* cached-etag-path) + (make-parent-directory* cached-lmod-path) + (call-with-output-file cached-object-path + (curry copy-port body-input) + #:exists 'replace) + (when etag + (display-to-file etag cached-etag-path #:exists 'replace)) + (when lmod + (display-to-file lmod cached-lmod-path #:exists 'replace)))) + (close-input-port body-input))] + [_ + (raise status)])) (: timeline-print (-> Out-Format (Listof Msg) Void)) (define (timeline-print out-format timeline) @@ -425,14 +476,20 @@ ; TODO No need for map - can just iter (void (concurrent-filter-map num-workers peer-download peers))) -; TODO timeline contract : time-sorted list of messages -(: timeline-read (-> Timeline-Order (Listof Peer) (Listof Msg))) -(define (timeline-read order peers) +(define (uniq xs) + (set->list (list->set xs))) + +(: peers->timeline (-> (listof Peer) (listof Msg))) +(define (peers->timeline peers) + (append* (filter-map peer->msgs peers))) + +(: timeline-sort (-> (listof Msg) timeline-order (Listof Msgs))) +(define (timeline-sort msgs order) (define cmp (match order ['old->new <] ['new->old >])) - (sort (append* (filter-map peer->msgs peers)) - (λ (a b) (cmp (Msg-ts-epoch a) (Msg-ts-epoch b))))) + (sort msgs (λ (a b) (cmp (Msg-ts-epoch a) + (Msg-ts-epoch b))))) (: paths->peers (-> (Listof String) (Listof Peer))) (define (paths->peers paths) @@ -486,10 +543,11 @@ #:help-labels "" "and is one of" - "r, read i : Read the timeline." + "r, read : Read the timeline (offline operation)." "d, download : Download the timeline." ; TODO Add path dynamically "u, upload : Upload your twtxt file (alias to execute ~/.tt/upload)." + "c, crawl : Discover new peers mentioned by known peers (offline operation)." "" #:args (command . args) (define log-writer (log-writer-start log-level)) @@ -507,9 +565,12 @@ njobs "Number of concurrent jobs." (set! num-workers (string->number njobs))] #:args file-paths - (define-values (_res _cpu real-ms _gc) - (time-apply timeline-download (list num-workers (paths->peers file-paths)))) - (log-info "Timeline downloaded in ~a seconds." (/ real-ms 1000.0))))] + (let ([peers (paths->peers file-paths)]) + (define-values (_res _cpu real-ms _gc) + (time-apply timeline-download (list num-workers peers))) + (log-info "Downloaded timelines from ~a peers in ~a seconds." + (length peers) + (/ real-ms 1000.0)))))] [(or "u" "upload") (command-line #:program @@ -536,5 +597,47 @@ "Long output format" (set! out-format 'multi-line)] #:args file-paths - (timeline-print out-format (timeline-read order (paths->peers file-paths)))))]) + (let* ([peers + (paths->peers file-paths)] + [timeline + (timeline-sort (peers->timeline peers) order)]) + (timeline-print out-format timeline))))] + [(or "c" "crawl") + (command-line + #:program + "tt crawl" + #:args file-paths + (let* ([peers-all-file + (build-path tt-home-dir "peers-all")] + [peers-mentioned-file + (build-path tt-home-dir "peers-mentioned")] + [peers + (paths->peers + (match file-paths + ; TODO Refactor such that path->string not needed + ['() (list (path->string peers-all-file))] + [_ file-paths]))] + [timeline + (peers->timeline peers)] + [peers-mentioned-curr + (uniq (append* (map Msg-mentions timeline)))] + [peers-mentioned-prev + (file->peers peers-mentioned-file)] + [peers-mentioned + (uniq (append peers-mentioned-prev + peers-mentioned-curr))] + [peers-all-prev + (file->peers peers-all-file)] + [peers-all + (uniq (append peers + peers-mentioned + peers-all-prev))]) + (peers->file peers-mentioned + peers-mentioned-file) + (peers->file peers-all + peers-all-file)))] + [command + (eprintf "Error: invalid command: ~v\n" command) + (eprintf "Please use the \"--help\" option to see a list of available commands.\n") + (exit 1)]) (log-writer-stop log-writer))))