Commit | Line | Data |
---|---|---|
cd541405 | 1 | #lang typed/racket/no-check |
4764ff89 | 2 | |
1d753430 | 3 | (require openssl/sha1) |
4764ff89 | 4 | (require racket/date) |
9c464d95 | 5 | (require |
d718efc4 | 6 | net/head |
edadb804 | 7 | net/uri-codec |
8da029e4 | 8 | net/url) |
4764ff89 | 9 | |
3c9c8266 SK |
10 | (require (prefix-in info: "info.rkt")) |
11 | ||
78398948 | 12 | (module+ test |
de3ff448 | 13 | (require rackunit)) |
78398948 | 14 | |
; Short local name for the URL structure type.
(define-type Url net/url-structs:url)

; Layout used when printing a message.
(define-type Out-Format (U 'single-line 'multi-line))

; Chronological direction of a sorted timeline.
(define-type Timeline-Order (U 'old->new 'new->old))

; Tagged outcome: (cons 'ok value) on success, (cons 'error reason) otherwise.
(define-type Result
  (∀ (α β)
     (U (cons 'ok α)
        (cons 'error β))))
29 | ||
; One parsed timeline message.
(struct Msg
  ([ts-epoch : Integer]          ; timestamp as epoch seconds (see rfc3339->epoch)
   [ts-orig  : String]           ; timestamp text exactly as it appeared in the feed
   [nick     : (Option String)]  ; author's nick, when known
   [uri      : Url]              ; feed the message was read from
   [text     : String]           ; message body
   [mentions : (Listof Peer)]))  ; peers referenced as @<...> in the body
9c464d95 | 37 | |
; A feed we know about.
(struct Peer
  ([nick    : (Option String)]   ; optional nick preceding the URI
   [uri     : Url]               ; parsed feed URI
   [uri-str : String]            ; string form of uri; the identity used by the peers set
   [comment : (Option String)])  ; optional text following "#"
  #:transparent)
4764ff89 | 44 | |
; An HTTP response as handed back by the request thread in uri-download.
(struct Resp
  ; status-line is Bytes, not String: uri-download decodes it with
  ; bytes->string/utf-8 before parsing the status code.
  ([status-line : Bytes]
   [headers     : (Listof Bytes)]
   [body-input  : Input-Port])
  #:transparent)
f65d6338 | 50 | |
; Set types for Peer values (make-immutable-peers and friends). Two peers are
; the same element when their uri-str fields are equal?.
;
; XXX The explicit hash procedure is required. Without it we INTERMITTENTLY
; get the following contract violations:
;
;     custom-elem-contents: contract violation
;       expected: custom-elem?
;       given: #f
;       context...:
;        /usr/share/racket/collects/racket/private/set-types.rkt:104:0: custom-set->list
;        /home/siraaj/proj/pub/tt/tt.rkt:716:0: crawl
;        /usr/share/racket/collects/racket/cmdline.rkt:191:51
;        body of (submod "/home/siraaj/proj/pub/tt/tt.rkt" main)
;
; TODO Investigate why and make a minimal reproducible test case.
(define-custom-set-types peers
  #:elem? Peer?
  (lambda (a b)
    (equal? (Peer-uri-str a)
            (Peer-uri-str b)))
  (lambda (peer)
    (equal-hash-code (Peer-uri-str peer))))
71 | ||
; Directory under the user's home where tt keeps its files: "~/.tt".
(: tt-home-dir Path-String)
(define tt-home-dir
  (let ([home (expand-user-path "~")])
    (build-path home ".tt")))
74 | ||
; Apply f to every element of xs on num-workers threads and collect the
; non-#f results.
;
; - The order of the results is unspecified.
; - Workers and the dispatcher talk through thread mailboxes: a worker sends
;   (cons 'next self) to ask for work, (cons 'result y) to deliver a result
;   and (cons 'exit id) when told it is 'done.
; - If f raises exn:fail for an element, the failure is logged and that
;   element is dropped. Otherwise the worker thread would die without ever
;   sending 'exit, and the dispatcher would wait for it forever.
(: concurrent-filter-map (∀ (α β) (-> Natural (-> α (Option β)) (Listof α) (Listof β))))
(define (concurrent-filter-map num-workers f xs)
  ; TODO preserve order of elements OR communicate that reorder is expected
  ; TODO switch from mailboxes to channels
  (define (make-worker id f)
    (define parent (current-thread))
    (λ ()
      (define self : Thread (current-thread))
      (: work (-> Void))
      (define (work)
        (thread-send parent (cons 'next self))
        (match (thread-receive)
          ['done (thread-send parent (cons 'exit id))]
          [(cons 'unit x)
           (define y
             (with-handlers
               ([exn:fail?
                 (λ (e)
                   (log-error "concurrent-filter-map: worker ~a failed: ~a"
                              id
                              (exn-message e))
                   #f)])
               (f x)))
           (when y (thread-send parent (cons 'result y)))
           (work)]))
      (work)))
  ; ws: ids of workers still running; xs: remaining inputs; ys: results so far.
  (: dispatch (∀ (α β) (-> (Listof Nonnegative-Integer) (Listof α) (Listof β) (Listof β))))
  (define (dispatch ws xs ys)
    (if (empty? ws)
        ys
        (match (thread-receive)
          [(cons 'exit w) (dispatch (remove w ws =) xs ys)]
          [(cons 'result y) (dispatch ws xs (cons y ys))]
          [(cons 'next thd)
           (match xs
             ['()
              (thread-send thd 'done)
              (dispatch ws xs ys)]
             [(cons x xs)
              (thread-send thd (cons 'unit x))
              (dispatch ws xs ys)])])))
  (define workers (range num-workers))
  (define threads (map (λ (id) (thread (make-worker id f))) workers))
  (define results (dispatch workers xs '()))
  (for-each thread-wait threads)
  results)
112 | ||
(module+ test
  ; Result order is unspecified, so both sides are sorted before comparing.
  (let ([keep-even (λ (n) (and (even? n) n))]
        [inputs (range 11)])
    (check-equal?
      (sort (concurrent-filter-map 10 keep-even inputs) <)
      (sort (filter-map keep-even inputs) <)
      "concurrent-filter-map")))
dad4504d | 119 | |
; Print one message to stdout with ANSI colors.
;
; out-format selects the layout; color-i picks the text color by cycling
; through a fixed palette.
(: msg-print (-> Out-Format Integer Msg Void))
(define msg-print
  (let* ([palette (vector 36 33)]
         [palette-size (vector-length palette)])
    (λ (out-format color-i msg)
      (define color (vector-ref palette (modulo color-i palette-size)))
      (define nick (Msg-nick msg))
      (define uri (url->string (Msg-uri msg)))
      (define text (Msg-text msg))
      ; Render the message's epoch timestamp in the given date-display-format.
      (define (timestamp fmt)
        (parameterize ([date-display-format fmt])
          (date->string (seconds->date (Msg-ts-epoch msg)) #t)))
      (match out-format
        ['single-line
         ; Fall back to the URI when the author has no nick.
         (printf "~a \033[1;37m<~a>\033[0m \033[0;~am~a\033[0m~n"
                 (timestamp 'iso-8601)
                 (or nick uri)
                 color
                 text)]
        ['multi-line
         (printf "~a (~a)~n\033[1;37m<~a~a>\033[0m~n\033[0;~am~a\033[0m~n~n"
                 (timestamp 'rfc2822)
                 (Msg-ts-orig msg)
                 (if nick (string-append nick " ") "")
                 uri
                 color
                 text)]))))
e96264cc | 146 | |
; Parse an RFC 3339-style timestamp into epoch seconds.
;
; Accepted shape: YYYY-MM-DDTHH:MM[:SS][.fraction][Z|(+|-)H[H][:]MM].
; Seconds default to 0 and the fraction is ignored. A timestamp without any
; zone designator is interpreted as UTC.
;
; Returns #f (after a debug log) when ts does not match that shape.
(: rfc3339->epoch (-> String (Option Nonnegative-Integer)))
(define rfc3339->epoch
  (let ([re (pregexp "^([0-9]{4})-([0-9]{2})-([0-9]{2})T([0-9]{2}):([0-9]{2})(:([0-9]{2}))?(\\.[0-9]+)?(Z|([+-])([0-9]{1,2}):?([0-9]{2}))?$")])
    (λ (ts)
      (match (regexp-match re ts)
        [(list _wholething yyyy mm dd HH MM _:SS SS _fractional tz-whole tz-sign tz-HH tz-MM)
         (let*
           ([tz-offset
              (match* (tz-whole tz-sign tz-HH tz-MM)
                [("Z" #f #f #f)
                 0]
                ; The zone group is optional in the regexp, so a missing zone
                ; is a legitimate input rather than an "impossible" one.
                [(#f #f #f #f)
                 0]
                [(_ (or "-" "+") (? identity h) (? identity m))
                 (let ([h (string->number h)]
                       [m (string->number m)]
                       ; Reverse to get back to UTC:
                       [op (match tz-sign ["+" -] ["-" +])])
                   (op 0 (+ (* 60 m) (* 60 (* 60 h)))))]
                [(a b c d)
                 (log-warning "Impossible TZ string: ~v, components: ~v ~v ~v ~v" tz-whole a b c d)
                 0])]
            [local-time? #f]
            [ts-epoch (find-seconds (if SS (string->number SS) 0)
                                    (string->number MM)
                                    (string->number HH)
                                    (string->number dd)
                                    (string->number mm)
                                    (string->number yyyy)
                                    local-time?)])
           (+ ts-epoch tz-offset))]
        [_
         (log-debug "Invalid timestamp: ~v" ts)
         #f]))))
180 | ||
; Parse one feed line of the form "<timestamp><whitespace><text>" into a Msg.
;
; Returns #f, with a debug log, when the line has no such shape, when the
; timestamp is rejected by rfc3339->epoch, or when parsing raises exn:fail.
; Only the first 100 characters of the line are included in log output.
(: str->msg (-> (Option String) Url String (Option Msg)))
(define str->msg
  (let ([line-re (pregexp "^([^\\s\t]+)[\\s\t]+(.*)$")])
    (λ (nick uri str)
      (define str-head (substring str 0 (min 100 (string-length str))))
      ; Turn one "@<...>" occurrence into a Peer (or #f when unparsable).
      (define (mention->peer m)
        (match (regexp-match #px"@<([^>]+)>" m)
          [(list _wholething nick-uri)
           (str->peer nick-uri)]))
      (define (on-failure e)
        (log-debug
          "Failed to parse msg: ~v, from: ~v, at: ~v, because: ~v"
          str-head nick (url->string uri) e)
        #f)
      (with-handlers* ([exn:fail? on-failure])
        (match (regexp-match line-re str)
          [(list _wholething ts-orig text)
           (define ts-epoch (rfc3339->epoch ts-orig))
           (cond
             [ts-epoch
              (define mentions
                (filter-map
                  mention->peer
                  (regexp-match* #px"@<[^\\s]+([\\s]+)?[^>]+>" text)))
              (Msg ts-epoch ts-orig nick uri text mentions)]
             [else
              (log-debug
                "Msg rejected due to invalid timestamp: ~v, nick:~v, uri:~v"
                str-head nick (url->string uri))
              #f])]
          [_
           (log-debug "Non-msg line from nick:~v, line:~a" nick str-head)
           #f])))))
88d50b3e | 212 | |
(module+ test
  ; TODO Test for when missing-nick case

  ; Every combination of optional seconds, optional fractional seconds, zone
  ; spelling and separator must parse, and must preserve the original fields.
  (let* ([tzs (for*/list ([d '("-" "+")]
                          [h '("5" "05")]
                          [m '("00" ":00" "57" ":57")])
                (string-append d h m))]
         [tzs (list* "" "Z" tzs)])
    (for* ([n '("fake-nick")]
           [u '("fake-uri")]
           [s '("" ":10")]
           [f '("" ".1337")]
           [z tzs]
           [sep (list "\t" " ")]
           [txt '("foo bar baz" "'jaz poop bear giraffe / tea" "@*\"``")])
      (let* ([ts (string-append "2020-11-18T22:22"
                                (if (non-empty-string? s) s ":00")
                                ; The fraction must actually be part of the
                                ; timestamp, or that regexp branch goes untested.
                                f
                                z)]
             [m (str->msg n u (string-append ts sep txt))])
        (check-not-false m)
        (check-equal? (Msg-nick m) n)
        (check-equal? (Msg-uri m) u)
        (check-equal? (Msg-text m) txt)
        (check-equal? (Msg-ts-orig m) ts (format "Given: ~v" ts)))))

  ; A single fully specified line, checked field by field.
  (let* ([ts "2020-11-18T22:22:09-0500"]
         [tab "\t"]
         [text "Lorem ipsum"]
         [nick "foo"]
         [uri "bar"]
         [actual (str->msg nick uri (string-append ts tab text))]
         [expected (Msg 1605756129 ts nick uri text '())])
    (check-equal?
      (Msg-ts-epoch actual)
      (Msg-ts-epoch expected)
      "str->msg ts-epoch")
    (check-equal?
      (Msg-ts-orig actual)
      (Msg-ts-orig expected)
      "str->msg ts-orig")
    (check-equal?
      (Msg-nick actual)
      (Msg-nick expected)
      "str->msg nick")
    (check-equal?
      (Msg-uri actual)
      (Msg-uri expected)
      "str->msg uri")
    (check-equal?
      (Msg-text actual)
      (Msg-text expected)
      "str->msg text")))
63afa259 | 265 | |
; Split text on runs of CR and/or LF characters; blank lines vanish because a
; run of line terminators counts as one separator.
(: str->lines (-> String (Listof String)))
(define (str->lines str)
  (define line-breaks #rx"[\r\n]+")
  (string-split str line-breaks))
269 | ||
(module+ test
  ; Consecutive newlines must not produce an empty line.
  (let ([input "abc\ndef\n\nghi"]
        [expected '("abc" "def" "ghi")])
    (check-equal? (str->lines input) expected)))
63afa259 | 272 | |
; Parse a whole feed: drop comment lines, then keep every line that str->msg
; accepts, in their original order.
(: str->msgs (-> (Option String) Url String (Listof Msg)))
(define (str->msgs nick uri str)
  (define lines (filter-comments (str->lines str)))
  (for*/list ([line (in-list lines)]
              [msg (in-value (str->msg nick uri line))]
              #:when msg)
    msg))
4764ff89 | 276 | |
; Root of the on-disk cache, inside tt-home-dir.
(: cache-dir Path-String)
(define cache-dir
  (build-path tt-home-dir "cache"))

; Where downloaded feed bodies are stored, one file per URI.
(define cache-object-dir
  (build-path cache-dir "objects"))
281 | ||
; Legacy (v1) cache location of a feed: a file in cache-object-dir named after
; the SHA-1 digest of the URI string. Used by uri-read-cached to find old cache
; files so they can be renamed to the v2 location.
(: url->cache-file-path-v1 (-> Url Path-String))
(define (url->cache-file-path-v1 uri)
  ; Hex SHA-1 digest of a string. The annotation is a separate form here; the
  ; inline ": T" slot after the argument list denotes the RETURN type, so a
  ; function type does not belong there.
  (: hash-sha1 (-> String String))
  (define (hash-sha1 str)
    (define in (open-input-string str))
    (define digest (sha1 in))
    (close-input-port in)
    digest)
  (build-path cache-object-dir (hash-sha1 (url->string uri))))
edadb804 SK |
290 | |
; Current (v2) cache location of a feed: a file in cache-object-dir named
; after the URI-encoded URI string.
(: url->cache-file-path-v2 (-> Url Path-String))
(define (url->cache-file-path-v2 uri)
  (define file-name (uri-encode (url->string uri)))
  (build-path cache-object-dir file-name))

; The scheme currently in use for locating cached objects.
(define url->cache-object-path url->cache-file-path-v2)
1d753430 | 296 | |
; Inverse of the v2 naming scheme: recover the feed URL from the (URI-encoded)
; name of a cache object file.
(: cache-object-filename->url (-> Path-String Url))
(define (cache-object-filename->url name)
  (define encoded (path->string name))
  (string->url (uri-decode encoded)))
300 | ||
; File holding the ETag last saved for a feed.
(define (url->cache-etag-path uri)
  (define file-name (uri-encode (url->string uri)))
  (build-path cache-dir "etags" file-name))

; File holding the Last-Modified value last saved for a feed.
(define (url->cache-lmod-path uri)
  (define file-name (uri-encode (url->string uri)))
  (build-path cache-dir "lmods" file-name))
9c464d95 | 306 | |
; Return the cached feed body for uri, or #f (with a debug log) when there is
; none. A cache file found at the legacy v1 location is first renamed to the
; v2 location, replacing whatever is there.
(: uri-read-cached (-> Url (Option String)))
(define (uri-read-cached uri)
  (define legacy-path (url->cache-file-path-v1 uri))
  (define current-path (url->cache-file-path-v2 uri))
  (when (file-exists? legacy-path)
    (rename-file-or-directory legacy-path current-path #t))
  (cond
    [(file-exists? current-path)
     (file->string current-path)]
    [else
     (log-debug "Cache file not found for URI: ~a" (url->string uri))
     #f]))
4214c0f3 | 318 | |
; Parse a string into a Url, returning #f instead of raising when string->url
; fails with exn:fail.
;
; The result is whatever string->url returns, hence (Option Url) rather than
; (Option String).
(: str->url (-> String (Option Url)))
(define (str->url s)
  (with-handlers*
    ([exn:fail? (λ (e) #f)])
    (string->url s)))
a60c484e | 324 | |
; Parse a peer reference of the form
;
;     [nick <whitespace>] scheme://rest [# comment]
;
; into a Peer. Returns #f when the string contains no such reference (debug
; log) or when the URI part is rejected by str->url (error log).
(: str->peer (-> String (Option Peer)))
(define (str->peer str)
  (log-debug "Parsing peer string: ~v" str)
  (match
    (regexp-match
      #px"(([^\\s\t]+)[\\s\t]+)?([a-zA-Z]+://[^\\s\t]*)[\\s\t]*(#\\s*(.*))?"
      str)
    [(list _wholething
           _nick-with-space
           nick
           uri-text
           _comment-with-hash
           comment)
     ; Locals are named uri-text/uri so they do not shadow net/url's `url`.
     (match (str->url uri-text)
       [#f
        (log-error "Invalid URI in peer string: ~v" str)
        #f]
       [uri
        (Peer nick uri (url->string uri) comment)])]
    [_
     (log-debug "Invalid peer string: ~v" str)
     #f]))
13c11724 | 347 | |
(module+ test
  ; Table of (input . expected) pairs for str->peer.
  (let ([cases
         (list
           (cons "foo http://bar/file.txt # some rando"
                 (Peer "foo" (str->url "http://bar/file.txt") "http://bar/file.txt" "some rando"))
           (cons "http://bar/file.txt # some rando"
                 (Peer #f (str->url "http://bar/file.txt") "http://bar/file.txt" "some rando"))
           (cons "http://bar/file.txt #"
                 (Peer #f (str->url "http://bar/file.txt") "http://bar/file.txt" ""))
           ; XXX URLs can have #s
           (cons "http://bar/file.txt#"
                 (Peer #f (str->url "http://bar/file.txt#") "http://bar/file.txt#" #f))
           (cons "http://bar/file.txt"
                 (Peer #f (str->url "http://bar/file.txt") "http://bar/file.txt" #f))
           (cons "foo http://bar/file.txt"
                 (Peer "foo" (str->url "http://bar/file.txt") "http://bar/file.txt" #f))
           (cons "foo bar # baz"
                 #f)
           (cons "foo bar://baz # quux"
                 (Peer "foo" (str->url "bar://baz") "bar://baz" "quux"))
           (cons "foo bar//baz # quux"
                 #f))])
    (for ([c (in-list cases)])
      (check-equal? (str->peer (car c)) (cdr c)))))
9c464d95 | 376 | |
; Drop every line whose very first character is "#".
(: filter-comments (-> (Listof String) (Listof String)))
(define (filter-comments lines)
  (for/list ([line (in-list lines)]
             #:unless (string-prefix? line "#"))
    line))
380 | ||
; Parse the contents of a peers file: one peer reference per line, comment
; lines and unparsable lines skipped.
(: str->peers (-> String (Setof Peer)))
(define (str->peers str)
  (define candidate-lines (filter-comments (str->lines str)))
  (define parsed (filter-map str->peer candidate-lines))
  (make-immutable-peers parsed))
9c464d95 | 384 | |
; Write peers to path, one "[nick ]uri[ # comment]" line each, sorted by nick
; (peers without a nick sort as ""). An existing file is replaced.
;
; Callers pass both peers sets and plain lists of Peer; set->list handles both.
(: peers->file (-> (U (Listof Peer) (Setof Peer)) Path-String Void))
(define (peers->file peers path)
  (define (nick-key p)
    (or (Peer-nick p) ""))
  (define (peer->line p)
    (match-define (Peer n _ u c) p)
    (format "~a~a~a"
            (if n (format "~a " n) "")
            u
            (if c (format " # ~a" c) "")))
  (display-lines-to-file
    (map peer->line
         (sort (set->list peers) string<? #:key nick-key))
    path
    #:exists 'replace))
401 | ||
; Read the peers listed in a file. A missing file yields an empty set and a
; warning.
(: file->peers (-> Path-String (Setof Peer)))
(define (file->peers file-path)
  (cond
    [(file-exists? file-path)
     (str->peers (file->string file-path))]
    [else
     ; file-path may be a string as well as a path (Path-String), and
     ; path->string only accepts paths.
     (log-warning "File does not exist: ~v"
                  (if (path? file-path)
                      (path->string file-path)
                      file-path))
     (make-immutable-peers)]))
9c464d95 | 409 | |
; Matches an HTTP-style date such as "Wed, 18 Nov 2020 22:22:09 GMT".
; Groups, in order: weekday, day, month name, year, hour, minute, second.
; The numeric fields are only shape-checked (e.g. hour "29" still matches).
(define re-rfc2822
  #px"^(Mon|Tue|Wed|Thu|Fri|Sat|Sun), ([0-9]{2}) (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) ([0-9]{4}) ([0-2][0-9]):([0-6][0-9]):([0-6][0-9]) GMT")
412 | ||
; Decode bytes as UTF-8 and read the text as a number; #f when the text is
; not a number.
(: b->n (-> Bytes (Option Number)))
(define (b->n b)
  (define text (bytes->string/utf-8 b))
  (string->number text))
416 | ||
; Map a three-letter English month abbreviation (as bytes) to its number,
; 1 through 12. Any other input fails to match.
(: mon->num (-> Bytes Natural))
(define (mon->num mon)
  (match* (mon)
    [(#"Jan")  1]
    [(#"Feb")  2]
    [(#"Mar")  3]
    [(#"Apr")  4]
    [(#"May")  5]
    [(#"Jun")  6]
    [(#"Jul")  7]
    [(#"Aug")  8]
    [(#"Sep")  9]
    [(#"Oct") 10]
    [(#"Nov") 11]
    [(#"Dec") 12]))
431 | ||
; Parse an HTTP-style date ("Wed, 18 Nov 2020 22:22:09 GMT") into epoch
; seconds. Returns #f when the text does not match re-rfc2822 or does not
; denote an existing date and time.
(: rfc2822->epoch (-> Bytes (Option Nonnegative-Integer)))
(define (rfc2822->epoch timestamp)
  (match (regexp-match re-rfc2822 timestamp)
    [(list _ _ dd mo yyyy HH MM SS)
     #:when (and dd mo yyyy HH MM SS)
     ; The regexp only checks the shape of each field, so values such as
     ; hour 29, minute 69 or 31 Feb get this far; find-seconds raises for
     ; those, which we report as "unparsable" like any other bad input.
     (with-handlers ([exn:fail? (λ (_e) #f)])
       (find-seconds (b->n SS)
                     (b->n MM)
                     (b->n HH)
                     (b->n dd)
                     (mon->num mo)
                     (b->n yyyy)
                     #f))]
    [_
     #f]))
446 | ||
; Value of the User-Agent header sent with every request:
;
;     tt/<version> (+<uri>[; @<nick>])
;
; <uri> and <nick> come from a peer listed in the "me" file under tt-home-dir.
; When that file is missing, or contains no parsable peer, the project URI is
; used instead.
(: user-agent String)
(define user-agent
  (let*
    ([prog-name "tt"]
     [prog-version (info:#%info-lookup 'version)]
     [prog-uri "https://github.com/xandkar/tt"]
     [user-peer-file (build-path tt-home-dir "me")]
     [user-peers
       (if (file-exists? user-peer-file)
           (file->peers user-peer-file)
           (make-immutable-peers))]
     [user
       ; set-first raises on an empty set, which would abort module loading.
       (if (set-empty? user-peers)
           (format "+~a" prog-uri)
           (match (set-first user-peers)
             [(Peer #f _ u _) (format "+~a" u)]
             [(Peer n  _ u _) (format "+~a; @~a" u n)]))])
    (format "~a/~a (~a)" prog-name prog-version user)))
461 | ||
; Look up the header field `name` among the header lines. Returns its value
; only when exactly one line carries that field; #f when none or several do.
(: header-get (-> (Listof Bytes) Bytes (Option Bytes)))
(define (header-get headers name)
  (define found
    (for*/list ([header (in-list headers)]
                [val (in-value (extract-field name header))]
                #:when val)
      val))
  (match found
    [(list val) val]
    [_ #f]))
467 | ||
; Save the response body for u into the cache, unless the response headers
; show that the cached copy is still current.
;
; The cached copy counts as current when either
;   - the response ETag equals the one stored for u, or
;   - the response Last-Modified is not later than the one stored for u.
;
; Returns 'skipped-cached in that case (body-input is left unread), otherwise
; writes the body, the ETag and the Last-Modified value (when present) to
; their cache files and returns 'downloaded-new.
(: uri-download-from-port
   (-> Url (Listof (U Bytes String)) Input-Port
       (U 'skipped-cached 'downloaded-new))) ; TODO 'ok|'error ?
(define (uri-download-from-port u headers body-input)
  (define u-str (url->string u))
  (define cached-object-path (url->cache-object-path u))
  (define cached-etag-path (url->cache-etag-path u))
  (define cached-lmod-path (url->cache-lmod-path u))
  ; This log call must follow the definition of cached-object-path: referring
  ; to an internal definition before it has been evaluated raises
  ; "cannot use before initialization" once debug logging is enabled.
  (log-debug "uri-download-from-port ~v into ~v" u-str cached-object-path)
  (define etag (header-get headers #"ETag"))
  (define lmod (header-get headers #"Last-Modified"))
  (define lmod-curr (if lmod (rfc2822->epoch lmod) #f))
  (define lmod-prev (if (file-exists? cached-lmod-path)
                        (rfc2822->epoch (file->bytes cached-lmod-path))
                        #f))
  (log-debug "lmod-curr:~v lmod-prev:~v" lmod-curr lmod-prev)
  (define cached?
    (or (and etag
             (file-exists? cached-etag-path)
             (bytes=? etag (file->bytes cached-etag-path))
             (begin
               (log-debug "ETags match, skipping the rest of ~v" u-str)
               #t))
        (and lmod-curr
             lmod-prev
             (<= lmod-curr lmod-prev)
             (begin
               (log-debug "Last-Modified <= current skipping the rest of ~v" u-str)
               #t))))
  (if (not cached?)
      (begin
        (log-debug
          "Downloading the rest of ~v. ETag: ~a, Last-Modified: ~v"
          u-str etag lmod)
        (make-parent-directory* cached-object-path)
        (make-parent-directory* cached-etag-path)
        (make-parent-directory* cached-lmod-path)
        (call-with-output-file cached-object-path
          (curry copy-port body-input)
          #:exists 'replace)
        (when etag
          (display-to-file etag cached-etag-path #:exists 'replace))
        (when lmod
          (display-to-file lmod cached-lmod-path #:exists 'replace))
        'downloaded-new)
      'skipped-cached))
7fd20778 SK |
514 | |
; Request u and store its body in the cache (see uri-download-from-port).
;
; timeout is the number of seconds allowed for obtaining the response status
; and headers; the body transfer itself is not timed.
;
; Returns
;   (ok . 'skipped-cached | 'downloaded-new)  for a 200 response,
;   (error . timeout)                         when no response arrived in time,
;   (error . (net-error . exn))               for network exceptions,
;   (error . (other . exn))                   for any other exception raised
;                                             while requesting,
;   (error . (http . status))                 for a non-200 response; status
;                                             is #f when the status line could
;                                             not be parsed.
(: uri-download
   (-> Positive-Float Url
       (Result (U 'skipped-cached 'downloaded-new)
               Any))) ; TODO Maybe more-precise error type?
(define (uri-download timeout u)
  (define timeout-chan (make-channel))
  (define result-chan (make-channel))
  (define timeout-thread
    (thread (λ ()
              ; Doing this instead of sync/timeout to distinguish error values,
              ; rather than just have #f to work with.
              (sleep timeout)
              (channel-put timeout-chan '(error . timeout)))))
  (define result-thread
    (thread (λ ()
              ; XXX We timeout getting a response, but body download could
              ; also take a long time and we might want to time that out as
              ; well, but then we may end-up with partially downloaded
              ; objects. But that could happen anyway if the server drops the
              ; connection for whatever reason.
              ;
              ; Maybe that is OK once we start treating the
              ; downloaded object as an addition to the stored set of
              ; messages, rather than the final set of messages.

              ; TODO message db
              ; - 1st try can just be an in-memory set that gets written-to
              ;   and read-from disk as a whole.
              (define result
                (with-handlers
                  ; TODO Maybe name each known errno? (exn:fail:network:errno-errno e)
                  ([exn:fail:network?
                    (λ (e) `(error . (net-error . ,e)))]
                   [exn?
                    (λ (e) `(error . (other . ,e)))])
                  (define-values (status-line headers body-input)
                    (http-sendrecv/url
                      u
                      #:headers (list (format "User-Agent: ~a" user-agent))))
                  `(ok . ,(Resp status-line headers body-input))))
              (channel-put result-chan result))))
  ; Whichever of the two threads reports first decides the outcome.
  (define result
    (sync timeout-chan
          result-chan))
  (kill-thread result-thread)
  (kill-thread timeout-thread)
  (match result
    [(cons 'error _)
     result]
    [(cons 'ok (Resp status-line headers body-input))
     (log-debug "headers: ~v" headers)
     (log-debug "status-line: ~v" status-line)
     ; The status code is the second whitespace-separated token of the status
     ; line. A line without one yields #f instead of raising.
     (define status
       (match (regexp-match #px#"^\\s*\\S+\\s+(\\S+)" status-line)
         [(list _ code) (string->number (bytes->string/utf-8 code #\?))]
         [_ #f]))
     (log-debug "status: ~v" status)
     ; TODO Handle redirects. Should be within same timeout as req and body.
     ; dynamic-wind makes sure the body port is closed even when saving the
     ; body raises.
     (dynamic-wind
       void
       (λ ()
         (match status
           [200
            `(ok . ,(uri-download-from-port u headers body-input))]
           [_
            `(error . (http . ,status))]))
       (λ ()
         (close-input-port body-input)))]))
4764ff89 | 580 | |
; Print every message of the timeline in order. The color index handed to
; msg-print is bumped each time the nick differs from the previous message's,
; so consecutive messages by the same nick share a color.
(: timeline-print (-> Out-Format (Listof Msg) Void))
(define (timeline-print out-format timeline)
  (void
    (for/fold ([state (cons "" 0)])
              ([m (in-list timeline)])
      (match-define (cons prev-nick i) state)
      (define nick (Msg-nick m))
      (define next-i (if (equal? prev-nick nick) i (+ 1 i)))
      (msg-print out-format next-i m)
      (cons nick next-i))))
4764ff89 | 590 | |
; All messages found in the cached copy of a peer's feed; '() when the feed is
; not in the cache.
(: peer->msgs (-> Peer (Listof Msg)))
(define (peer->msgs peer)
  (match-define (Peer nick uri uri-str _) peer)
  (log-debug "Reading peer nick:~v uri:~v" nick uri-str)
  ; TODO Expire cache
  (match (uri-read-cached uri)
    [#f '()]
    [msgs-data (str->msgs nick uri msgs-data)]))
4214c0f3 | 600 | |
; Download one peer's feed via uri-download, logging the start, the elapsed
; wall-clock time and the outcome. Returns uri-download's result unchanged.
(: peer-download
   (-> Positive-Float Peer
       (Result (U 'skipped-cached 'downloaded-new)
               Any)))
(define (peer-download timeout peer)
  (define uri (Peer-uri peer))
  (define u (Peer-uri-str peer))
  (log-info "Download BEGIN URL:~a" u)
  (define-values (results _tm-cpu-ms tm-real-ms _tm-gc-ms)
    (time-apply uri-download (list timeout uri)))
  ; time-apply returns the function's results as a list.
  (define result (first results))
  (log-info "Download END in ~a seconds, URL:~a, result:~s"
            (/ tm-real-ms 1000.0)
            u
            result)
  result)
4214c0f3 | 616 | |
; Download the feeds of all peers concurrently, then record which peers
; succeeded and which failed in two files under tt-home-dir. Failed peers get
; the error value written into their comment field.
(: timeline-download (-> Integer Positive-Float (Setof Peer) Void))
(define (timeline-download num-workers timeout peers)
  (define (download-one p)
    (cons p (peer-download timeout p)))
  (define results
    (concurrent-filter-map num-workers download-one (set->list peers)))
  ; #t for (peer . (ok . _)), #f for (peer . (error . _)).
  (define (ok-result? r)
    (match r
      [(cons _ (cons 'ok _)) #t]
      [(cons _ (cons 'error _)) #f]))
  (define peers-ok
    (for/list ([r (in-list results)]
               #:when (ok-result? r))
      (car r)))
  (define peers-err
    (for/list ([r (in-list results)]
               #:unless (ok-result? r))
      (match-define (cons p (cons 'error e)) r)
      (struct-copy Peer p [comment (format "~s" e)])))
  (peers->file peers-ok (build-path tt-home-dir "peers-last-downloaded-ok"))
  (peers->file peers-err (build-path tt-home-dir "peers-last-downloaded-err")))
9a6a9f9a | 637 | |
; Concatenate the cached messages of all given peers (unsorted).
;
; peers may be a peers set or a plain list; set->list accepts both.
(: peers->timeline (-> (U (Listof Peer) (Setof Peer)) (Listof Msg)))
(define (peers->timeline peers)
  ; peer->msgs always returns a list, so there is nothing to filter out;
  ; append-map does the map-then-flatten in one step.
  (append-map peer->msgs (set->list peers)))
a60c484e | 641 | |
; Sort messages by their epoch timestamp: ascending for 'old->new, descending
; for 'new->old.
(: timeline-sort (-> (Listof Msg) Timeline-Order (Listof Msg)))
(define (timeline-sort msgs order)
  (define cmp (match order
                ['old->new <]
                ['new->old >]))
  (sort msgs cmp #:key Msg-ts-epoch))
4764ff89 | 649 | |
; Union of the peers listed in the given files. With no paths given, the
; "peers" file under tt-home-dir is read instead.
(: paths->peers (-> (Listof String) (Setof Peer)))
(define (paths->peers paths)
  (define peer-ref-files
    (cond
      [(empty? paths)
       (define peer-refs-file (build-path tt-home-dir "peers"))
       (log-debug
         "No peer ref file paths provided, defaulting to ~v"
         (path->string peer-refs-file))
       (list peer-refs-file)]
      [else
       (log-debug "Peer ref file paths provided: ~v" paths)
       (map string->path paths)]))
  (define peers
    (apply set-union (map file->peers peer-ref-files)))
  (log-info "Read-in ~a peers." (set-count peers))
  peers)
d0a0e073 | 665 | |
; Set of all peers mentioned in any message of any cached feed.
;
; Each file in cache-object-dir is parsed as a feed whose URL is recovered
; from the file's name. When the directory does not exist yet (nothing has
; been downloaded), the result is the empty set.
(: mentioned-peers-in-cache (-> (Setof Peer)))
(define (mentioned-peers-in-cache)
  ; TODO Expire cache
  ; Parse one cache object file into its messages, logging size and timing.
  (define (file->msgs filename)
    (define path (build-path cache-object-dir filename))
    (define size (/ (file-size path) 1000000.0))
    (log-debug "BEGIN parsing ~a MB from file: ~v"
               size
               (path->string path))
    (define t0 (current-inexact-milliseconds))
    (define m (filter-map
                (λ (line)
                  (str->msg #f (cache-object-filename->url filename) line))
                (filter-comments
                  (file->lines path))))
    (define t1 (current-inexact-milliseconds))
    (log-debug "END parsing ~a MB in ~a seconds from file: ~v."
               size
               (* 0.001 (- t1 t0))
               (path->string path))
    (when (empty? m)
      (log-debug "No messages found in ~a" (path->string path)))
    m)
  ; directory-list raises on a missing directory, so check first.
  (define filenames
    (cond
      [(directory-exists? cache-object-dir)
       (directory-list cache-object-dir)]
      [else
       (log-debug "Cache object directory does not exist: ~a"
                  (path->string cache-object-dir))
       '()]))
  (define msgs (append-map file->msgs filenames))
  (make-immutable-peers (append-map Msg-mentions msgs)))
d3ac9e11 | 692 | |
; Ask the log writer thread to finish by logging a message with topic 'stop
; to the current logger, then wait for the thread to end.
(: log-writer-stop (-> Thread Void))
(define (log-writer-stop log-writer)
  (define logger (current-logger))
  (log-message logger 'fatal 'stop "Exiting." #f)
  (thread-wait log-writer))
697 | ||
; Create a fresh logger, start a thread that writes every event it receives at
; `level` to stderr with an ISO-8601 timestamp, and install the logger as
; current-logger. The thread ends when it receives an event whose topic is
; 'stop. Returns the writer thread.
(: log-writer-start (-> Log-Level Thread))
(define (log-writer-start level)
  (define logger (make-logger #f #f level #f))
  (define log-receiver (make-log-receiver logger level))
  (define (write-events)
    (match-define (vector event-level msg _ topic) (sync log-receiver))
    (unless (equal? topic 'stop)
      (eprintf "~a [~a] ~a~n" (date->string (current-date) #t) event-level msg)
      (write-events)))
  (define log-writer
    (thread
      (λ ()
        (parameterize ([date-display-format 'iso-8601])
          (write-events)))))
  (current-logger logger)
  log-writer)
01e4c499 | 716 | |
; Discover peers from mentions in the cached feeds and update the bookkeeping
; files under tt-home-dir:
;   peers-mentioned  every peer ever seen in a mention,
;   peers-parsed     known peers whose cached feed yields at least one message,
;   peers-all        every known peer.
(: crawl (-> Void))
(define (crawl)
  (define peers-all-file (build-path tt-home-dir "peers-all"))
  (define peers-mentioned-file (build-path tt-home-dir "peers-mentioned"))
  (define peers-parsed-file (build-path tt-home-dir "peers-parsed"))

  (define peers-mentioned-curr (mentioned-peers-in-cache))
  (define peers-mentioned-prev (file->peers peers-mentioned-file))
  (define peers-mentioned
    (set-union peers-mentioned-prev peers-mentioned-curr))

  (define peers-all-prev (file->peers peers-all-file))
  (define peers-all (set-union peers-mentioned peers-all-prev))
  (define peers-discovered (set-subtract peers-all peers-all-prev))
  (define peers-parsed
    (for/set ([p peers-all]
              #:unless (empty? (peer->msgs p)))
      p))

  ; TODO Deeper de-duping
  (log-info "Known peers mentioned: ~a" (set-count peers-mentioned))
  (log-info "Known peers parsed ~a" (set-count peers-parsed))
  (log-info "Known peers total: ~a" (set-count peers-all))
  (log-info "Discovered ~a new peers:~n~a"
            (set-count peers-discovered)
            (pretty-format
              (for/list ([p (in-list (set->list peers-discovered))])
                (list (Peer-nick p) (Peer-uri-str p) (Peer-comment p)))))

  (peers->file peers-mentioned peers-mentioned-file)
  (peers->file peers-parsed peers-parsed-file)
  (peers->file peers-all peers-all-file))
758 | ||
; Print the timeline assembled from the cached feeds of the peers listed in
; file-paths, sorted by `order` and rendered in `out-format`.
;
; ts-min and ts-max are optional inclusive bounds on the messages' epoch
; timestamps; #f means "no bound on that side".
(: read (-> (Listof String) (Option Number) (Option Number) Timeline-Order Out-Format Void))
(define (read file-paths ts-min ts-max order out-format)
  (let* ([peers
           (paths->peers file-paths)]
         [msgs
           (timeline-sort (peers->timeline peers) order)]
         [include?
           (λ (m)
             (and (or (not ts-min) (>= (Msg-ts-epoch m) ts-min))
                  (or (not ts-max) (<= (Msg-ts-epoch m) ts-max))))])
    (timeline-print out-format (filter include? msgs))))
770 | ||
; Run the user's upload hook (the "hooks/upload" file under tt-home-dir)
; through the shell and terminate the process: status 0 when the hook
; succeeded, 1 otherwise.
(: upload (-> Void))
(define (upload)
  ; FIXME Should not exit from here, but only after cleanup/logger-stoppage.
  (define hook (build-path tt-home-dir "hooks" "upload"))
  (define ok? (system (path->string hook)))
  (exit (if ok? 0 1)))
777 | ||
; Download the timelines of the peers listed in file-paths and log how many
; peers were processed and how long it took (wall-clock seconds).
;
; num-workers and timeout are forwarded unchanged to timeline-download.
(: download (-> (Listof String) Positive-Integer Positive-Float Void))
(define (download file-paths num-workers timeout)
  (define peers (paths->peers file-paths))
  ; time-apply yields: results, CPU ms, real ms, GC ms. Only real time is used.
  (define-values (_results _cpu-ms elapsed-ms _gc-ms)
    (time-apply timeline-download (list num-workers timeout peers)))
  (log-info "Downloaded timelines from ~a peers in ~a seconds."
            (set-count peers)
            (/ elapsed-ms 1000.0)))
786 | ||
; Run the sub-command named by command. Each sub-command parses its own
; options from current-command-line-arguments (the caller is expected to
; have set it to the arguments that follow the command name).
;
; An unknown command is reported on stderr and the process exits with 1.
(: dispatch (-> String Void))
(define (dispatch command)
  ; Parse str, the value given for option opt of program, as a number that
  ; satisfies valid?. On failure report the problem on stderr and exit with
  ; status 1 (same convention as the invalid-command branch below), instead
  ; of letting #f or an out-of-range number leak into the workers.
  (define (parse-number program opt str valid? expected)
    (define n (string->number str))
    (cond
      [(and n (valid? n)) n]
      [else
       (eprintf "Error: ~a: invalid value for ~a: ~v (expected ~a)\n"
                program opt str expected)
       (exit 1)]))
  (match command
    [(or "d" "download")
     ; Initially, 15 was fastest out of the tried: 1, 5, 10, 20. Then I
     ; started noticing significant slowdowns. Reducing to 5 seems to help.
     (let ([num-workers 5]
           [timeout     10.0])
       (command-line
         #:program "tt download"
         #:once-each
         [("-j" "--jobs")
          njobs "Number of concurrent jobs."
          (set! num-workers
                (parse-number "tt download" "--jobs" njobs
                              exact-positive-integer?
                              "a positive integer"))]
         [("-t" "--timeout")
          seconds "Timeout seconds per request."
          ; Coerce to a flonum so the value has the same representation as
          ; the 10.0 default and matches download's Positive-Float parameter.
          (set! timeout
                (exact->inexact
                  (parse-number "tt download" "--timeout" seconds
                                (λ (n) (and (real? n) (positive? n)))
                                "a positive number")))]
         #:args file-paths
         (download file-paths num-workers timeout)))]
    [(or "u" "upload")
     (command-line
       #:program "tt upload" #:args () (upload))]
    [(or "r" "read")
     (let ([out-format 'multi-line]
           [order      'old->new]
           [ts-min     #f]
           [ts-max     #f])
       (command-line
         #:program "tt read"
         #:once-each
         [("-r" "--rev")
          "Reverse displayed timeline order."
          (set! order 'new->old)]
         [("-m" "--min")
          m "Earliest time to display (ignore anything before it)."
          (set! ts-min (rfc3339->epoch m))]
         [("-x" "--max")
          x "Latest time to display (ignore anything after it)."
          (set! ts-max (rfc3339->epoch x))]
         #:once-any
         [("-s" "--short")
          "Short output format"
          (set! out-format 'single-line)]
         [("-l" "--long")
          "Long output format"
          (set! out-format 'multi-line)]
         #:args file-paths
         (read file-paths ts-min ts-max order out-format)))]
    [(or "c" "crawl")
     (command-line
       #:program "tt crawl" #:args () (crawl))]
    [command
     (eprintf "Error: invalid command: ~v\n" command)
     (eprintf "Please use the \"--help\" option to see a list of available commands.\n")
     (exit 1)]))
842 | ||
; Program entry point: parse the global options, start the log writer at the
; chosen level, hand the remaining arguments to the selected sub-command via
; current-command-line-arguments, and stop the log writer once dispatch
; returns.
(module+ main
  (let ([level 'info])
    (command-line
      #:program "tt"
      #:once-each
      [("-d" "--debug")
       "Enable debug log level."
       (set! level 'debug)]
      #:help-labels
      ""
      "and <command> is one of"
      "r, read     : Read the timeline (offline operation)."
      "d, download : Download the timeline."
      ; TODO Add path dynamically
      "u, upload   : Upload your twtxt file (alias to execute ~/.tt/hooks/upload)."
      "c, crawl    : Discover new peers mentioned by known peers (offline operation)."
      ""
      #:args (command . args)
      (let ([writer (log-writer-start level)])
        ; Sub-commands run their own command-line parse, so expose only the
        ; arguments that follow the command name.
        (current-command-line-arguments (list->vector args))
        ; TODO dispatch should return status with which we should exit after cleanups
        (dispatch command)
        (log-writer-stop writer)))))