Commit | Line | Data |
---|---|---|
cd541405 | 1 | #lang typed/racket/no-check |
4764ff89 | 2 | |
1d753430 | 3 | (require openssl/sha1) |
4764ff89 | 4 | (require racket/date) |
9c464d95 | 5 | (require |
d718efc4 | 6 | net/head |
edadb804 | 7 | net/uri-codec |
8da029e4 | 8 | net/url) |
4764ff89 | 9 | |
3c9c8266 SK |
10 | (require (prefix-in info: "info.rkt")) |
11 | ||
78398948 | 12 | (module+ test |
de3ff448 | 13 | (require rackunit)) |
78398948 | 14 | |
; A parsed URL structure from net/url.
(define-type Url net/url-structs:url)

; Layout used when printing a message.
(define-type Out-Format (U 'single-line 'multi-line))

; Direction in which a timeline is sorted by timestamp.
(define-type Timeline-Order (U 'old->new 'new->old))
25 | ||
; A single message parsed from a feed line.
(struct Msg
  ([ts-epoch : Integer]          ; timestamp as epoch seconds
   [ts-orig  : String]           ; timestamp text as it appeared in the line
   [nick     : (Option String)]  ; nick of the feed owner, #f when unknown
   [uri      : Url]              ; feed the line was read from
   [text     : String]           ; everything after the timestamp
   [mentions : (Listof Peer)]))  ; peers referenced as @<...> in the text
9c464d95 | 33 | |
; A feed reference: an optional nick plus the feed's URL.
; Transparent, so two peers with equal fields are `equal?`.
(struct Peer
  ([nick : (Option String)]
   [uri  : Url])
  #:transparent)
4764ff89 | 38 | |
; A peer discovered in a web log, together with the client name and version
; taken from the same log line.
(struct Follower
  ([nick    : (Option String)]
   [uri     : Url]
   [client  : String]
   [version : String])
  #:transparent)
45 | ||
; TODO Normalize dir var naming
; Directory under the user's home where tt keeps its files.
(: tt-home-dir Path-String)
(define tt-home-dir
  (let ([home (expand-user-path "~")])
    (build-path home ".tt")))
49 | ||
; Apply `f` to every element of `xs` on `num-workers` threads and collect the
; non-#f results, like `filter-map`. The order of the results is NOT
; preserved.
;
; Messages exchanged through thread mailboxes:
;   worker -> parent: (cons 'next thd)  request for a unit of work
;                     (cons 'result y)  a non-#f result
;                     (cons 'exit id)   the worker has finished
;   parent -> worker: (cons 'unit x)    an element to process
;                     'done             no work left
;
; A raise inside `f` is logged and treated as a #f result, so that a failing
; element cannot kill its worker and leave the dispatcher waiting forever.
(: concurrent-filter-map (∀ (α β) (-> Natural (-> α (Option β)) (Listof α) (Listof β))))
(define (concurrent-filter-map num-workers f xs)
  ; TODO preserve order of elements OR communicate that reorder is expected
  ; TODO switch from mailboxes to channels
  (define (make-worker id f)
    (define parent (current-thread))
    (λ ()
      (define self : Thread (current-thread))
      (: work (-> Void))
      (define (work)
        (thread-send parent (cons 'next self))
        (match (thread-receive)
          ['done (thread-send parent (cons 'exit id))]
          [(cons 'unit x)
           (let ([y (with-handlers
                      ; Breaks are left alone so the thread stays killable.
                      ([(λ (e) (not (exn:break? e)))
                        (λ (e)
                          (log-error
                            "concurrent-filter-map: worker ~a failed: ~v"
                            id e)
                          #f)])
                      (f x))])
             (when y (thread-send parent (cons 'result y)))
             (work))]))
      (work)))
  (: dispatch (∀ (α β) (-> (Listof Nonnegative-Integer) (Listof α) (Listof β) (Listof β))))
  (define (dispatch ws xs ys)
    (if (empty? ws)
        ys
        (match (thread-receive)
          [(cons 'exit w) (dispatch (remove w ws =) xs ys)]
          [(cons 'result y) (dispatch ws xs (cons y ys))]
          [(cons 'next thd)
           (match xs
             ['()
              (thread-send thd 'done)
              (dispatch ws xs ys)]
             [(cons x xs)
              (thread-send thd (cons 'unit x))
              (dispatch ws xs ys)])])))
  (define workers (range num-workers))
  (define threads (map (λ (id) (thread (make-worker id f))) workers))
  (define results (dispatch workers xs '()))
  (for-each thread-wait threads)
  results)
87 | ||
(module+ test
  ; Results may arrive in any order, so both sides are sorted before comparing.
  (let ()
    (define (keep-even x) (if (even? x) x #f))
    (define inputs (range 11))
    (define actual (sort (concurrent-filter-map 10 keep-even inputs) <))
    (define expected (sort (filter-map keep-even inputs) <))
    (check-equal? actual expected "concurrent-filter-map")))
dad4504d | 94 | |
; Print one message to the current output port in the requested format.
; `color-i` selects (modulo the palette size) the ANSI colour of the text.
(: msg-print (-> Out-Format Integer Msg Void))
(define msg-print
  (let ()
    (define palette (vector 36 33))
    (define palette-size (vector-length palette))
    ; Render the message's epoch timestamp in the given date display format.
    (define (timestamp msg style)
      (parameterize ([date-display-format style])
        (date->string (seconds->date (Msg-ts-epoch msg)) #t)))
    (λ (out-format color-i msg)
      (define color (vector-ref palette (modulo color-i palette-size)))
      (define nick (Msg-nick msg))
      (define uri (url->string (Msg-uri msg)))
      (define text (Msg-text msg))
      (match out-format
        ['single-line
         ; With no nick, the feed URI stands in for it.
         (printf "~a \033[1;37m<~a>\033[0m \033[0;~am~a\033[0m~n"
                 (timestamp msg 'iso-8601)
                 (if nick nick uri)
                 color
                 text)]
        ['multi-line
         (printf "~a (~a)~n\033[1;37m<~a~a>\033[0m~n\033[0;~am~a\033[0m~n~n"
                 (timestamp msg 'rfc2822)
                 (Msg-ts-orig msg)
                 (if nick (string-append nick " ") "")
                 uri
                 color
                 text)]))))
e96264cc | 121 | |
; Convert an RFC 3339-style timestamp to epoch seconds.
;
; Accepted shape: YYYY-MM-DDTHH:MM[:SS][.fraction][Z|(+|-)H[H][:]MM].
; Seconds default to 0 and the fraction is ignored. A timestamp with no
; offset is treated as UTC.
;
; Returns #f (after logging an error) when the string does not match or when
; it names a date that does not exist (e.g. month 13).
(: rfc3339->epoch (-> String (Option Nonnegative-Integer)))
(define rfc3339->epoch
  (let ([re (pregexp "^([0-9]{4})-([0-9]{2})-([0-9]{2})T([0-9]{2}):([0-9]{2})(:([0-9]{2}))?(\\.[0-9]+)?(Z|([+-])([0-9]{1,2}):?([0-9]{2}))?$")])
    (λ (ts)
      (match (regexp-match re ts)
        [(list _wholething yyyy mm dd HH MM _:SS SS _fractional tz-whole tz-sign tz-HH tz-MM)
         (let ([tz-offset
                (match* (tz-whole tz-sign tz-HH tz-MM)
                  ; No offset present at all: nothing to correct.
                  [(#f #f #f #f)
                   0]
                  [("Z" #f #f #f)
                   0]
                  [(_ (or "-" "+") (? identity h) (? identity m))
                   (let ([h (string->number h)]
                         [m (string->number m)]
                         ; Reverse to get back to UTC:
                         [op (match tz-sign ["+" -] ["-" +])])
                     (op 0 (+ (* 60 m) (* 60 (* 60 h)))))]
                  [(a b c d)
                   (log-warning "Impossible TZ string: ~v, components: ~v ~v ~v ~v" tz-whole a b c d)
                   0])]
               [local-time? #f])
           ; `find-seconds` raises exn:fail when no such date exists.
           (with-handlers
             ([exn:fail?
               (λ (e)
                 (log-error "Invalid timestamp: ~v" ts)
                 #f)])
             (+ (find-seconds (if SS (string->number SS) 0)
                              (string->number MM)
                              (string->number HH)
                              (string->number dd)
                              (string->number mm)
                              (string->number yyyy)
                              local-time?)
                tz-offset)))]
        [_
         (log-error "Invalid timestamp: ~v" ts)
         #f]))))
155 | ||
; Parse one feed line of the form "<timestamp><whitespace><text>" into a Msg
; attributed to `nick` and `uri`.
;
; Returns #f when the line does not have that shape, when the timestamp is
; rejected by `rfc3339->epoch`, or when parsing raises exn:fail (logged).
(: str->msg (-> (Option String) Url String (Option Msg)))
(define str->msg
  (let ([line-rx (pregexp "^([^\\s\t]+)[\\s\t]+(.*)$")])
    ; Peers referenced as @<...> inside the message text.
    (define (text->mentions text)
      (filter-map
        (λ (mention)
          (match (regexp-match #px"@<([^>]+)>" mention)
            [(list _wholething nick-uri)
             (str->peer nick-uri)]))
        (regexp-match* #px"@<[^\\s]+([\\s]+)?[^>]+>" text)))
    (λ (nick uri str)
      ; Only the first 100 characters of the line are ever logged.
      (define str-head (substring str 0 (min 100 (string-length str))))
      (define (on-failure e)
        (log-error
          "Failed to parse msg: ~v, from: ~v, at: ~v, because: ~v"
          str-head nick (url->string uri) e)
        #f)
      (with-handlers* ([exn:fail? on-failure])
        (match (regexp-match line-rx str)
          [(list _wholething ts-orig text)
           (let ([ts-epoch (rfc3339->epoch ts-orig)])
             (cond
               [ts-epoch
                (Msg ts-epoch ts-orig nick uri text (text->mentions text))]
               [else
                (log-error
                  "Msg rejected due to invalid timestamp: ~v, nick:~v, uri:~v"
                  str-head nick (url->string uri))
                #f]))]
          [_
           (log-debug "Non-msg line from nick:~v, line:~a" nick str-head)
           #f])))))
88d50b3e | 187 | |
(module+ test
  ; TODO Test for when missing-nick case
  ; Every combination of seconds, fractional seconds, offset spelling and
  ; separator must parse and keep the nick, uri, text and original timestamp.
  (let* ([tzs (for*/list ([d '("-" "+")]
                          [h '("5" "05")]
                          [m '("00" ":00" "57" ":57")])
                (string-append d h m))]
         [tzs (list* "" "Z" tzs)])
    (for* ([n '("fake-nick")]
           [u '("fake-uri")]
           [s '("" ":10")]
           [f '("" ".1337")]
           [z tzs]
           [sep (list "\t" " ")]
           [txt '("foo bar baz" "'jaz poop bear giraffe / tea" "@*\"``")])
      (let* ([ts (string-append "2020-11-18T22:22"
                                (if (non-empty-string? s) s ":00")
                                ; The fraction was iterated but never used
                                ; before, so it went untested.
                                f
                                z)]
             [m (str->msg n u (string-append ts sep txt))])
        (check-not-false m)
        (check-equal? (Msg-nick m) n)
        (check-equal? (Msg-uri m) u)
        (check-equal? (Msg-text m) txt)
        (check-equal? (Msg-ts-orig m) ts (format "Given: ~v" ts))
        )))

  ; A fully specified line, compared field by field against a hand-built Msg.
  (let* ([ts "2020-11-18T22:22:09-0500"]
         [tab " "]
         [text "Lorem ipsum"]
         [nick "foo"]
         [uri "bar"]
         [actual (str->msg nick uri (string-append ts tab text))]
         [expected (Msg 1605756129 ts nick uri text '())])
    (check-equal?
      (Msg-ts-epoch actual)
      (Msg-ts-epoch expected)
      "str->msg ts-epoch")
    (check-equal?
      (Msg-ts-orig actual)
      (Msg-ts-orig expected)
      "str->msg ts-orig")
    (check-equal?
      (Msg-nick actual)
      (Msg-nick expected)
      "str->msg nick")
    (check-equal?
      (Msg-uri actual)
      (Msg-uri expected)
      "str->msg uri")
    (check-equal?
      (Msg-text actual)
      (Msg-text expected)
      "str->msg text")))
63afa259 | 240 | |
; Split a string on runs of CR/LF characters; blank lines are dropped.
(: str->lines (-> String (Listof String)))
(define (str->lines str)
  (define line-breaks (regexp "[\r\n]+"))
  (string-split str line-breaks))
244 | ||
(module+ test
  ; Consecutive newlines must not produce empty lines.
  (let ([input "abc\ndef\n\nghi"]
        [expected '("abc" "def" "ghi")])
    (check-equal? (str->lines input) expected)))
63afa259 | 247 | |
; Parse a whole feed body into messages: split it into lines, drop comment
; lines, and keep every line that `str->msg` accepts.
(: str->msgs (-> (Option String) Url String (Listof Msg)))
(define (str->msgs nick uri str)
  (define lines (filter-comments (str->lines str)))
  (define (line->msg line)
    (str->msg nick uri line))
  (filter-map line->msg lines))
4764ff89 | 251 | |
; Root of the cache, inside the tt home directory.
(: cache-dir Path-String)
(define cache-dir
  (build-path tt-home-dir "cache"))

; Sub-directory of the cache that holds the cached objects.
(define cache-object-dir
  (build-path cache-dir "objects"))
256 | ||
; Cache object path in the old (v1) naming scheme: the file name is the SHA-1
; hex digest of the URL string.
(: url->cache-file-path-v1 (-> Url Path-String))
(define (url->cache-file-path-v1 uri)
  ; The annotation used to be written as the helper's *return* type.
  (: hash-sha1 (-> String String))
  (define (hash-sha1 str)
    (define in (open-input-string str))
    (define digest (sha1 in))
    (close-input-port in)
    digest)
  (build-path cache-object-dir (hash-sha1 (url->string uri))))
edadb804 SK |
265 | |
; Cache object path in the v2 naming scheme: the file name is the URI-encoded
; URL string.
(: url->cache-file-path-v2 (-> Url Path-String))
(define (url->cache-file-path-v2 uri)
  (define file-name (uri-encode (url->string uri)))
  (build-path cache-object-dir file-name))

; The naming scheme used for cache objects.
(define url->cache-object-path url->cache-file-path-v2)
1d753430 | 271 | |
; Turn a cache object's file name (a URI-encoded URL string) back into a URL.
; NOTE(review): `path->string` needs a path, not a string, although the
; annotation says Path-String. Confirm callers only pass paths.
(: cache-object-filename->url (-> Path-String Url))
(define (cache-object-filename->url name)
  (define encoded (path->string name))
  (define decoded (uri-decode encoded))
  (string->url decoded))
275 | ||
; File under the cache's "etags" directory for the given URL.
(define (url->cache-etag-path uri)
  (define file-name (uri-encode (url->string uri)))
  (build-path cache-dir "etags" file-name))

; File under the cache's "lmods" directory for the given URL.
(define (url->cache-lmod-path uri)
  (define file-name (uri-encode (url->string uri)))
  (build-path cache-dir "lmods" file-name))
9c464d95 | 281 | |
; TODO Return Option
; Read the cached object for `uri` as a string. Returns "" (after logging a
; warning) when nothing is cached.
(: uri-read-cached (-> Url String))
(define (uri-read-cached uri)
  (define legacy-path (url->cache-file-path-v1 uri))
  (define current-path (url->cache-file-path-v2 uri))
  ; Move an object still stored under its v1 name to the v2 name, replacing
  ; any existing v2 file.
  (when (file-exists? legacy-path)
    (rename-file-or-directory legacy-path current-path #t))
  (cond
    [(file-exists? current-path)
     (file->string current-path)]
    [else
     (log-warning "Cache file not found for URI: ~a" (url->string uri))
     ""]))
294 | ||
; Does the string start with "<letters>://"? Case is ignored.
(: uri? (-> String Boolean))
(define (uri? str)
  (define lowered (string-downcase str))
  (if (regexp-match #rx"^[a-z]+://.*" lowered) #t #f))
298 | ||
; Parse a peer reference of the form "URI" or "NICK URI" (whitespace
; separated).
;
; Returns #f, after logging an error, when the string has any other shape,
; when the URI part fails `uri?`, or when parsing raises exn:fail.
(: str->peer (-> String (Option Peer)))
(define (str->peer str)
  (log-debug "Parsing peer string: ~v" str)
  (with-handlers*
    ([exn:fail?
      (λ (e)
        (log-error "Invalid URI in string: ~v, exn: ~v" str e)
        #f)])
    (match (string-split str)
      [(list u)   #:when (uri? u) (Peer #f (string->url u))]
      [(list n u) #:when (uri? u) (Peer n (string->url u))]
      [_
       (log-error "Invalid peer string: ~v" str)
       #f])))
313 | ||
9c464d95 | 314 | |
; Drop every line whose first character is "#".
(: filter-comments (-> (Listof String) (Listof String)))
(define (filter-comments lines)
  (for/list ([line (in-list lines)]
             #:unless (string-prefix? line "#"))
    line))
318 | ||
; Parse a peers file body: one peer reference per line, comment lines
; skipped, lines rejected by `str->peer` dropped.
(: str->peers (-> String (Listof Peer)))
(define (str->peers str)
  (define lines (filter-comments (str->lines str)))
  (filter-map str->peer lines))
9c464d95 | 322 | |
; Write peers to `path`, one per line as "NICK URI" (or just "URI" when the
; peer has no nick), replacing any existing file.
(: peers->file (-> (Listof Peer) Path-String Void))
(define (peers->file peers path)
  (define (peer->line p)
    (match-define (Peer n u) p)
    (format "~a~a" (if n (format "~a " n) "") (url->string u)))
  (display-lines-to-file
    (map peer->line peers)
    path
    #:exists 'replace))
332 | ||
; Read the peers listed in a file. Returns '() (after logging a warning) when
; the file does not exist.
(: file->peers (-> Path-String (Listof Peer)))
(define (file->peers file-path)
  (cond
    [(file-exists? file-path)
     (str->peers (file->string file-path))]
    [else
     ; `path->string` rejects strings, yet a string is a valid Path-String.
     (log-warning "File does not exist: ~v"
                  (if (path? file-path)
                      (path->string file-path)
                      file-path))
     '()]))
9c464d95 | 340 | |
; Matches timestamps such as "Wed, 18 Nov 2020 22:22:09 GMT".
; Groups: weekday, day, month name, year, hour, minute, second.
; Only the literal zone "GMT" is accepted.
(define re-rfc2822
  #px"^(Mon|Tue|Wed|Thu|Fri|Sat|Sun), ([0-9]{2}) (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) ([0-9]{4}) ([0-2][0-9]):([0-6][0-9]):([0-6][0-9]) GMT")
343 | ||
; Decode bytes as UTF-8 and read them as a number; #f when not numeric.
(: b->n (-> Bytes (Option Number)))
(define (b->n b)
  (define text (bytes->string/utf-8 b))
  (string->number text))
347 | ||
; Map a three-letter English month abbreviation (as bytes) to 1..12.
; Any other input has no matching clause and raises a match error.
(: mon->num (-> Bytes Natural))
(define/match (mon->num mon)
  [((or #"Jan")) 1]
  [((or #"Feb")) 2]
  [((or #"Mar")) 3]
  [((or #"Apr")) 4]
  [((or #"May")) 5]
  [((or #"Jun")) 6]
  [((or #"Jul")) 7]
  [((or #"Aug")) 8]
  [((or #"Sep")) 9]
  [((or #"Oct")) 10]
  [((or #"Nov")) 11]
  [((or #"Dec")) 12])
362 | ||
; Convert a timestamp matching `re-rfc2822` (e.g. an HTTP Last-Modified
; value) to epoch seconds.
;
; Returns #f when the input does not match or names a date that does not
; exist. `find-seconds` raises exn:fail in the latter case, which used to
; propagate to the caller.
(: rfc2822->epoch (-> Bytes (Option Nonnegative-Integer)))
(define (rfc2822->epoch timestamp)
  (match (regexp-match re-rfc2822 timestamp)
    ; Every group in the pattern is mandatory, so a successful match always
    ; binds all of them.
    [(list _ _ dd mo yyyy HH MM SS)
     (with-handlers ([exn:fail? (λ (_e) #f)])
       (find-seconds (b->n SS)
                     (b->n MM)
                     (b->n HH)
                     (b->n dd)
                     (mon->num mo)
                     (b->n yyyy)
                     #f))]
    [_
     #f]))
377 | ||
; Value of the User-Agent header: "tt/<version> (+<uri>[; @<nick>])".
;
; The URI and nick come from the first peer listed in the "me" file under the
; tt home directory. When that file is missing, or lists no valid peer, the
; program's own URI is used; the empty case used to crash on `first`.
(: user-agent String)
(define user-agent
  (let*
    ([prog-name      "tt"]
     [prog-version   (info:#%info-lookup 'version)]
     [prog-uri       "https://github.com/xandkar/tt"]
     [user-peer-file (build-path tt-home-dir "me")]
     [user-peers     (if (file-exists? user-peer-file)
                         (file->peers user-peer-file)
                         '())]
     [user
      (match user-peers
        [(cons (Peer #f u) _) (format "+~a" (url->string u))]
        [(cons (Peer n u) _)  (format "+~a; @~a" (url->string u) n)]
        ['()                  (format "+~a" prog-uri)])])
    (format "~a/~a (~a)" prog-name prog-version user)))
392 | ||
; Look up header field `name` among the header lines. Returns its value only
; when exactly one line carries that field, otherwise #f.
(: header-get (-> (Listof Bytes) Bytes (Option Bytes)))
(define (header-get headers name)
  (define found
    (filter-map (λ (header) (extract-field name header)) headers))
  (match found
    [(list val) val]
    [_ #f]))
398 | ||
; Fetch `u` and store its body in the cache.
;
; On status 200 the body is written to the cache unless the response's ETag
; equals the cached one, or its Last-Modified is not newer than the cached
; one. The ETag and Last-Modified values are then saved alongside.
;
; Any other status is raised as an integer. The response body port is closed
; on every exit path; it used to leak on non-200 statuses and on exceptions.
(: uri-download (-> Url Void))
(define (uri-download u)
  (define cached-object-path (url->cache-object-path u))
  (define cached-etag-path (url->cache-etag-path u))
  (define cached-lmod-path (url->cache-lmod-path u))
  (log-debug "uri-download ~v into ~v" u cached-object-path)
  (define-values (status-line headers body-input)
    ; TODO Timeout. Currently hangs on slow connections.
    (http-sendrecv/url u #:headers (list (format "User-Agent: ~a" user-agent))))
  (dynamic-wind
    void
    (λ ()
      (log-debug "headers: ~v" headers)
      (log-debug "status-line: ~v" status-line)
      (define status
        (string->number (second (string-split (bytes->string/utf-8 status-line)))))
      (log-debug "status: ~v" status)
      ; TODO Handle redirects
      (match status
        [200
         (let* ([etag      (header-get headers #"ETag")]
                [lmod      (header-get headers #"Last-Modified")]
                [lmod-curr (if lmod (rfc2822->epoch lmod) #f)]
                [lmod-prev (if (file-exists? cached-lmod-path)
                               (rfc2822->epoch (file->bytes cached-lmod-path))
                               #f)])
           (log-debug "lmod-curr:~v lmod-prev:~v" lmod-curr lmod-prev)
           (unless (or (and etag
                            (file-exists? cached-etag-path)
                            (bytes=? etag (file->bytes cached-etag-path))
                            (begin
                              (log-info "ETags match, skipping the rest of ~v" (url->string u))
                              #t))
                       (and lmod-curr
                            lmod-prev
                            (<= lmod-curr lmod-prev)
                            (begin
                              (log-info "Last-Modified <= current skipping the rest of ~v" (url->string u))
                              #t)))
             (begin
               (log-info
                 "Downloading the rest of ~v. ETag: ~a, Last-Modified: ~v"
                 (url->string u) etag lmod)
               (make-parent-directory* cached-object-path)
               (make-parent-directory* cached-etag-path)
               (make-parent-directory* cached-lmod-path)
               (call-with-output-file cached-object-path
                 (curry copy-port body-input)
                 #:exists 'replace)
               (when etag
                 (display-to-file etag cached-etag-path #:exists 'replace))
               (when lmod
                 (display-to-file lmod cached-lmod-path #:exists 'replace)))))]
        [_
         (raise status)])
      (void))
    ; Runs whether the body above returns normally or escapes via `raise`.
    (λ () (close-input-port body-input))))
4764ff89 | 452 | |
; Print every message of the timeline in order. The colour index passed to
; `msg-print` is bumped each time the nick differs from the previous
; message's nick.
(: timeline-print (-> Out-Format (Listof Msg) Void))
(define (timeline-print out-format timeline)
  (let loop ([remaining timeline]
             [prev-nick ""]
             [color-i 0])
    (match remaining
      ['() (void)]
      [(cons (and m (Msg _ _ nick _ _ _)) rest)
       (define next-i
         (if (equal? prev-nick nick) color-i (+ 1 color-i)))
       (msg-print out-format next-i m)
       (loop rest nick next-i)])))
4764ff89 | 462 | |
; Parse the messages of a peer from its cached feed.
(: peer->msgs (-> Peer (Listof Msg)))
(define (peer->msgs f)
  (define nick (Peer-nick f))
  (define uri (Peer-uri f))
  (log-info "Reading peer nick:~v uri:~v" nick (url->string uri))
  (define cached (uri-read-cached uri))
  (str->msgs nick uri cached))
4214c0f3 | 468 | |
; Download one peer's feed into the cache and log how long it took.
; An exn:fail or a raised integer (HTTP status) is logged and turned into #f.
(: peer-download (-> Peer Void))
(define (peer-download f)
  (define nick (Peer-nick f))
  (define uri (Peer-uri f))
  (define u (url->string uri))
  (define (on-exception e)
    (log-error "Network error nick:~v uri:~v exn:~v" nick u e)
    #f)
  (define (on-status status)
    (log-error "HTTP error nick:~v uri:~a status:~a" nick u status)
    #f)
  (log-info "Downloading peer uri:~a" u)
  (with-handlers
    ([exn:fail? on-exception]
     [integer? on-status])
    (define-values (_result _tm-cpu-ms tm-real-ms _tm-gc-ms)
      (time-apply uri-download (list uri)))
    (log-info "Peer downloaded in ~a seconds, uri: ~a" (/ tm-real-ms 1000.0) u)))
4214c0f3 | 486 | |
; Download the feeds of all given peers using `num-workers` concurrent
; workers. The collected results are discarded.
(: timeline-download (-> Integer (Listof Peer) Void))
(define (timeline-download num-workers peers)
  ; TODO No need for map - can just iter
  (concurrent-filter-map num-workers peer-download peers)
  (void))
9a6a9f9a | 491 | |
; Remove duplicates (by `equal?`) by passing the list through a set.
; The order of the result is whatever the set yields.
(: uniq (∀ (α) (-> (Listof α) (Listof α))))
(define (uniq xs)
  (define as-set (list->set xs))
  (set->list as-set))
495 | ||
; Concatenate the cached messages of every peer into one unsorted list.
(: peers->timeline (-> (Listof Peer) (Listof Msg)))
(define (peers->timeline peers)
  (append* (filter-map peer->msgs peers)))
499 | ||
; Sort messages by their epoch timestamp, ascending for 'old->new and
; descending for 'new->old.
(: timeline-sort (-> (Listof Msg) Timeline-Order (Listof Msg)))
(define (timeline-sort msgs order)
  (define cmp (match order
                ['old->new <]
                ['new->old >]))
  (sort msgs cmp #:key Msg-ts-epoch))
4764ff89 | 507 | |
; Read peers from the given peer-reference files, or from the "peers" file
; in the tt home directory when no paths are given. Duplicates are removed.
; The logged count is taken before de-duplication.
(: paths->peers (-> (Listof String) (Listof Peer)))
(define (paths->peers paths)
  (define peer-files
    (match paths
      ['()
       (define peer-refs-file (build-path tt-home-dir "peers"))
       (log-debug
         "No peer ref file paths provided, defaulting to ~v"
         (path->string peer-refs-file))
       (list peer-refs-file)]
      [given
       (log-debug "Peer ref file paths provided: ~v" given)
       (map string->path given)]))
  (define peers (append* (map file->peers peer-files)))
  (log-info "Read-in ~a peers." (length peers))
  (uniq peers))
d0a0e073 | 523 | |
; Collect the distinct peers mentioned in any message of any cached object.
;
; Each file in the cache object directory is parsed line by line; its file
; name is decoded back into the feed URL. Returns '() (after logging a
; warning) when the directory does not exist, where `directory-list` used to
; raise.
(: mentioned-peers-in-cache (-> (Listof Peer)))
(define (mentioned-peers-in-cache)
  ; Parse one cache object into messages, logging size and timing.
  (define (cache-file->msgs filename)
    (define path (build-path cache-object-dir filename))
    (define size (/ (file-size path) 1000000.0))
    (log-info "BEGIN parsing ~a MB from file: ~v"
              size
              (path->string path))
    (define t0 (current-inexact-milliseconds))
    (define m (filter-map
                (λ (line)
                  (str->msg #f (cache-object-filename->url filename) line))
                (filter-comments
                  (file->lines path))))
    (define t1 (current-inexact-milliseconds))
    (log-info "END parsing ~a MB in ~a seconds from file: ~v."
              size
              (* 0.001 (- t1 t0))
              (path->string path))
    (when (empty? m)
      (log-warning "No messages found in ~a" (path->string path)))
    m)
  (define filenames
    (cond
      [(directory-exists? cache-object-dir)
       (directory-list cache-object-dir)]
      [else
       (log-warning "Cache object directory not found: ~a"
                    (path->string cache-object-dir))
       '()]))
  (define msgs
    (append* (map cache-file->msgs filenames)))
  (uniq (append* (map Msg-mentions msgs))))
549 | ||
; Keep only a follower's nick and URI, dropping client and version.
(: follower->peer (-> Follower Peer))
(define (follower->peer f)
  (match f
    [(Follower nick uri _client _version) (Peer nick uri)]))
553 | ||
; Look for a user-agent fragment of the form
;   "<client>/<version> (+<uri>; @<nick>)"
; in a web log line and build a Follower from it. Returns #f when the line
; contains no such fragment.
(: weblog-line->follower (-> String (Option Follower)))
(define weblog-line->follower
  (let ([re #px"([^/]+)/([^ ]+) +\\(\\+([a-z]+://[^;]+); *@([^\\)]+)\\)"])
    (λ (log-line)
      (match (regexp-match re log-line)
        [(list _ client version uri nick)
         (let ([f (Follower nick (string->url uri) client version)])
           (log-debug "Found follower: ~v" f)
           f)]
        [_ #f]))))
564 | ||
; Extract the distinct follower peers found in one web log file.
;
; The file is first passed to the "web-log-prefilter" hook under the tt
; "hooks" directory. Exit codes 0 and 1 mean its stdout holds the lines to
; parse. Any other code is logged and the whole file is parsed instead.
;
; The hook's output is drained BEFORE waiting for it to exit: waiting first
; deadlocks once the hook fills the pipe. All three ports are closed.
(define (weblog-file->peers file-path)
  (define size (/ (file-size file-path) 1000000.0))
  (log-info "BEGIN parsing ~a MB from file: ~v" size (path->string file-path))
  (define t0 (current-inexact-milliseconds))
  (define peers
    (let* ([prefilter-cmd-path
             (build-path tt-home-dir "hooks" "web-log-prefilter")]
           [lines
             (match (process* prefilter-cmd-path file-path)
               [(list in out _pid err ctrl)
                ; Nothing is ever written to the hook's stdin.
                (close-output-port out)
                ; Drain stderr concurrently so neither pipe can fill up.
                (define err-text "")
                (define err-drain
                  (thread (λ () (set! err-text (port->string err)))))
                (define filtered (port->lines in))
                (thread-wait err-drain)
                (ctrl 'wait)
                (close-input-port in)
                (close-input-port err)
                (match (ctrl 'exit-code)
                  [(or 0 1) ; Assuming grep's: 0: found, 1: not found, 2: error
                   filtered]
                  [_
                   (log-warning "Prefilter hook failed: ~a" err-text)
                   (file->lines file-path)])])])
      (map follower->peer (filter-map weblog-line->follower lines))))
  (define t1 (current-inexact-milliseconds))
  (log-info "END parsing ~a MB in ~a seconds from file: ~v."
            size
            (* 0.001 (- t1 t0))
            (path->string file-path))
  (when (empty? peers)
    (log-warning "No peers found in ~a" (path->string file-path)))
  (uniq peers))
591 | ||
; Distinct follower peers found in the regular files directly inside
; `dir-path`. A missing directory yields '().
(define (weblog-dir->peers dir-path)
  (define entries
    (if (directory-exists? dir-path)
        (directory-list dir-path)
        '()))
  ; Keep only regular files, as full paths.
  (define log-files
    (filter-map
      (λ (filename)
        (define file-path (build-path dir-path filename))
        (and (equal? 'file (file-or-directory-type file-path))
             file-path))
      entries))
  (uniq (append* (map weblog-file->peers log-files))))
604 | ||
; Distinct follower peers found across all the given web log directories.
(define (follower-peers-in-web-logs log-dirs)
  (define peers-per-dir (map weblog-dir->peers log-dirs))
  (uniq (append* peers-per-dir)))
607 | ||
; Ask the log writer thread to stop by sending a message with topic 'stop to
; the current logger, then wait for the thread to finish.
(: log-writer-stop (-> Thread Void))
(define (log-writer-stop log-writer)
  (define logger (current-logger))
  (log-message logger 'fatal 'stop "Exiting." #f)
  (thread-wait log-writer))
612 | ||
; Create a logger, install it as the current logger, and start a thread that
; prints every received message to stderr as "<date> [<level>] <message>".
; The thread ends when it receives a message whose topic is 'stop.
; Returns the writer thread.
(: log-writer-start (-> Log-Level Thread))
(define (log-writer-start level)
  (define logger (make-logger #f #f level #f))
  (define log-receiver (make-log-receiver logger level))
  (define (write-loop)
    (match-define (vector level msg _ topic) (sync log-receiver))
    (unless (equal? topic 'stop)
      (eprintf "~a [~a] ~a~n" (date->string (current-date) #t) level msg)
      (write-loop)))
  (define log-writer
    (thread
      (λ ()
        (parameterize ([date-display-format 'iso-8601])
          (write-loop)))))
  (current-logger logger)
  log-writer)
01e4c499 | 631 | |
; Command-line entry point: tt [-d] <command> [<args>...]
;
; Commands: read, download, upload, crawl (see the help labels below).
; The arguments after the command are re-parsed by a nested `command-line`.
(module+ main
  (let ([log-level 'info])
    (command-line
      #:program
      "tt"
      #:once-each
      [("-d" "--debug")
       "Enable debug log level."
       (set! log-level 'debug)]
      #:help-labels
      ""
      "and <command> is one of"
      "r, read : Read the timeline (offline operation)."
      "d, download : Download the timeline."
      ; TODO Add path dynamically
      "u, upload : Upload your twtxt file (alias to execute ~/.tt/upload)."
      "c, crawl : Discover new peers mentioned by known peers (offline operation)."
      ""
      #:args (command . args)
      (define log-writer (log-writer-start log-level))
      ; Let the log writer print everything pending before the process ends;
      ; a bare `exit` can drop the last log lines.
      (define (exit/flush code)
        (log-writer-stop log-writer)
        (exit code))
      (current-command-line-arguments (list->vector args))
      (match command
        [(or "d" "download")
         ; Initially, 15 was fastest out of the tried: 1, 5, 10, 20. Then I
         ; started noticing significant slowdowns. Reducing to 5 seems to help.
         (let ([num-workers 5])
           (command-line
             #:program
             "tt download"
             #:once-each
             [("-j" "--jobs")
              njobs "Number of concurrent jobs."
              (set! num-workers (string->number njobs))]
             #:args file-paths
             (let ([peers (paths->peers file-paths)])
               (define-values (_res _cpu real-ms _gc)
                 (time-apply timeline-download (list num-workers peers)))
               (log-info "Downloaded timelines from ~a peers in ~a seconds."
                         (length peers)
                         (/ real-ms 1000.0)))
             (let ([hook-path (build-path tt-home-dir "hooks" "download")])
               (if (file-exists? hook-path)
                   (if (member 'execute (file-or-directory-permissions hook-path))
                       (if (system (path->string hook-path))
                           (exit/flush 0)
                           (exit/flush 1))
                       (log-warning "Download hook found, but not executable."))
                   (log-warning "Download hook not found.")))))]
        [(or "u" "upload")
         (command-line
           #:program
           "tt upload"
           #:args ()
           (if (system (path->string (build-path tt-home-dir "hooks" "upload")))
               (exit/flush 0)
               (exit/flush 1)))]
        [(or "r" "read")
         (let ([out-format 'multi-line]
               [order 'old->new]
               [ts-min #f]
               [ts-max #f])
           (command-line
             #:program
             "tt read"
             #:once-each
             [("-r" "--rev")
              "Reverse displayed timeline order."
              (set! order 'new->old)]
             [("-m" "--min")
              m "Earliest time to display (ignore anything before it)."
              (set! ts-min (rfc3339->epoch m))]
             [("-x" "--max")
              x "Latest time to display (ignore anything after it)."
              (set! ts-max (rfc3339->epoch x))]
             #:once-any
             [("-s" "--short")
              "Short output format"
              (set! out-format 'single-line)]
             [("-l" "--long")
              "Long output format"
              (set! out-format 'multi-line)]
             #:args file-paths
             (let* ([peers
                      (paths->peers file-paths)]
                    [timeline
                      (timeline-sort (peers->timeline peers) order)]
                    [timeline
                      (filter (λ (m) (and (if ts-min (>= (Msg-ts-epoch m)
                                                         ts-min)
                                              #t)
                                          (if ts-max (<= (Msg-ts-epoch m)
                                                         ts-max)
                                              #t)))
                              timeline)])
               (timeline-print out-format timeline))))]
        [(or "c" "crawl")
         (command-line
           #:program
           "tt crawl"
           #:args log-files-directories
           (let* ([peers-sort
                    (λ (peers) (sort peers (match-lambda**
                                             [((Peer n1 _) (Peer n2 _))
                                              (string<? (if n1 n1 "")
                                                        (if n2 n2 ""))])))]
                  ; TODO Tidy-up redundant set conversions
                  [peers-all-file
                    (build-path tt-home-dir "peers-all")]
                  [peers-mentioned-file
                    (build-path tt-home-dir "peers-mentioned")]
                  [peers-followers-file
                    (build-path tt-home-dir "peers-followers")]
                  [peers-followers-curr
                    (follower-peers-in-web-logs
                      (if (empty? log-files-directories)
                          (list (build-path tt-home-dir "web-logs"))
                          log-files-directories))]
                  [peers-mentioned-curr
                    (mentioned-peers-in-cache)]
                  [peers-mentioned-prev
                    (file->peers peers-mentioned-file)]
                  [peers-followers-prev
                    (file->peers peers-followers-file)]
                  [peers-mentioned
                    (peers-sort (uniq (append peers-mentioned-prev
                                              peers-mentioned-curr)))]
                  [peers-all-prev
                    (file->peers peers-all-file)]
                  [peers-followers
                    (list->set (append peers-followers-prev
                                       peers-followers-curr))]
                  [peers-all
                    (list->set (append peers-mentioned
                                       (set->list peers-followers)
                                       peers-all-prev))]
                  [peers-discovered-followers
                    ; `peers-followers` is already a set; passing it to
                    ; `list->set`, as was done before, raises.
                    (set-subtract peers-followers
                                  (list->set peers-followers-prev))]
                  [peers-discovered
                    (set-subtract peers-all (list->set peers-all-prev))]
                  [peers-all
                    (peers-sort (set->list peers-all))])
             (log-info "Known peers mentioned: ~a" (length peers-mentioned))
             (log-info "Known peers total: ~a" (length peers-all))
             (log-info "Discovered ~a new followers:~n~a"
                       (set-count peers-discovered-followers)
                       (pretty-format (map
                                        (λ (p) (cons (Peer-nick p)
                                                     (url->string (Peer-uri p))))
                                        (set->list peers-discovered-followers))))
             (log-info "Discovered ~a new peers:~n~a"
                       (set-count peers-discovered)
                       (pretty-format (map
                                        (λ (p) (cons (Peer-nick p)
                                                     (url->string (Peer-uri p))))
                                        (set->list peers-discovered))))
             (peers->file (peers-sort (set->list peers-followers))
                          peers-followers-file)
             (peers->file peers-mentioned
                          peers-mentioned-file)
             (peers->file peers-all
                          peers-all-file)))]
        [command
         (eprintf "Error: invalid command: ~v\n" command)
         (eprintf "Please use the \"--help\" option to see a list of available commands.\n")
         (exit/flush 1)])
      (log-writer-stop log-writer))))