Commit | Line | Data |
---|---|---|
cd541405 | 1 | #lang typed/racket/no-check |
4764ff89 | 2 | |
1d753430 | 3 | (require openssl/sha1) |
4764ff89 | 4 | (require racket/date) |
9c464d95 | 5 | (require |
d718efc4 | 6 | net/head |
edadb804 | 7 | net/uri-codec |
8da029e4 | 8 | net/url) |
4764ff89 | 9 | |
3c9c8266 SK |
10 | (require (prefix-in info: "info.rkt")) |
11 | ||
78398948 | 12 | (module+ test |
de3ff448 | 13 | (require rackunit)) |
78398948 | 14 | |
; A parsed URL (the struct produced by `string->url`).
(define-type Url net/url-structs:url)

; Rendering mode accepted by `msg-print`.
(define-type Out-Format (U 'single-line 'multi-line))

; Direction accepted by `timeline-sort`.
(define-type Timeline-Order (U 'old->new 'new->old))

; Tagged pair: `(cons 'ok value)` or `(cons 'error reason)`.
(define-type Result
  (∀ (α β)
     (U (cons 'ok α)
        (cons 'error β))))

; Outcome of a download attempt, with the known failure reasons enumerated.
(define-type Download-Result
  (Result (U 'skipped-cached 'downloaded-new)
          (U 'timeout
             (Pair 'unsupported-url-scheme String)
             (Pair 'http-not-ok Positive-Integer)
             (Pair 'net-error Any)
             (Pair 'other Any))))

; NOTE(review): presumably an occurrence count plus a most-recent marker for a
; nick -- confirm against the code that populates it.
(struct Hist
  ([freq : Nonnegative-Integer]
   [last : Nonnegative-Integer])
  #:transparent)

; Per-URL table of nick (possibly absent) to its Hist record.
(define-type Url-Nick-Hist
  (Immutable-HashTable Url
                       (Immutable-HashTable (Option String) Hist)))
45 | ||
; Identity advertised to servers: a feed URL and an optional nick.
(struct User
  ([url  : Url]
   [nick : (Option String)]))

; A user paired with the program making requests on their behalf.
(struct User-Agent
  ([user : User]
   [prog : Prog]))

; Program name and version.
(struct Prog
  ([name    : String]
   [version : String]))

; One parsed feed message.
(struct Msg
  ([ts-epoch : Integer]         ; timestamp as epoch seconds
   [ts-orig  : String]          ; timestamp text exactly as it appeared
   [from     : Peer]            ; peer whose feed contained the message
   [text     : String]
   [mentions : (Listof Peer)])) ; peers referenced as @<...> in the text

; A feed reference: optional nick, parsed URL, the URL's string form, and an
; optional trailing comment.
(struct Peer
  ([nick    : (Option String)]
   [url     : Url]
   [url-str : String]
   [comment : (Option String)])
  #:transparent)
4764ff89 | 71 | |
; This program's name and version (version is read from info.rkt).
(: prog Prog)
(define prog
  (let ([version (info:#%info-lookup 'version)])
    (Prog "tt" version)))

; Identity used whenever no user can be read from a file: the project's
; home page and no nick.
(: user-default User)
(define user-default
  (let ([home-page (string->url "https://github.com/xandkar/tt")])
    (User home-page #f)))
7296ed94 SK |
79 | |
; Renders a user as "+URL" or, when a nick is present, "+URL; @NICK".
(: user->str (-> User String))
(define (user->str user)
  (define location (url->string (User-url user)))
  (define nick (User-nick user))
  (cond
    [nick (format "+~a; @~a" location nick)]
    [else (format "+~a" location)]))
7296ed94 SK |
87 | |
; Renders a User-Agent as "NAME/VERSION (USER)", where USER is `user->str`.
(: user-agent->str (-> User-Agent String))
(define (user-agent->str ua)
  (define program (User-Agent-prog ua))
  (format "~a/~a (~a)"
          (Prog-name program)
          (Prog-version program)
          (user->str (User-Agent-user ua))))
92 | ||
; Pairs USER with this program's identity (`prog`) to form a User-Agent.
; The annotation previously claimed this was a bare `User`; it is a function.
(: user->user-agent (-> User User-Agent))
(define (user->user-agent user)
  (User-Agent user prog))
96 | ||
; The User-Agent header value in effect. Starts out describing the default
; user and is replaced by `set-user-agent-str`.
(: user-agent-str String)
(define user-agent-str
  (let ([default-agent (user->user-agent user-default)])
    (user-agent->str default-agent)))

; Rebuilds `user-agent-str` from the user found in FILENAME (see `file->user`)
; and logs the new value.
(: set-user-agent-str (-> Path-String Void))
(define (set-user-agent-str filename)
  (define agent (user->user-agent (file->user filename)))
  (set! user-agent-str (user-agent->str agent))
  (log-info "User-Agent string is now set to: ~v" user-agent-str))
105 | ||
; Picks the user to advertise from the peers listed in FILENAME.
;
; - exactly one peer: that peer is used;
; - several peers: the first one is used and a warning is logged;
; - no peers, or the file is missing: `user-default` is used and a warning
;   is logged.
;
; The default user is logged through `user->str`: User is an opaque struct,
; so formatting it directly with ~a would only print "#<User>".
(: file->user (-> Path-String User))
(define (file->user filename)
  (cond
    [(file-exists? filename)
     (match (file->peers filename)
       [(list p)
        (log-info
          "User-Agent. Found one peer in file: ~v. Using the found peer: ~a"
          filename
          (peer->str p))
        (peer->user p)]
       [(list* p _)
        (log-warning
          "User-Agent. Multiple peers in file: ~v. Picking arbitrary: ~a"
          filename
          (peer->str p))
        (peer->user p)]
       ['()
        (log-warning
          "User-Agent. No peers found in file: ~v. Using the default user: ~a"
          filename
          (user->str user-default))
        user-default])]
    [else
     (log-warning
       "User-Agent. File doesn't exist: ~v. Using the default user: ~a"
       filename
       (user->str user-default))
     user-default]))
134 | ||
; Builds a User from a peer's URL and nick; url-str and comment are dropped.
(: peer->user (-> Peer User))
(define (peer->user p)
  (User (Peer-url p) (Peer-nick p)))
139 | ||
; Two peers are the same peer when their URL strings are equal; nick and
; comment are ignored.
(: peers-equal? (-> Peer Peer Boolean))
(define (peers-equal? p1 p2)
  (let ([u1 (Peer-url-str p1)]
        [u2 (Peer-url-str p2)])
    (equal? u1 u2)))

; Hash code consistent with `peers-equal?`: derived from the URL string only.
(: peer-hash (-> Peer Fixnum))
(define (peer-hash p)
  (define key (Peer-url-str p))
  (equal-hash-code key))

(define-custom-set-types peers #:elem? Peer? peers-equal? peer-hash)
e2840743 SK |
153 | ; XXX Without supplying above explicit hash procedure, we INTERMITTENTLY get |
154 | ; the following contract violations: | |
155 | ; | |
156 | ; custom-elem-contents: contract violation | |
157 | ; expected: custom-elem? | |
158 | ; given: #f | |
159 | ; context...: | |
160 | ; /usr/share/racket/collects/racket/private/set-types.rkt:104:0: custom-set->list | |
161 | ; /home/siraaj/proj/pub/tt/tt.rkt:716:0: crawl | |
162 | ; /usr/share/racket/collects/racket/cmdline.rkt:191:51 | |
163 | ; body of (submod "/home/siraaj/proj/pub/tt/tt.rkt" main) | |
164 | ; | |
165 | ; TODO Investigate why and make a minimal reproducible test case. | |
166 | ||
; Merges any number of peer lists into one list with a single entry per URL
; string, sorted by URL string.
;
; Among entries sharing a URL, the earliest one that has a nick wins; if none
; has a nick, the earliest entry wins.
(: peers-merge (-> (Listof Peer) * (Listof Peer)))
(define (peers-merge . peer-sets)
  ; Chooses between the current winner and the next candidate.
  (define (prefer winner candidate)
    (cond
      [(Peer-nick winner)    winner]    ; TODO compare which is more-common?
      [(Peer-nick candidate) candidate]
      [else                  winner]))  ; TODO update with most-common nick?
  (: merge-group (-> (Listof Peer) Peer))
  (define (merge-group group)
    (when (null? group)
      (raise 'impossible))
    (for/fold ([winner (car group)])
              ([candidate (in-list (cdr group))])
      (prefer winner candidate)))
  (define everyone (append* peer-sets))
  (define merged (map merge-group (group-by Peer-url-str everyone)))
  (sort merged string<? #:key Peer-url-str))
50f0609d SK |
184 | |
(module+ test
  ; Merging must not depend on argument order: the nick-bearing record wins
  ; for a shared URL and the result is sorted by URL.
  (let* ([url-a "http://foo/bar"]
         [url-b "http://baz/quux"]
         [anon    (Peer #f  (string->url url-a) url-a #f)]
         [named-a (Peer "a" (string->url url-a) url-a #f)]
         [named-b (Peer "b" (string->url url-b) url-b #f)]
         [set-1 (list anon)]
         [set-2 (list named-a named-b)]
         [expected (list named-b named-a)])
    (for ([args (in-list (list (list set-1 set-2)
                               (list set-2 set-1)))])
      (check-equal? expected (apply peers-merge args)))))
50f0609d | 195 | |
; Root of this program's on-disk state: ".tt" under the user's home directory.
(: tt-home-dir Path-String)
(define tt-home-dir
  (let ([home (expand-user-path "~")])
    (build-path home ".tt")))

; The "peers" directory under `tt-home-dir`.
(: pub-peers-dir Path-String)
(define pub-peers-dir
  (build-path tt-home-dir "peers"))
201 | ||
; Like `filter-map`, but applies F to the elements of XS on NUM-WORKERS
; threads. Results for which F returns #f are dropped. The order of the
; returned list is NOT the order of XS.
;
; Protocol (thread mailboxes): each worker asks the parent for work with
; (cons 'next self); the parent answers (cons 'unit x) or 'done; a worker
; reports (cons 'result y) for every non-#f result and (cons 'exit id) when
; told it is done. The parent returns once every worker has exited.
;
; If F raises on some element, the failure is logged and the element is
; treated as yielding no result. Without this the worker would die before
; sending 'exit and the parent would wait forever.
(: concurrent-filter-map (∀ (α β) (-> Natural (-> α (Option β)) (Listof α) (Listof β))))
(define (concurrent-filter-map num-workers f xs)
  ; TODO preserve order of elements OR communicate that reorder is expected
  ; TODO switch from mailboxes to channels
  (define (make-worker id f)
    (define parent (current-thread))
    (λ ()
       (define self : Thread (current-thread))
       (: work (-> Void))
       (define (work)
         (thread-send parent (cons 'next self))
         (match (thread-receive)
           ['done (thread-send parent (cons 'exit id))]
           [(cons 'unit x)
            (define y
              (with-handlers
                ([exn:fail?
                  (λ (e)
                     (log-error "concurrent-filter-map: worker ~a failed: ~a"
                                id
                                (exn-message e))
                     #f)])
                (f x)))
            (when y (thread-send parent (cons 'result y)))
            (work)]))
       (work)))
  ; ws: ids of workers still running; xs: pending inputs; ys: results so far.
  (: dispatch (∀ (α β) (-> (Listof Nonnegative-Integer) (Listof α) (Listof β) (Listof β))))
  (define (dispatch ws xs ys)
    (if (empty? ws)
        ys
        (match (thread-receive)
          [(cons 'exit w)   (dispatch (remove w ws =) xs ys)]
          [(cons 'result y) (dispatch ws xs (cons y ys))]
          [(cons 'next thd) (match xs
                              ['() (begin
                                     (thread-send thd 'done)
                                     (dispatch ws xs ys))]
                              [(cons x xs) (begin
                                             (thread-send thd (cons 'unit x))
                                             (dispatch ws xs ys))])])))
  (define workers (range num-workers))
  (define threads (map (λ (id) (thread (make-worker id f))) workers))
  (define results (dispatch workers xs '()))
  (for-each thread-wait threads)
  results)
239 | ||
(module+ test
  ; Output order is unspecified, so both sides are sorted before comparing.
  (let ([keep-even (λ (x) (and (even? x) x))]
        [inputs (range 11)])
    (check-equal? (sort (concurrent-filter-map 10 keep-even inputs) <)
                  (sort (filter-map keep-even inputs) <)
                  "concurrent-filter-map")))
dad4504d | 246 | |
; Prints MSG to the current output port using ANSI escape sequences.
;
; COLOR-I selects the text colour by cycling (modulo) through a fixed
; two-colour palette.
;
; - 'single-line: ISO-8601 time, then <nick> (or <url> when there is no nick),
;   then the text, all on one line.
; - 'multi-line: RFC-2822 time followed by the original timestamp in
;   parentheses, then <nick url> (or <url>), then the text, then a blank line.
(: msg-print (-> Out-Format Integer Msg Void))
(define msg-print
  (let ()
    (define palette (vector 36 33))
    (define palette-size (vector-length palette))
    ; Message time rendered in the given `date-display-format` style.
    (define (format-ts style msg)
      (parameterize ([date-display-format style])
        (date->string (seconds->date (Msg-ts-epoch msg)) #t)))
    (λ (out-format color-i msg)
      (define sender (Msg-from msg))
      (define color (vector-ref palette (modulo color-i palette-size)))
      (define nick (Peer-nick sender))
      (define url (Peer-url-str sender))
      (define text (Msg-text msg))
      (match out-format
        ['single-line
         (printf "~a \033[1;37m<~a>\033[0m \033[0;~am~a\033[0m~n"
                 (format-ts 'iso-8601 msg)
                 (or nick url)
                 color
                 text)]
        ['multi-line
         (printf "~a (~a)~n\033[1;37m<~a~a>\033[0m~n\033[0;~am~a\033[0m~n~n"
                 (format-ts 'rfc2822 msg)
                 (Msg-ts-orig msg)
                 (if nick (string-append nick " ") "")
                 url
                 color
                 text)]))))
e96264cc | 272 | |
; Parses an RFC-3339-like timestamp into epoch seconds (UTC), or #f.
;
; Accepted shape: YYYY-MM-DDTHH:MM[:SS][.fraction][Z | (+|-)H[H][:]MM].
; Seconds default to 0, the fraction is ignored, and a missing zone is
; treated as UTC.
;
; Returns #f (and logs at debug level) both when the text does not match the
; shape and when it matches but names a date that does not exist (for example
; month 13), which makes `find-seconds` raise.
(: rfc3339->epoch (-> String (Option Nonnegative-Integer)))
(define rfc3339->epoch
  (let ([re (pregexp "^([0-9]{4})-([0-9]{2})-([0-9]{2})T([0-9]{2}):([0-9]{2})(:([0-9]{2}))?(\\.[0-9]+)?(Z|([+-])([0-9]{1,2}):?([0-9]{2}))?$")])
    (λ (ts)
      (match (regexp-match re ts)
        [(list _wholething yyyy mm dd HH MM _:SS SS _fractional tz-whole tz-sign tz-HH tz-MM)
         (define tz-offset
           (match* (tz-whole tz-sign tz-HH tz-MM)
             ; No zone at all is a legitimate input, not an "impossible" one.
             [(#f #f #f #f)
              0]
             [("Z" #f #f #f)
              0]
             [(_ (or "-" "+") (? string? h) (? string? m))
              (let ([h (string->number h)]
                    [m (string->number m)]
                    ; Reverse to get back to UTC:
                    [op (match tz-sign ["+" -] ["-" +])])
                (op 0 (+ (* 60 m) (* 60 (* 60 h)))))]
             [(a b c d)
              (log-warning "Impossible TZ string: ~v, components: ~v ~v ~v ~v" tz-whole a b c d)
              0]))
         (define local-time? #f)
         (with-handlers
           ([exn:fail?
             (λ (_)
               (log-debug "Invalid timestamp: ~v" ts)
               #f)])
           (+ (find-seconds (if SS (string->number SS) 0)
                            (string->number MM)
                            (string->number HH)
                            (string->number dd)
                            (string->number mm)
                            (string->number yyyy)
                            local-time?)
              tz-offset))]
        [_
         (log-debug "Invalid timestamp: ~v" ts)
         #f]))))
306 | ||
; Parses one feed line, "TIMESTAMP<whitespace>TEXT", into a Msg from FROM.
;
; Returns #f (logging at debug level) when the line does not have that shape,
; when the timestamp is rejected by `rfc3339->epoch`, or when anything raises
; while parsing. Only the first 100 characters of the line are logged.
;
; Mentions are the @<...> spans of TEXT that `str->peer` can parse; spans it
; cannot parse are skipped.
(: str->msg (-> Peer String (Option Msg)))
(define str->msg
  (let ([re (pregexp "^([^\\s\t]+)[\\s\t]+(.*)$")])
    (λ (from str)
      (define from-str (peer->str from))
      (define str-head (substring str 0 (min 100 (string-length str))))
      (with-handlers*
        ([exn:fail?
          (λ (e)
            ; The directives must match the argument count, otherwise this
            ; handler itself raises when debug logging is enabled.
            (log-debug
              "Failed to parse msg: ~v, from: ~v, because: ~v"
              str-head from-str e)
            #f)])
        (match (regexp-match re str)
          [(list _wholething ts-orig text)
           (let ([ts-epoch (rfc3339->epoch ts-orig)])
             (if ts-epoch
                 (let ([mentions
                         (filter-map
                           (λ (m) (match (regexp-match #px"@<([^>]+)>" m)
                                    [(list _wholething nick-url)
                                     (str->peer nick-url)]
                                    ; The outer pattern is looser than this
                                    ; one; a span that fails here is simply
                                    ; not a mention.
                                    [_ #f]))
                           (regexp-match* #px"@<[^\\s]+([\\s]+)?[^>]+>" text))])
                   (Msg ts-epoch ts-orig from text mentions))
                 (begin
                   (log-debug
                     "Msg rejected due to invalid timestamp. From:~v. Line:~v"
                     from-str str-head)
                   #f)))]
          [_
           (log-debug "Non-msg line. From:~v. Line:~v" from-str str-head)
           #f])))))
88d50b3e | 339 | |
(module+ test
  ; TODO Test for when missing-nick case
  ; Every combination of seconds, fractional seconds, zone spelling, separator
  ; and text must parse and round-trip the sender, text and timestamp text.
  (let* ([tzs (for*/list ([d '("-" "+")]
                          [h '("5" "05")]
                          [m '("00" ":00" "57" ":57")])
                (string-append d h m))]
         [tzs (list* "" "Z" tzs)])
    (for* ([n '("fake-nick")]
           [u '("http://fake-url")]
           [p (list (Peer n (string->url u) u #f))]
           [s '("" ":10")]
           [f '("" ".1337")]
           [z tzs]
           [sep (list "\t" " ")]
           [txt '("foo bar baz" "'jaz poop bear giraffe / tea" "@*\"``")])
      ; The fraction `f` is part of the timestamp; it was previously iterated
      ; over but never used, so fractional seconds were not being tested.
      (let* ([ts (string-append "2020-11-18T22:22"
                                (if (non-empty-string? s) s ":00")
                                f
                                z)]
             [m (str->msg p (string-append ts sep txt))])
        (check-not-false m (format "Given: ~v" ts))
        (check-equal? (Msg-from m) p)
        (check-equal? (Msg-text m) txt)
        (check-equal? (Msg-ts-orig m) ts (format "Given: ~v" ts)))))

  ; A fully specified line, checked field by field.
  (let* ([ts "2020-11-18T22:22:09-0500"]
         [tab "\t"]
         [text "Lorem ipsum"]
         [nick "foo"]
         [url "http://bar/"]
         [peer (Peer nick (string->url url) url #f)]
         [actual (str->msg peer (string-append ts tab text))]
         [expected (Msg 1605756129 ts peer text '())])
    (check-equal?
      (Msg-ts-epoch actual)
      (Msg-ts-epoch expected)
      "str->msg ts-epoch")
    (check-equal?
      (Msg-ts-orig actual)
      (Msg-ts-orig expected)
      "str->msg ts-orig")
    (check-equal?
      (Peer-nick (Msg-from actual))
      (Peer-nick (Msg-from expected))
      "str->msg nick")
    (check-equal?
      (Peer-url (Msg-from actual))
      (Peer-url (Msg-from expected))
      "str->msg url")
    (check-equal?
      (Peer-url-str (Msg-from actual))
      (Peer-url-str (Msg-from expected))
      "str->msg url-str")
    (check-equal?
      (Msg-text actual)
      (Msg-text expected)
      "str->msg text")))
63afa259 | 397 | |
; Splits STR on runs of CR and/or LF characters, so blank lines vanish.
(: str->lines (-> String (Listof String)))
(define (str->lines str)
  (define line-breaks #rx"[\r\n]+")
  (string-split str line-breaks))
401 | ||
(module+ test
  (check-equal? (str->lines "abc\ndef\n\nghi") '("abc" "def" "ghi"))
  ; CRLF line endings, leading/trailing breaks and empty input.
  (check-equal? (str->lines "abc\r\ndef") '("abc" "def"))
  (check-equal? (str->lines "\n\nabc\n") '("abc"))
  (check-equal? (str->lines "") '()))
63afa259 | 404 | |
cd868590 SK |
405 | ; TODO Should return 2 things: 1) msgs; 2) metadata parsed from comments |
406 | ; TODO Update peer nick based on metadata? | |
; Parses a whole feed body: every non-comment line that `str->msg` accepts
; becomes a Msg from PEER; everything else is dropped.
(: str->msgs (-> Peer String (Listof Msg)))
(define (str->msgs peer str)
  (for*/list ([line (in-list (filter-comments (str->lines str)))]
              [msg (in-value (str->msg peer line))]
              #:when msg)
    msg))
4764ff89 | 411 | |
; The "cache" directory under `tt-home-dir`.
(: cache-dir Path-String)
(define cache-dir
  (build-path tt-home-dir "cache"))

; Where downloaded feed bodies are stored.
(define cache-object-dir
  (build-path cache-dir "objects"))
416 | ||
; Cache path scheme v1: the object is named by the SHA-1 hex digest of the
; URL string.
(: url->cache-file-path-v1 (-> Url Path-String))
(define (url->cache-file-path-v1 url)
  ; The helper returns a String; its annotation previously gave a function
  ; type in the return-type position.
  (: hash-sha1 (-> String String))
  (define (hash-sha1 str)
    (define in (open-input-string str))
    (define digest (sha1 in))
    (close-input-port in)
    digest)
  (build-path cache-object-dir (hash-sha1 (url->string url))))
edadb804 SK |
425 | |
; Cache path scheme v2: the object is named by the URI-encoded URL string.
(: url->cache-file-path-v2 (-> Url Path-String))
(define (url->cache-file-path-v2 url)
  (define encoded (uri-encode (url->string url)))
  (build-path cache-object-dir encoded))

; The scheme currently in use for cached objects.
(define url->cache-object-path url->cache-file-path-v2)

; File that holds the ETag last stored for URL.
(define (url->cache-etag-path url)
  (define encoded (uri-encode (url->string url)))
  (build-path cache-dir "etags" encoded))

; File that holds the Last-Modified value last stored for URL.
(define (url->cache-lmod-path url)
  (define encoded (uri-encode (url->string url)))
  (build-path cache-dir "lmods" encoded))
9c464d95 | 438 | |
; Returns the cached body for URL as a string, or #f when nothing is cached.
; A file found under the v1 (SHA-1) name is first moved to its v2 name,
; replacing any file already there.
(: url-read-cached (-> Url (Option String)))
(define (url-read-cached url)
  (define legacy-path (url->cache-file-path-v1 url))
  (define current-path (url->cache-file-path-v2 url))
  (when (file-exists? legacy-path)
    (rename-file-or-directory legacy-path current-path #t))
  (cond
    [(file-exists? current-path)
     (file->string current-path)]
    [else
     (log-debug "Cache file not found for URL: ~a" (url->string url))
     #f]))
4214c0f3 | 450 | |
; Parses S as a URL, returning #f instead of raising when parsing fails.
; The result is a URL struct, not a string as the annotation used to claim.
(: str->url (-> String (Option Url)))
(define (str->url s)
  (with-handlers*
    ([exn:fail? (λ (_) #f)])
    (string->url s)))
a60c484e | 456 | |
; Renders a peer as "[NICK ]URL[ # COMMENT]", the line format `str->peer`
; reads.
(: peer->str (-> Peer String))
(define (peer->str peer)
  (define nick (Peer-nick peer))
  (define comment (Peer-comment peer))
  (string-append
    (if nick (format "~a " nick) "")
    (format "~a" (Peer-url-str peer))
    (if comment (format " # ~a" comment) "")))
464 | ||
; Parses "[NICK] SCHEME://REST [# COMMENT]" into a Peer.
; Returns #f when no such pattern occurs in STR or when the URL part cannot
; be parsed by `str->url`. The stored url-str is the re-rendered parsed URL.
(: str->peer (-> String (Option Peer)))
(define (str->peer str)
  (log-debug "Parsing peer string: ~v" str)
  (define parts
    (regexp-match
      #px"(([^\\s\t]+)[\\s\t]+)?([a-zA-Z]+://[^\\s\t]*)[\\s\t]*(#\\s*(.*))?"
      str))
  (cond
    [(not parts)
     (log-debug "Invalid peer string: ~v" str)
     #f]
    [else
     (match-define (list _wholething
                         _nick-with-space
                         nick
                         url-text
                         _comment-with-hash
                         comment)
       parts)
     (define parsed (str->url url-text))
     (cond
       [parsed
        (Peer nick parsed (url->string parsed) comment)]
       [else
        (log-error "Invalid URL in peer string: ~v" str)
        #f])]))
13c11724 | 487 | |
(module+ test
  ; Table of (input . expected) pairs for `str->peer`.
  (let* ([peer-of (λ (nick url-str comment)
                    (Peer nick (str->url url-str) url-str comment))]
         [cases
          (list
            (cons "foo http://bar/file.txt # some rando"
                  (peer-of "foo" "http://bar/file.txt" "some rando"))
            (cons "http://bar/file.txt # some rando"
                  (peer-of #f "http://bar/file.txt" "some rando"))
            (cons "http://bar/file.txt #"
                  (peer-of #f "http://bar/file.txt" ""))
            ; XXX URLs can have #s
            (cons "http://bar/file.txt#"
                  (peer-of #f "http://bar/file.txt#" #f))
            (cons "http://bar/file.txt"
                  (peer-of #f "http://bar/file.txt" #f))
            (cons "foo http://bar/file.txt"
                  (peer-of "foo" "http://bar/file.txt" #f))
            (cons "foo bar # baz"
                  #f)
            (cons "foo bar://baz # quux"
                  (peer-of "foo" "bar://baz" "quux"))
            (cons "foo bar//baz # quux"
                  #f))])
    (for ([c (in-list cases)])
      (check-equal? (str->peer (car c)) (cdr c)))))
9c464d95 | 516 | |
; Drops every line whose first character is "#". Lines with leading
; whitespace before the "#" are kept.
(: filter-comments (-> (Listof String) (Listof String)))
(define (filter-comments lines)
  (for/list ([line (in-list lines)]
             #:unless (string-prefix? line "#"))
    line))
520 | ||
; Parses a peers file body: one peer per non-comment line; lines that
; `str->peer` rejects are dropped.
(: str->peers (-> String (Listof Peer)))
(define (str->peers str)
  (define candidate-lines (filter-comments (str->lines str)))
  (filter-map str->peer candidate-lines))
9c464d95 | 524 | |
; Writes PEERS to PATH, one `peer->str` line each, sorted by nick (peers
; without a nick sort as the empty string). Missing parent directories are
; created and an existing file is replaced.
(: peers->file (-> (Listof Peer) Path-String Void))
(define (peers->file peers path)
  (make-parent-directory* path)
  (define by-nick
    (sort peers string<? #:key (λ (p) (or (Peer-nick p) ""))))
  (display-lines-to-file (map peer->str by-nick)
                         path
                         #:exists 'replace))
537 | ||
; Reads the peers listed in FILE-PATH. A missing file yields '() and a
; warning.
(: file->peers (-> Path-String (Listof Peer)))
(define (file->peers file-path)
  (cond
    [(file-exists? file-path)
     (str->peers (file->string file-path))]
    [else
     ; FILE-PATH may already be a string, and `path->string` accepts paths
     ; only, so convert just when needed.
     (log-warning "File does not exist: ~v"
                  (if (path? file-path)
                      (path->string file-path)
                      file-path))
     '()]))
9c464d95 | 545 | |
; Shape of an HTTP date such as "Wed, 18 Nov 2020 22:22:09 GMT".
(define re-rfc2822
  #px"^(Mon|Tue|Wed|Thu|Fri|Sat|Sun), ([0-9]{2}) (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) ([0-9]{4}) ([0-2][0-9]):([0-6][0-9]):([0-6][0-9]) GMT")

; Decodes bytes as UTF-8 and reads them as a number (#f if not numeric).
(: b->n (-> Bytes (Option Number)))
(define (b->n b)
  (string->number (bytes->string/utf-8 b)))

; Three-letter English month abbreviation to month number (1-12).
(: mon->num (-> Bytes Natural))
(define/match (mon->num mon)
  [(#"Jan")  1]
  [(#"Feb")  2]
  [(#"Mar")  3]
  [(#"Apr")  4]
  [(#"May")  5]
  [(#"Jun")  6]
  [(#"Jul")  7]
  [(#"Aug")  8]
  [(#"Sep")  9]
  [(#"Oct") 10]
  [(#"Nov") 11]
  [(#"Dec") 12])

; Parses an HTTP date (always GMT) into epoch seconds, or #f.
;
; #f is returned when TIMESTAMP does not match `re-rfc2822` and also when it
; matches but names an impossible moment: the pattern admits values such as
; hour 29, minute 69 or 31 Feb, for which `find-seconds` raises.
(: rfc2822->epoch (-> Bytes (Option Nonnegative-Integer)))
(define (rfc2822->epoch timestamp)
  (match (regexp-match re-rfc2822 timestamp)
    [(list _ _ dd mo yyyy HH MM SS)
     (with-handlers ([exn:fail? (λ (_) #f)])
       (find-seconds (b->n SS)
                     (b->n MM)
                     (b->n HH)
                     (b->n dd)
                     (mon->num mo)
                     (b->n yyyy)
                     #f))]
    [_
     #f]))
582 | ||
; Looks NAME up among HEADERS using `extract-field` on each element.
; Returns the value only when exactly one element supplies it; a missing or
; repeated field yields #f.
(: header-get (-> (Listof Bytes) Bytes (Option Bytes)))
(define (header-get headers name)
  (define hits
    (for*/list ([header (in-list headers)]
                [value (in-value (extract-field name header))]
                #:when value)
      value))
  (and (pair? hits)
       (null? (cdr hits))
       (car hits)))
588 | ||
; Stores the response body for U in the cache unless the cache is current.
;
; The cache counts as current when either
;   - the response ETag equals the stored ETag, or
;   - both the response and the stored Last-Modified parse and the response's
;     is not newer.
; In that case nothing is read from BODY-INPUT and 'skipped-cached is
; returned. Otherwise BODY-INPUT is copied into the object file, the ETag and
; Last-Modified values (when present) are stored beside it, and
; 'downloaded-new is returned.
(: url-download-http-from-port
   (-> Url (Listof (U Bytes String)) Input-Port
       (U 'skipped-cached 'downloaded-new))) ; TODO 'ok|'error ?
(define (url-download-http-from-port u headers body-input)
  ; TODO Update message db from here? or where?
  ; - 1st try can just be an in-memory set that gets written-to
  ;   and read-from disk as a whole.
  (define u-str (url->string u))
  (define cached-object-path (url->cache-object-path u))
  (define cached-etag-path (url->cache-etag-path u))
  (define cached-lmod-path (url->cache-lmod-path u))
  ; This must come after the definition of `cached-object-path`: referring to
  ; it earlier is a use-before-initialization error whenever debug logging is
  ; enabled.
  (log-debug "url-download-http-from-port ~v into ~v" u-str cached-object-path)
  (define etag (header-get headers #"ETag"))
  (define lmod (header-get headers #"Last-Modified"))
  (define lmod-curr (if lmod (rfc2822->epoch lmod) #f))
  (define lmod-prev (if (file-exists? cached-lmod-path)
                        (rfc2822->epoch (file->bytes cached-lmod-path))
                        #f))
  (log-debug "lmod-curr:~v lmod-prev:~v" lmod-curr lmod-prev)
  (define cached?
    (or (and etag
             (file-exists? cached-etag-path)
             (bytes=? etag (file->bytes cached-etag-path))
             (begin
               (log-debug "ETags match, skipping the rest of ~v" u-str)
               #t))
        (and lmod-curr
             lmod-prev
             (<= lmod-curr lmod-prev)
             (begin
               (log-debug "Last-Modified <= current skipping the rest of ~v" u-str)
               #t))))
  (cond
    [cached?
     'skipped-cached]
    [else
     (log-debug
       "Downloading the rest of ~v. ETag: ~a, Last-Modified: ~v"
       u-str etag lmod)
     (make-parent-directory* cached-object-path)
     (make-parent-directory* cached-etag-path)
     (make-parent-directory* cached-lmod-path)
     (call-with-output-file cached-object-path
                            (curry copy-port body-input)
                            #:exists 'replace)
     (when etag
       (display-to-file etag cached-etag-path #:exists 'replace))
     (when lmod
       (display-to-file lmod cached-lmod-path #:exists 'replace))
     'downloaded-new]))
7fd20778 | 638 | |
; Downloads U over HTTP(S), giving up after TIMEOUT seconds.
;
; Result:
;   (ok . skipped-cached | downloaded-new)  on HTTP 200
;   (error . (http-not-ok . STATUS))        on any other status
;   (error . (net-error . EXN))             on a network exception
;   (error . (other . EXN))                 on any other exception
;   (error . timeout)                       when TIMEOUT elapses first
;
; The request and the timer each run in their own thread under a dedicated
; custodian. Shutting that custodian down once a result is in hand stops
; whichever thread is still running and also closes any connection or file
; port it left open; merely killing the threads would leak those ports.
(: url-download-http (-> Positive-Float Url Download-Result))
(define (url-download-http timeout u)
  (define timeout-chan (make-channel))
  (define result-chan  (make-channel))
  (define cust (make-custodian))
  (define (fetch)
    (with-handlers
      ; TODO Maybe name each known errno? (exn:fail:network:errno-errno e)
      ([exn:fail:network?
        (λ (e) `(error . (net-error . ,e)))]
       [exn?
        (λ (e) `(error . (other . ,e)))])
      (define-values (status-line headers body-input)
        (http-sendrecv/url
          u
          #:headers (list (format "User-Agent: ~a" user-agent-str))))
      (log-debug "headers: ~v" headers)
      (log-debug "status-line: ~v" status-line)
      (define status
        (string->number (second (string-split (bytes->string/utf-8 status-line)))))
      (log-debug "status: ~v" status)
      (let ([result
              ; TODO Handle redirects.
              ; TODO Should a redirect update a peer URL?
              (match status
                [200
                 `(ok . ,(url-download-http-from-port u headers body-input))]
                [_
                 `(error . (http-not-ok . ,status))])])
        (close-input-port body-input)
        result)))
  (parameterize ([current-custodian cust])
    (thread (λ ()
               ; Doing this instead of sync/timeout to distinguish error values,
               ; rather than just have #f to work with.
               (sleep timeout)
               (channel-put timeout-chan '(error . timeout))))
    (thread (λ ()
               (channel-put result-chan (fetch)))))
  (define result (sync timeout-chan result-chan))
  (custodian-shutdown-all cust)
  result)
4764ff89 | 683 | |
; Downloads U according to its scheme. Only "http" and "https" are handled;
; any other scheme yields (error . (unsupported-url-scheme . SCHEME)).
(: url-download (-> Positive-Float Url Download-Result))
(define (url-download timeout u)
  (define scheme (url-scheme u))
  ; TODO Support Gopher.
  (if (member scheme '("http" "https"))
      (url-download-http timeout u)
      `(error . (unsupported-url-scheme . ,scheme))))
692 | ||
; Prints every message of TIMELINE in order. The colour index passed to
; `msg-print` starts at 0 and is bumped each time the sender differs from the
; previous message's sender, so consecutive messages from one peer share a
; colour.
(: timeline-print (-> Out-Format (Listof Msg) Void))
(define (timeline-print out-format timeline)
  (let loop ([remaining timeline]
             [prev-from (and (pair? timeline) (Msg-from (car timeline)))]
             [i 0])
    (unless (null? remaining)
      (define m (car remaining))
      (define from (Msg-from m))
      (define j (if (peers-equal? prev-from from) i (+ 1 i)))
      (msg-print out-format j m)
      (loop (cdr remaining) from j))))
4764ff89 | 706 | |
; Messages parsed from PEER's cached feed; '() when nothing is cached.
(: peer->msgs (-> Peer (Listof Msg)))
(define (peer->msgs peer)
  (log-debug "Reading peer nick:~v url:~v" (Peer-nick peer) (Peer-url-str peer))
  ; TODO Expire cache
  (define cached (url-read-cached (Peer-url peer)))
  (cond
    [cached (str->msgs peer cached)]
    [else '()]))
4214c0f3 | 716 | |
; Downloads PEER's feed via `url-download`, logging the start, the elapsed
; wall-clock seconds and the outcome. Returns `url-download`'s result as is.
(: peer-download
   (-> Positive-Float Peer
       (Result (U 'skipped-cached 'downloaded-new)
               Any)))
(define (peer-download timeout peer)
  (define target (Peer-url peer))
  (define target-str (Peer-url-str peer))
  (log-info "Download BEGIN URL:~a" target-str)
  (define-values (results _tm-cpu-ms tm-real-ms _tm-gc-ms)
    (time-apply url-download (list timeout target)))
  (define outcome (car results))
  (define elapsed-seconds (/ tm-real-ms 1000.0))
  (log-info "Download END in ~a seconds, URL:~a, result:~s"
            elapsed-seconds
            target-str
            outcome)
  outcome)
4214c0f3 | 732 | |
; Downloads every peer's feed with NUM-WORKERS concurrent workers, then
; records the outcome in two files under `tt-home-dir`:
;   peers-last-downloaded-ok.txt   peers whose download returned 'ok
;   peers-last-downloaded-err.txt  peers whose download returned 'error, with
;                                  the error written into the peer's comment
(: timeline-download (-> Integer Positive-Float (Listof Peer) Void))
(define (timeline-download num-workers timeout peers)
  (define results
    (concurrent-filter-map num-workers
                           (λ (p) (cons p (peer-download timeout p)))
                           peers))
  ; Collects (on-hit peer payload) for every result carrying TAG.
  (define (peers-tagged tag on-hit)
    (filter-map (match-lambda
                  [(list* p (== tag) payload) (on-hit p payload)]
                  [(list* _ (or 'ok 'error) _) #f])
                results))
  (define peers-ok
    (peers-tagged 'ok (λ (p _) p)))
  (define peers-err
    (peers-tagged 'error
                  (λ (p e) (struct-copy Peer p [comment (format "~s" e)]))))
  (peers->file peers-ok  (build-path tt-home-dir "peers-last-downloaded-ok.txt"))
  (peers->file peers-err (build-path tt-home-dir "peers-last-downloaded-err.txt")))
9a6a9f9a | 753 | |
; All cached messages of all PEERS, concatenated in peer order (unsorted).
(: peers->timeline (-> (Listof Peer) (Listof Msg)))
(define (peers->timeline peers)
  (append-map peer->msgs peers))
a60c484e | 757 | |
; Sorts MSGS by epoch timestamp: ascending for 'old->new, descending for
; 'new->old. The annotation previously referred to the undefined names
; `timeline-order` and `Msgs`.
(: timeline-sort (-> (Listof Msg) Timeline-Order (Listof Msg)))
(define (timeline-sort msgs order)
  (define cmp (match order
                ['old->new <]
                ['new->old >]))
  (sort msgs cmp #:key Msg-ts-epoch))
4764ff89 | 765 | |
; Reads and merges the peers listed in the files named by PATHS. With no
; paths given, "following.txt" under `tt-home-dir` is read instead.
(: paths->peers (-> (Listof String) (Listof Peer)))
(define (paths->peers paths)
  (define peer-files
    (cond
      [(null? paths)
       (define default-file (build-path tt-home-dir "following.txt"))
       (log-debug
         "No peer ref file paths provided, defaulting to ~v"
         (path->string default-file))
       (list default-file)]
      [else
       (log-debug "Peer ref file paths provided: ~v" paths)
       (map string->path paths)]))
  (define peers (apply peers-merge (map file->peers peer-files)))
  (log-info "Read-in ~a peers." (length peers))
  peers)
d0a0e073 | 781 | |
; Reconstruct a peer from the name of its cache file.
; The filename is URI-decoded into a URL string; when str->url yields #f for
; that string the result is #f. The nick is always #f for now.
(: cache-filename->peer (-> Path-String (Option Peer)))
(define (cache-filename->peer filename)
  (define nick #f) ; TODO Look it up in the nick-db when it exists.
  ; Path-String admits plain strings, which path->string would reject.
  (define filename-str
    (if (string? filename)
        filename
        (path->string filename)))
  (define url-str (uri-decode filename-str)) ; TODO Can uri-decode crash?
  (match (str->url url-str)
    [#f #f]
    [url (Peer nick url url-str #f)]))
789 | ||
; List the peers that have an entry in cache-object-dir.
; Entries whose filename does not yield a peer are skipped.
(: peers-cached (-> (Listof Peer)))
(define (peers-cached)
  ; TODO Expire cache?
  (define filenames (directory-list cache-object-dir))
  (for*/list ([filename (in-list filenames)]
              [peer (in-value (cache-filename->peer filename))]
              #:when peer)
    peer))
50f0609d | 794 | |
; Concatenate the mentions of all given messages, in message order.
; Peers mentioned more than once appear more than once.
(: peers-mentioned (-> (Listof Msg) (Listof Peer)))
(define (peers-mentioned msgs)
  (append-map Msg-mentions msgs))
d3ac9e11 | 798 | |
; Drop peers whose URL host is denied.
;
; Denied hosts are read from "domains-deny.txt" under tt-home-dir: its lines
; are passed through filter-comments, trimmed, and blank entries discarded.
; A peer is dropped when its host equals a denied host or ends with
; ".<denied host>". A missing deny file denies nothing, and a peer whose URL
; has no host is kept.
(: peers-filter-denied-domains (-> (Listof Peer) (Listof Peer)))
(define (peers-filter-denied-domains peers)
  (define deny-file (build-path tt-home-dir "domains-deny.txt"))
  (define denied-hosts
    (list->set
      ; A blank entry would otherwise become the pattern "\.$".
      (filter non-empty-string?
              (map string-trim
                   (if (file-exists? deny-file)
                       (filter-comments (file->lines deny-file))
                       '())))))
  (define denied-domain-patterns
    ; Quote the host so that its dots match literally, not any character.
    (set-map denied-hosts
             (λ (h) (pregexp (string-append "\\." (regexp-quote h) "$")))))
  (define (denied? host)
    (or (set-member? denied-hosts host)
        (ormap (λ (d) (regexp-match? d host)) denied-domain-patterns)))
  (filter
    (λ (p)
      ; url-host may be #f, which regexp-match? would reject.
      (define host (url-host (Peer-url p)))
      (not (and host (denied? host))))
    peers))
812 | ||
; Send a 'fatal-level message with topic 'stop to the current logger, then
; block until the given log writer thread has finished.
(: log-writer-stop (-> Thread Void))
(define (log-writer-stop log-writer)
  (define logger (current-logger))
  (log-message logger 'fatal 'stop "Exiting." #f)
  (thread-wait log-writer))
817 | ||
; Create a new logger, start a thread that writes the messages it receives
; to stderr, install the logger as current-logger and return the thread.
;
; Each line is written as "<timestamp> [<level>] <message>", with the
; timestamp in ISO-8601 format. The thread finishes, without printing, on the
; first message whose topic is 'stop.
(: log-writer-start (-> Log-Level Thread))
(define (log-writer-start level)
  (define logger (make-logger #f #f level #f))
  (define receiver (make-log-receiver logger level))
  (define (write-until-stopped)
    (parameterize ([date-display-format 'iso-8601])
      (let loop ()
        (match (sync receiver)
          [(vector _ _ _ 'stop)
           (void)]
          [(vector msg-level msg _ _)
           (eprintf "~a [~a] ~a~n" (date->string (current-date) #t) msg-level msg)
           (loop)]))))
  (define writer (thread write-until-stopped))
  ; Installed only after the writer thread exists, as before.
  (current-logger logger)
  writer)
01e4c499 | 836 | |
; Build the history of nicks seen per URL.
;
; Every message contributes its author and all peers it mentions. Each such
; peer that has a nick increments that nick's frequency under the peer's URL
; and raises the nick's last-seen value to the message's epoch timestamp when
; that is greater. Peers without a nick are ignored.
(: msgs->nick-hist (-> (Listof Msg) Url-Nick-Hist))
(define (msgs->nick-hist msgs)
  ; One more sighting of a nick, at time ts.
  (define (seen hist ts)
    (Hist (+ 1 (Hist-freq hist))
          (max (Hist-last hist) ts)))
  (for*/fold ([url->nick->hist (hash)])
             ([msg (in-list msgs)]
              [peer (in-list (cons (Msg-from msg) (Msg-mentions msg)))])
    (match-define (Peer nick url _ _) peer)
    (cond
      [(not nick) url->nick->hist]
      [else
       (define ts (Msg-ts-epoch msg))
       (hash-update url->nick->hist
                    url
                    (λ (nick->hist)
                      (hash-update nick->hist
                                   nick
                                   (λ (hist) (seen hist ts))
                                   (Hist 0 0)))
                    (hash))])))
5272d418 | 861 | |
; Write the whole nick history to a single file, replacing any existing one.
;
; URLs are written in ascending order of host (a missing host sorts as "").
; Under each URL come its nicks, highest frequency first, one per line as
; " <nick> <freq> <last>". Missing parent directories of filepath are created.
(: url-nick-hist->file (-> Url-Nick-Hist Path-String Void))
(define (url-nick-hist->file unh filepath)
  ; url-host may be #f, which string<? would reject.
  (define (host-of entry)
    (or (url-host (car entry)) ""))
  ; Create missing parent directories, as url-nick-hist->dir does.
  (make-parent-directory* filepath)
  ; The * variant also closes the port when control escapes via an exception.
  (call-with-output-file*
    filepath
    #:exists 'replace
    (λ (out)
      (for-each
        (match-lambda
          [(cons url nick->hist)
           (displayln (url->string url) out)
           (for-each (match-lambda
                       [(cons nick (Hist freq last))
                        (displayln (format " ~a ~a ~a" nick freq last) out)])
                     (sort (hash->list nick->hist)
                           (match-lambda**
                             [((cons _ (Hist a _)) (cons _ (Hist b _)))
                              (> a b)])))])
        (sort
          (hash->list unh)
          (λ (a b) (string<? (host-of a) (host-of b))))))))
881 | ||
; Write the nick history as one file per URL inside dirpath.
;
; Each file is named after the URI-encoded URL plus ".txt" and replaces any
; existing file. It lists the URL's nicks, highest frequency first, one per
; line as "<nick> <freq> <last>".
(: url-nick-hist->dir (-> Url-Nick-Hist Path-String Void))
(define (url-nick-hist->dir unh dirpath)
  (define (more-frequent? x y)
    (> (Hist-freq (cdr x)) (Hist-freq (cdr y))))
  (for ([(url nick->hist) (in-hash unh)])
    (define filepath
      (build-path dirpath
                  (string-append (uri-encode (url->string url)) ".txt")))
    (make-parent-directory* filepath)
    (define lines
      (for/list ([entry (in-list (sort (hash->list nick->hist) more-frequent?))])
        (match-define (cons nick (Hist freq seen)) entry)
        (format "~a ~a ~a" nick freq seen)))
    (display-lines-to-file lines filepath #:exists 'replace)))
900 | ||
; Write the nick history under the "nicks" directory of tt-home-dir:
; as the single file "seen.txt" and as per-URL files in "seen".
(: update-nicks-history-files (-> Url-Nick-Hist Void))
(define (update-nicks-history-files unh)
  (define (in-nicks-dir name)
    (build-path tt-home-dir "nicks" name))
  (url-nick-hist->file unh (in-nicks-dir "seen.txt"))
  (url-nick-hist->dir unh (in-nicks-dir "seen")))
8532efc9 | 906 | |
; Find the nick recorded for url whose history scores highest under by.
;
; Returns #f when url has no entry or its entry has no nicks. Among nicks
; with equal scores, the one that comes first in hash->list order wins.
(: url-nick-hist-most-by (-> Url-Nick-Hist Url (-> Hist Nonnegative-Integer) (Option String)))
(define (url-nick-hist-most-by url->nick->hist url by)
  (define nick->hist (hash-ref url->nick->hist url #f))
  (define entries (if nick->hist (hash->list nick->hist) '()))
  (cond
    [(null? entries) #f]
    ; A single pass suffices; argmax keeps the first of equally-scored
    ; entries, just as taking the head of a stable descending sort does.
    [else (car (argmax (λ (entry) (by (cdr entry))) entries))]))
917 | ||
; The nick recorded for url with the greatest Hist-last value, or #f.
(: url-nick-hist-latest (-> Url-Nick-Hist Url (Option String)))
(define (url-nick-hist-latest unh url)
  (define rank-by Hist-last)
  (url-nick-hist-most-by unh url rank-by))
651cf37d | 921 | |
; The nick recorded for url with the greatest Hist-freq value, or #f.
(: url-nick-hist-common (-> Url-Nick-Hist Url (Option String)))
(define (url-nick-hist-common unh url)
  (define rank-by Hist-freq)
  (url-nick-hist-most-by unh url rank-by))
651cf37d | 925 | |
; Replace each peer's nick with the result of url-nick-hist-common for the
; peer's URL. Peers for which that lookup yields #f are returned unchanged.
(: peers-update-nick-to-common (-> Url-Nick-Hist (Listof Peer) (Listof Peer)))
(define (peers-update-nick-to-common unh peers)
  (for/list ([peer (in-list peers)])
    (define common-nick (url-nick-hist-common unh (Peer-url peer)))
    (if common-nick
        (struct-copy Peer peer [nick common-nick])
        peer)))
934 | ||
(module+ test
  ; Nick history: per-nick frequency and last-seen timestamp, plus the
  ; "most common" and "latest" lookups derived from them.
  (let* ([mentioned-url-str "http://foo"]
         [mentioned-url (string->url mentioned-url-str)]
         [nick-a "a"]
         [nick-b "b"]
         [nick-c "c"]
         [early-str "2021-11-29T23:29:08-0500"]
         [late-str "2021-11-29T23:30:00-0500"]
         [early (rfc3339->epoch early-str)]
         [late (rfc3339->epoch late-str)]
         ; A message from the test peer that mentions the URL under a nick.
         [mention
           (λ (ts-str nick)
             (str->msg (str->peer "test http://test")
                       (string-append ts-str " Hi @<" nick " " mentioned-url-str ">")))]
         ; nick-a: once, late; nick-b: twice, early; nick-c: thrice, early.
         [msgs
           (list (mention late-str nick-a)
                 (mention early-str nick-b)
                 (mention early-str nick-b)
                 (mention early-str nick-c)
                 (mention early-str nick-c)
                 (mention early-str nick-c))]
         [hist
           (msgs->nick-hist msgs)])
    (check-equal? (hash-ref (hash-ref hist mentioned-url) nick-a) (Hist 1 late))
    (check-equal? (hash-ref (hash-ref hist mentioned-url) nick-b) (Hist 2 early))
    (check-equal? (hash-ref (hash-ref hist mentioned-url) nick-c) (Hist 3 early))
    (check-equal? (url-nick-hist-common hist mentioned-url) nick-c)
    (check-equal? (url-nick-hist-latest hist mentioned-url) nick-a)))
651cf37d | 963 | |
; Discover peers mentioned in the cached timelines and update the peer lists
; kept under pub-peers-dir.
;
; Reads the cached peers and their messages, merges the peers mentioned there
; with the previously recorded mentioned and known peers, sets each peer's
; nick via peers-update-nick-to-common, logs a summary, writes the nick
; history files and finally rewrites "downloaded.txt", "mentioned.txt",
; "downloaded-and-parsed.txt" and "all.txt".
(: crawl (-> Void))
(define (crawl)
  ; TODO Test the non-io parts of crawling
  (let* ([peers-all-file
           (build-path pub-peers-dir "all.txt")]
         [peers-mentioned-file
           (build-path pub-peers-dir "mentioned.txt")]
         [peers-parsed-file
           (build-path pub-peers-dir "downloaded-and-parsed.txt")]
         [peers-cached-file
           (build-path pub-peers-dir "downloaded.txt")]
         [peers-cached
           (peers-cached)]
         [cached-timeline
           (peers->timeline peers-cached)]
         [url-nick-hist
           (msgs->nick-hist cached-timeline)]
         [peers-mentioned-curr
           (peers-mentioned cached-timeline)]
         [peers-mentioned-prev
           (file->peers peers-mentioned-file)]
         [peers-all-prev
           (file->peers peers-all-file)]
         [peers-mentioned
           (peers-merge peers-mentioned-prev
                        peers-mentioned-curr)]
         [peers-all
           (peers-update-nick-to-common
             url-nick-hist
             (peers-merge peers-mentioned
                          peers-all-prev
                          peers-cached))]
         [peers-discovered
           (set->list (set-subtract (make-immutable-peers peers-all)
                                    (make-immutable-peers peers-all-prev)))]
         [peers-parsed
           ; peers->timeline treats the result of peer->msgs as possibly #f;
           ; pair? is false for both #f and the empty list, so neither counts
           ; as parsed and neither can crash the check.
           (filter (λ (p) (pair? (peer->msgs p))) peers-all)])
    ; TODO Deeper de-duping
    (log-info "Known peers cached ~a" (length peers-cached))
    (log-info "Known peers mentioned: ~a" (length peers-mentioned))
    (log-info "Known peers parsed ~a" (length peers-parsed))
    (log-info "Known peers total: ~a" (length peers-all))
    (log-info "Discovered ~a new peers:~n~a"
              (length peers-discovered)
              (pretty-format (map
                               (match-lambda
                                 [(Peer n _ u c) (list n u c)])
                               peers-discovered)))
    (update-nicks-history-files url-nick-hist)
    (peers->file peers-cached
                 peers-cached-file)
    (peers->file peers-mentioned
                 peers-mentioned-file)
    (peers->file peers-parsed
                 peers-parsed-file)
    (peers->file peers-all
                 peers-all-file)))
1021 | ||
; Print the timeline of the peers listed in the given peer reference files.
;
; ts-min and ts-max bound (inclusively) the epoch timestamps of the messages
; shown; #f means no bound on that side. order selects the sort direction and
; out-format is passed on to timeline-print.
(: read (-> (Listof String) (Option Real) (Option Real) Timeline-Order Out-Format Void))
(define (read file-paths ts-min ts-max order out-format)
  (define (include? m)
    (define ts (Msg-ts-epoch m))
    (and (or (not ts-min) (>= ts ts-min))
         (or (not ts-max) (<= ts ts-max))))
  (define peers (paths->peers file-paths))
  ; Filter before sorting: the output is the same (the sort is stable and
  ; filter keeps relative order), but excluded messages are never sorted.
  (define msgs (filter include? (peers->timeline peers)))
  (timeline-print out-format (timeline-sort msgs order)))
1033 | ||
; Run the "upload" hook found in the "hooks" directory of tt-home-dir and
; exit with status 0 when system reports success, 1 otherwise.
(: upload (-> Void))
(define (upload)
  ; FIXME Should not exit from here, but only after cleanup/logger-stoppage.
  (define hook (build-path tt-home-dir "hooks" "upload"))
  (define succeeded? (system (path->string hook)))
  (exit (if succeeded? 0 1)))
1040 | ||
; Download the timelines of the peers listed in the given peer reference
; files, except those removed by peers-filter-denied-domains.
; Logs how many peers were denied and how long the download took.
(: download (-> (Listof String) Positive-Integer Positive-Float Void))
(define (download file-paths num-workers timeout)
  (define given (paths->peers file-paths))
  (define kept (peers-filter-denied-domains given))
  (define denied (set-subtract given kept))
  (log-info "Denied ~a peers" (length denied))
  (define-values (_results _cpu-ms elapsed-ms _gc-ms)
    (time-apply timeline-download (list num-workers timeout kept)))
  (log-info "Downloaded timelines from ~a peers in ~a seconds."
            (length kept)
            (/ elapsed-ms 1000.0)))
1052 | ||
; Run the sub-command named by command.
;
; Each sub-command parses its own options with command-line and then calls
; the matching procedure: download, upload, read or crawl. An unknown command
; prints an error to stderr and exits with status 1.
(: dispatch (-> String Void))
(define (dispatch command)
  (match command
    [(or "d" "download")
     ; 20 was fastest out of the tried: 1, 5, 10, 20, 25, 30.
     (let ([num-workers : Positive-Integer 20]
           [timeout     : Positive-Flonum  10.0])
       (command-line
         #:program "tt download"
         #:once-each
         [("-j" "--jobs")
          positive-integer "Number of concurrent jobs."
          (set! num-workers
                (assert (string->number positive-integer)
                        (conjoin exact-positive-integer?)))]
         [("-t" "--timeout")
          positive-float "Timeout seconds per request."
          (set! timeout
                ; real? is checked first so that a non-numeric argument (for
                ; which string->number gives #f) fails the assertion instead
                ; of reaching positive?. Converting afterwards lets whole
                ; numbers such as "10" be given as well as "10.0".
                (real->double-flonum
                  (assert (string->number positive-float)
                          (conjoin real? positive?))))]
         #:args file-paths
         (download file-paths num-workers timeout)))]
    [(or "u" "upload")
     (command-line
       #:program "tt upload" #:args () (upload))]
    [(or "r" "read")
     (let ([out-format 'multi-line]
           [order 'old->new]
           [ts-min #f]
           [ts-max #f])
       (command-line
         #:program "tt read"
         #:once-each
         [("-r" "--rev")
          "Reverse displayed timeline order."
          (set! order 'new->old)]
         [("-m" "--min")
          m "Earliest time to display (ignore anything before it)."
          (set! ts-min (rfc3339->epoch m))]
         [("-x" "--max")
          x "Latest time to display (ignore anything after it)."
          (set! ts-max (rfc3339->epoch x))]
         #:once-any
         [("-s" "--short")
          "Short output format"
          (set! out-format 'single-line)]
         [("-l" "--long")
          "Long output format"
          (set! out-format 'multi-line)]
         #:args file-paths
         (read file-paths ts-min ts-max order out-format)))]
    [(or "c" "crawl")
     (command-line
       #:program "tt crawl" #:args () (crawl))]
    [command
     (eprintf "Error: invalid command: ~v\n" command)
     (eprintf "Please use the \"--help\" option to see a list of available commands.\n")
     (exit 1)]))
1111 | ||
; Program entry point: parse the global options, start the log writer, make
; the remaining arguments the current command line and dispatch the command.
(module+ main
  (let ([log-level 'info])
    (command-line
      #:program
      "tt"
      #:once-each
      [("-d" "--debug")
       "Enable debug log level."
       (set! log-level 'debug)]
      #:help-labels
      ""
      "and <command> is one of"
      "r, read : Read the timeline (offline operation)."
      "d, download : Download the timeline."
      ; TODO Add path dynamically
      "u, upload : Upload your twtxt file (alias to execute ~/.tt/hooks/upload)."
      "c, crawl : Discover new peers mentioned by known peers (offline operation)."
      ""
      #:args (command . args)
      (define log-writer (log-writer-start log-level))
      ; Stop the log writer even when a command raises, so that messages
      ; logged before the failure still get written out.
      ; NOTE(review): commands that call exit are expected to still skip
      ; this cleanup; see the TODO below.
      (dynamic-wind
        void
        (λ ()
          (current-command-line-arguments (list->vector args))
          (set-user-agent-str (build-path tt-home-dir "user.txt"))
          ; TODO dispatch should return status with which we should exit after cleanups
          (dispatch command))
        (λ ()
          (log-writer-stop log-writer))))))