Test concurrent-filter-map
[tt.git] / tt.rkt
1 ; TODO optional text wrap
2 ; TODO write
3 ; TODO caching (use cache by default, unless explicitly asked for update)
4 ; - [x] value --> cache
5 ; - [ ] value <-- cache
6 ; requires: commands
7 ; TODO timeline limits
8 ; TODO feed set operations (perhaps better done externally?)
9 ; TODO timeline as a result of a query (feed set op + filter expressions)
10 ; TODO named timelines
11 ; TODO CLI params
12 ; TODO config files
13 ; TODO parse "following" from feed
14 ; - following = <nick> <uri>
15 ; TODO parse mentions:
16 ; - @<source.nick source.url> | @<source.url>
17 ; TODO highlight mentions
18 ; TODO filter on mentions
19 ; TODO highlight hashtags
20 ; TODO filter on hashtags
21 ; TODO hashtags as channels? initial hashtag special?
22 ; TODO query language
23 ; TODO console logger colors by level ('error)
24 ; TODO file logger ('debug)
25 ; TODO commands:
26 ; - r | read
27 ; - see timeline ops above
28 ; - w | write
29 ; - arg or stdin
30 ; - nick expand to URI
31 ; - q | query
32 ; - see timeline ops above
33 ; - see hashtag and channels above
34 ; - d | download
35 ; - u | upload
36 ; - calls user-configured command to upload user's own feed file to their server
37
38 #lang racket
39
40 (require openssl/sha1)
41 (require racket/date)
42
43 (require http-client)
44 (require rfc3339-old)
45
46 (module+ test
47 (require rackunit))
48
; One message parsed from a single line of a feed.
(struct msg
  (ts_epoch    ; timestamp as seconds, as computed by find-seconds in str->msg
   ts_rfc3339  ; timestamp text exactly as it appeared in the feed line
   nick        ; nick of the feed the message was read from
   uri         ; uri of the feed the message was read from
   text))      ; message body

; A feed to download: a nick paired with the uri it is fetched from.
(struct feed
  (nick
   uri))
51
(define (concurrent-filter-map num_workers f xs)
  ; Like (filter-map f xs), but the calls to f are spread over num_workers
  ; threads.
  ;
  ; - The order of the returned elements is NOT specified: results are
  ;   collected in the order the workers happen to deliver them. Callers that
  ;   need a particular order must sort the result themselves.
  ; - If f raises for some element, no further elements are handed out, the
  ;   workers are allowed to finish what they already started, and the first
  ;   raised value reported to the dispatcher is re-raised in the calling
  ;   thread. (Previously the worker thread died without reporting 'exit and
  ;   the dispatcher waited forever.)
  ; - With num_workers = 0 nothing is processed and '() is returned.
  ;
  ; The calling thread's mailbox is used for coordination, so the caller must
  ; not have unrelated thread messages pending.
  ;
  ; TODO preserve order of elements
  ; TODO switch from mailboxes to channels

  ; Everything except a break is reported to the dispatcher; breaks are left
  ; alone so that a worker can still be interrupted.
  (define (reportable? e)
    (not (exn:break? e)))

  ; Builds the thunk run by worker number id. The worker repeatedly asks the
  ; parent for work until the parent answers 'done.
  (define (make-worker id f)
    (define parent (current-thread))
    (λ ()
      (define self (current-thread))
      (define (work)
        (thread-send parent (cons 'next self))
        (match (thread-receive)
          ['done (thread-send parent (cons 'exit id))]
          [(cons 'unit x)
           (with-handlers
             ([reportable? (λ (e) (thread-send parent (cons 'error e)))])
             (let ([y (f x)])
               (when y (thread-send parent (cons 'result y)))))
           ; Keep asking for work even after a failure: the parent answers
           ; 'done, which is what makes this worker report 'exit.
           (work)]))
      (work)))

  ; ws      : ids of the workers that have not exited yet
  ; xs      : elements not yet handed to a worker
  ; ys      : results collected so far
  ; failure : #f, or a box holding the first value raised by f. A box is used
  ;           because the raised value itself may be #f.
  (define (dispatch ws xs ys failure)
    (if (empty? ws)
        (values ys failure)
        (match (thread-receive)
          [(cons 'exit w) (dispatch (remove w ws =) xs ys failure)]
          [(cons 'result y) (dispatch ws xs (cons y ys) failure)]
          ; Drop the remaining work so every worker is told 'done next.
          [(cons 'error e) (dispatch ws '() ys (or failure (box e)))]
          [(cons 'next thd)
           (match xs
             ['()
              (thread-send thd 'done)
              (dispatch ws xs ys failure)]
             [(cons x xs-rest)
              (thread-send thd (cons 'unit x))
              (dispatch ws xs-rest ys failure)])])))

  (define workers (range num_workers))
  (define threads (map (λ (id) (thread (make-worker id f))) workers))
  (define-values (results failure) (dispatch workers xs '() #f))
  (for-each thread-wait threads)
  (when failure
    (raise (unbox failure)))
  results)
86
(module+ test
  ; concurrent-filter-map must produce the same elements as the sequential
  ; filter-map. Both sides are sorted before comparing because the concurrent
  ; version does not promise any particular order.
  (define n-workers 10)
  (define (keep-even x)
    (if (even? x) x #f))
  (define given (list keep-even (range 11)))
  (define actual (apply concurrent-filter-map n-workers given))
  (define expected (apply filter-map given))
  (check-equal? (sort actual <) (sort expected <)))
95
(define (msg-print out-format odd msg)
  ; Print one message to the current output port.
  ;
  ; out-format : 'single-line or 'multi-line; anything else raises exn:fail
  ;              (previously a bare string was raised, which exn-based
  ;              handlers cannot inspect).
  ; odd        : true selects SGR colour code 36 for the message text,
  ;              false selects 33, so adjacent messages can alternate.
  ; msg        : the msg struct to print.
  ;
  ; The timestamp is rendered by date->string, so its layout follows the
  ; current date-display-format.
  (define template
    (match out-format
      ['single-line "~a \033[1;37m<~a ~a>\033[0m \033[0;~am~a\033[0m~n"]
      ['multi-line "~a~n\033[1;37m<~a ~a>\033[0m~n\033[0;~am~a\033[0m~n~n"]
      [_ (error 'msg-print "Invalid output format: ~a" out-format)]))
  (printf template
          (date->string (seconds->date (msg-ts_epoch msg)) #t)
          (msg-nick msg)
          (msg-uri msg)
          (if odd 36 33)
          (msg-text msg)))
107
; Recognises a line that begins with a date-time of the shape
; YYYY-MM-DDThh:mm:ss. Whatever follows the seconds is not inspected.
(define re-msg-begin
  ; TODO Zulu offset. Maybe in several formats. Which ones?
  #px"^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}")
111
(define (str->msg nick uri str)
  ; Parse one feed line into a msg, or return #f when the line is not a
  ; message.
  ;
  ; nick, uri : identify the feed the line came from; stored in the msg as is.
  ; str       : a single line, expected to be <timestamp> TAB(s) <text>.
  ;
  ; Returns #f (and logs at debug level) when the line does not start with a
  ; timestamp, and #f (logging a warning) when no text follows the timestamp.
  ;
  ; The line is split at the FIRST run of tabs only, so a message whose text
  ; itself contains tabs is kept intact instead of being rejected. Trailing
  ; tabs are dropped, as before.
  (cond
    [(not (regexp-match? re-msg-begin str))
     (log-debug "Non-msg line from nick:~a, line:~a" nick str)
     #f]
    ; Group 1: everything before the first tab (the timestamp).
    ; Group 2: the text, starting at the first non-tab character after the
    ;          separator, matched lazily so trailing tabs are left out.
    [(regexp-match #px"^([^\t]+)\t+([^\t].*?)\t*$" str)
     => (λ (parts)
          (let* ([ts_rfc3339 (second parts)]
                 [text (third parts)]
                 [t (string->rfc3339-record ts_rfc3339)]
                 ; TODO handle tz offset
                 ; NOTE(review): the offset in the timestamp is not applied
                 ; here; the fields are passed to find-seconds unchanged.
                 [ts_epoch (find-seconds (rfc3339-record:second t)
                                         (rfc3339-record:minute t)
                                         (rfc3339-record:hour t)
                                         (rfc3339-record:mday t)
                                         (rfc3339-record:month t)
                                         (rfc3339-record:year t))])
            (msg ts_epoch ts_rfc3339 nick uri text)))]
    [else
     (log-warning "Invalid msg line from nick:~a, msg:~a" nick str)
     #f]))
134
(define (str->lines str)
  ; Break str into its non-empty lines. Any run of CR and/or LF characters
  ; counts as one separator, so blank lines produce no entries.
  (define line-break #rx"[\r\n]+")
  (string-split str line-break))
137
(define (str->msgs nick uri str)
  ; Parse every line of str with str->msg and keep only the lines that
  ; yielded a message (str->msg returns #f for the rest).
  (for*/list ([line (in-list (str->lines str))]
              [parsed (in-value (str->msg nick uri line))]
              #:when parsed)
    parsed))
140
(define (hash-sha1 str)
  ; Return the SHA-1 digest of str as produced by sha1 from openssl/sha1.
  ; The string is read through a temporary input port, which is closed once
  ; the digest has been computed.
  (let ([port (open-input-string str)])
    (begin0
      (sha1 port)
      (close-input-port port))))
146
(define (uri-fetch uri)
  ; GET uri and return the response body.
  ;
  ; On status 200 the body is also written to a cache file under
  ; ~/.tt/cache/, named after the SHA-1 digest of the uri; an existing cache
  ; file is replaced. The cache directory is created when missing, so the
  ; first run no longer fails on the write.
  ;
  ; Any other status is raised as the bare status integer (callers match it
  ; with integer?).
  (log-info "GET ~a" uri)
  (define resp (http-get uri))
  (define status (http-response-code resp))
  (define body (http-response-body resp))
  (log-debug "finished GET ~a status:~a body length:~a"
             uri status (string-length body))
  ; TODO Handle redirects
  (unless (= status 200)
    ; TODO A more-informative exception
    (raise status))
  (define cache-dir (expand-user-path "~/.tt/cache/"))
  ; make-directory* also creates missing parents and is a no-op when the
  ; directory is already there.
  (make-directory* cache-dir)
  (display-to-file body
                   (build-path cache-dir (hash-sha1 uri))
                   #:exists 'replace)
  body)
167
(define (timeline-print out-format timeline)
  ; Print every message of timeline with msg-print, in order. The "odd" flag
  ; passed to msg-print is false for the first message and flips for each
  ; following one.
  (for/fold ([odd-row? #f])
            ([m timeline])
    (msg-print out-format odd-row? m)
    (not odd-row?))
  (void))
172
(define (feed->msgs feed)
  ; Download feed and parse it into a list of msg structs.
  ; Returns #f, after logging an error, when the download raises a network
  ; exception or when an integer (the HTTP status raised by uri-fetch) is
  ; raised.
  (define nick (feed-nick feed))
  (define uri (feed-uri feed))
  (define (on-network-error e)
    (log-error "network error nick:~a uri:~a exn:~a" nick uri e)
    #f)
  (define (on-http-error status)
    (log-error "http error nick:~a uri:~a status:~a" nick uri status)
    #f)
  (log-info "downloading feed nick:~a uri:~a" nick uri)
  (with-handlers ([exn:fail:network? on-network-error]
                  [integer? on-http-error])
    (str->msgs nick uri (uri-fetch uri))))
194
195 ; TODO timeline contract : time-sorted list of messages
(define (timeline num_workers feeds)
  ; Download all feeds using num_workers concurrent workers and merge their
  ; messages into one list ordered by ascending ts_epoch. Feeds for which
  ; feed->msgs returned #f contribute nothing.
  (define msgs-per-feed
    (concurrent-filter-map num_workers feed->msgs feeds))
  (sort (append* msgs-per-feed) < #:key msg-ts_epoch))
199
(define (str->feed str)
  ; Build a feed from a whitespace-separated line; the tokens become the
  ; feed's fields in order (nick, then uri).
  ; TODO validation
  (apply feed (string-split str)))
204
(define (str->feeds str)
  ; Turn each non-empty line of str into a feed, preserving line order.
  (for/list ([line (in-list (str->lines str))])
    (str->feed line)))
207
(define (file->feeds filename)
  ; Read the whole file named filename and parse it as a list of feeds.
  (let ([contents (file->string filename)])
    (str->feeds contents)))
210
(define (we-are-twtxt)
  ; Fetch the we-are-twtxt list from GitHub and parse it as a list of feeds.
  (str->feeds
   (uri-fetch
    "https://raw.githubusercontent.com/mdom/we-are-twtxt/master/we-are-twtxt.txt")))
215
; User-Agent string: "<name>/<version> (<details>)".
; When ~/twtxt-me.txt exists and lists at least one feed, <details> is
; "+<uri>; @<nick>" taken from its first feed. Otherwise (file missing or
; containing no feed lines) <details> is "+<program uri>". An existing but
; empty file used to raise here, at module load.
(define user-agent
  (let* ([prog-name "tt"]
         [prog-version "0.3.4"]
         [prog-uri "https://github.com/xandkar/tt"]
         [user-feed-file (expand-user-path "~/twtxt-me.txt")]
         [user-feeds (if (file-exists? user-feed-file)
                         (file->feeds user-feed-file)
                         '())]
         [user (match user-feeds
                 [(cons user _)
                  (format "+~a; @~a" (feed-uri user) (feed-nick user))]
                 ['()
                  (format "+~a" prog-uri)])])
    (format "~a/~a (~a)" prog-name prog-version user)))
229
(module+ main
  ; Install a fresh logger as current-logger and start a background thread
  ; that writes every event of level debug and above to stderr as
  ; "<timestamp> [<level>] <message>".
  (define (setup-logging)
    (define logger (make-logger #f #f 'debug #f))
    (define receiver (make-log-receiver logger 'debug))
    (define (print-events)
      (let* ([event (sync receiver)]
             [level (vector-ref event 0)]
             [text (vector-ref event 1)]
             [ts (date->string (current-date) #t)])
        (eprintf "~a [~a] ~a~n" ts level text))
      (print-events))
    (thread (λ ()
              ; Set inside the thread, before the loop starts, so it governs
              ; the log timestamps printed by this thread.
              (date-display-format 'iso-8601)
              (print-events)))
    (current-logger logger))

  (setup-logging)
  (current-http-response-auto #f)
  (current-http-user-agent user-agent)
  (date-display-format 'rfc2822)

  ; Feeds come from the file named by the first command-line argument, or
  ; from the we-are-twtxt list when no argument is given.
  (define args (current-command-line-arguments))
  (define feeds
    (cond
      [(vector-empty? args) (we-are-twtxt)]
      [else (file->feeds (vector-ref args 0))]))
  (define out-format 'multi-line)
  (define num_workers 15) ; 15 was fastest out of the tried 1, 5, 10, 15 and 20.
  (define msgs (timeline num_workers feeds))
  (timeline-print out-format msgs))
This page took 0.088931 seconds and 5 git commands to generate.