78d56a0c8a9216f297e608ec6fe3371ec2f56242
[tiger.ml.git] / compiler / src / lib / tiger / tiger_parser.mly
1 %{
2 module Ast = Tiger_absyn
3 module Sym = Tiger_symbol
4
5 let pos () =
6 Tiger_position.of_lexing_positions
7 ~pos_start:(Parsing.symbol_start_pos ())
8 ~pos_end:(Parsing.symbol_end_pos ())
9 %}
10
11 /* Declarations */
12 %token AND
13 %token ARRAY
14 %token ASSIGN
15 %token BREAK
16 %token COLON
17 %token COMMA
18 %token DIVIDE
19 %token DO
20 %token DOT
21 %token ELSE
22 %token END
23 %token EOF
24 %token EQ
25 %token FOR
26 %token FUNCTION
27 %token GE
28 %token GT
29 %token <string> ID
30 %token IF
31 %token IN
32 %token <int> INT
33 %token LBRACE
34 %token LBRACK
35 %token LE
36 %token LET
37 %token LPAREN
38 %token LT
39 %token MINUS
40 %token NEQ
41 %token NIL
42 %token OF
43 %token OR
44 %token PLUS
45 %token RBRACE
46 %token RBRACK
47 %token RPAREN
48 %token SEMICOLON
49 %token <string> STRING
50 %token THEN
51 %token TIMES
52 %token TO
53 %token TYPE
54 %token VAR
55 %token WHILE
56
57 /* from lowest precedence */
58 %nonassoc THEN
59 %nonassoc ELSE
60 %nonassoc ASSIGN
61 %nonassoc OF DO
62 %left OR
63 %left AND
64 %nonassoc EQ NEQ GT LT GE LE
65 %left PLUS MINUS
66 %left TIMES DIVIDE
67 %nonassoc HIGHEST
68 /* to highest precedence */
69
70 %type <Tiger_absyn.t> program
71
72 %start program
73
74 %%
75
76 program:
77 | exp EOF { $1 }
78 | error {Tiger_error.exn ~pos:(pos ()) ~msg:"invalid syntax"}
79 ;
80
81 exp:
82 | NIL
83 { Ast.NilExp }
84 | INT
85 { Ast.IntExp $1 }
86 | MINUS exp %prec HIGHEST
87 {
88 Ast.OpExp
89 { left = Ast.IntExp 0
90 ; oper = Ast.MinusOp
91 ; right = $2
92 ; pos = pos ()
93 }
94 }
95 | lvalue LBRACK exp RBRACK OF exp
96 {
97 match $1 with
98 | Ast.SimpleVar {symbol=typ; _} ->
99 Ast.ArrayExp
100 { typ
101 ; size = $3
102 ; init = $6
103 ; pos = pos ()
104 }
105 | Ast.SubscriptVar _ | Ast.FieldVar _ ->
106 raise Parse_error
107 }
108 | ID LBRACE rec_fields_bind RBRACE
109 {
110 let type_id = $1 in
111 let fields = $3 in
112 let typ = Sym.of_string type_id in
113 let pos = pos () in
114 Ast.RecordExp {fields; typ; pos}
115 }
116 | lvalue
117 { Ast.VarExp $1 }
118 | lvalue ASSIGN exp
119 {
120 let var = $1 in
121 let exp = $3 in
122 let pos = pos () in
123 Ast.AssignExp {var; exp; pos}
124 }
125 | STRING
126 { Ast.StringExp {string = $1; pos = pos ()} }
127 | ID LPAREN fun_args RPAREN
128 {
129 Ast.CallExp
130 { func = Sym.of_string $1
131 ; args = $3
132 ; pos = pos ()
133 }
134 }
135 | exp PLUS exp
136 {
137 Ast.OpExp
138 { left = $1
139 ; oper = Ast.PlusOp
140 ; right = $3
141 ; pos = pos ()
142 }
143 }
144 | exp MINUS exp
145 {
146 Ast.OpExp
147 { left = $1
148 ; oper = Ast.MinusOp
149 ; right = $3
150 ; pos = pos ()
151 }
152 }
153 | exp TIMES exp
154 {
155 Ast.OpExp
156 { left = $1
157 ; oper = Ast.TimesOp
158 ; right = $3
159 ; pos = pos ()
160 }
161 }
162 | exp DIVIDE exp
163 {
164 Ast.OpExp
165 { left = $1
166 ; oper = Ast.DivideOp
167 ; right = $3
168 ; pos = pos ()
169 }
170 }
171 | exp EQ exp
172 {
173 Ast.OpExp
174 { left = $1
175 ; oper = Ast.EqOp
176 ; right = $3
177 ; pos = pos ()
178 }
179 }
180 | exp NEQ exp
181 {
182 Ast.OpExp
183 { left = $1
184 ; oper = Ast.NeqOp
185 ; right = $3
186 ; pos = pos ()
187 }
188 }
189 | exp GT exp
190 {
191 Ast.OpExp
192 { left = $1
193 ; oper = Ast.GtOp
194 ; right = $3
195 ; pos = pos ()
196 }
197 }
198 | exp LT exp
199 {
200 Ast.OpExp
201 { left = $1
202 ; oper = Ast.LtOp
203 ; right = $3
204 ; pos = pos ()
205 }
206 }
207 | exp GE exp
208 {
209 Ast.OpExp
210 { left = $1
211 ; oper = Ast.GeOp
212 ; right = $3
213 ; pos = pos ()
214 }
215 }
216 | exp LE exp
217 {
218 Ast.OpExp
219 { left = $1
220 ; oper = Ast.LeOp
221 ; right = $3
222 ; pos = pos ()
223 }
224 }
225 | exp AND exp
226 {
227 let e1 = $1 in
228 let e2 = $3 in
229 Ast.IfExp
230 { test = e1
231 ; then' = e2
232 ; else' = Some (Ast.IntExp 0)
233 ; pos = pos ()
234 }
235 }
236 | exp OR exp
237 {
238 let e1 = $1 in
239 let e2 = $3 in
240 Ast.IfExp
241 { test = e1
242 ; then' = Ast.IntExp 1
243 ; else' = Some e2
244 ; pos = pos ()
245 }
246 }
247 | IF exp THEN exp ELSE exp
248 {
249 let e1 = $2 in
250 let e2 = $4 in
251 let e3 = $6 in
252 Ast.IfExp
253 { test = e1
254 ; then' = e2
255 ; else' = Some e3
256 ; pos = pos ()
257 }
258 }
259 | IF exp THEN exp
260 {
261 let e1 = $2 in
262 let e2 = $4 in
263 Ast.IfExp
264 { test = e1
265 ; then' = e2
266 ; else' = None
267 ; pos = pos ()
268 }
269 }
270 | WHILE exp DO exp
271 {
272 let e1 = $2 in
273 let e2 = $4 in
274 Ast.WhileExp
275 { test = e1
276 ; body = e2
277 ; pos = pos ()
278 }
279 }
280 | FOR ID ASSIGN exp TO exp DO exp
281 {
282 let var = $2 in
283 let e1 = $4 in
284 let e2 = $6 in
285 let e3 = $8 in
286 Ast.ForExp
287 { var = Sym.of_string var
288 ; escape = ref true
289 ; lo = e1
290 ; hi = e2
291 ; body = e3
292 ; pos = pos ()
293 }
294 }
295 | BREAK
296 { Ast.BreakExp (pos ()) }
297 | LPAREN exps RPAREN
298 { Ast.SeqExp $2 }
299 | LET decs IN exps END
300 {
301 let decs = $2 in
302 let exps = $4 in
303 Ast.LetExp {decs; body = Ast.SeqExp exps; pos = pos ()}
304 }
305 ;
306
307 exps:
308 | { [] }
309 | exp { ($1, pos ()) :: [] }
310 | exp SEMICOLON exps { ($1, pos ()) :: $3 }
311 ;
312
313 rec_fields_bind:
314 | ID EQ exp { (Sym.of_string $1, $3, pos ()) :: [] }
315 | ID EQ exp COMMA rec_fields_bind { (Sym.of_string $1, $3, pos ()) :: $5 }
316 ;
317
318 /* ------------------------------------------------------------------------- */
319 /* BEGIN unintuitive rules for decs (which avoid shift/reduce conflicts) */
320 /* ------------------------------------------------------------------------- */
321 /*
322 In order to support mutual recursion, we need to group consecutive
323 type and function declarations (see Tiger-book pages 97-99).
324
325 Initially, I defined the rules to do so as:
326
327 decs:
328 | dec { $1 :: [] }
329 | dec decs { $1 :: $2 }
330 ;
331 dec:
332 | var_dec { $1 }
333 | typ_decs { Ast.TypeDecs $1 }
334 | fun_decs { Ast.FunDecs $1 }
335 ;
336
337 which, while straightforward (and working, because ocamlyacc defaults to
338 shift in case of a conflict), nonetheless caused a shift/reduce conflict in
339 each of: typ_decs and fun_decs; where the parser did not know whether to
340 shift and stay in (typ|fun_)_dec state or to reduce and get back to dec
341 state.
342
343 Sadly, tagging the rules with a lower precedence (to explicitly favor
344 shifting) - does not help :(
345
346 %nonassoc LOWEST
347 ...
348 dec:
349 | var_dec { $1 }
350 | typ_decs %prec LOWEST { Ast.TypeDecs $1 }
351 | fun_decs %prec LOWEST { Ast.FunDecs $1 }
352 ;
353
354 The difficulty seems to be in the lack of a separator token which would be
355 able to definitively mark the end of each sequence of consecutive
356 (typ_|fun_) declarations.
357
358 Keeping this in mind, another alternative is to manually capture the possible
359 interspersion patterns in the rules like:
360
361 (N * foo) followed-by (N * not-foo)
362
363 for the exception of var_dec, which, since we do not need to group its
364 consecutive sequences, can be reduced upon first sighting.
365 */
366
367 decs:
368 | var_dec decs_any { $1 :: $2 }
369 | fun_decs decs_any_but_fun { (Ast.FunDecs $1) :: $2 }
370 | typ_decs decs_any_but_typ { (Ast.TypeDecs $1) :: $2 }
371 ;
372
373 decs_any:
374 | { [] }
375 | var_dec decs_any { $1 :: $2 }
376 | fun_decs decs_any_but_fun { (Ast.FunDecs $1) :: $2 }
377 | typ_decs decs_any_but_typ { (Ast.TypeDecs $1) :: $2 }
378 ;
379
380 decs_any_but_fun:
381 | { [] }
382 | var_dec decs_any { $1 :: $2 }
383 | typ_decs decs_any_but_typ { (Ast.TypeDecs $1) :: $2 }
384 ;
385
386 decs_any_but_typ:
387 | { [] }
388 | var_dec decs_any { $1 :: $2 }
389 | fun_decs decs_any_but_fun { (Ast.FunDecs $1) :: $2 }
390 ;
391
392 /*---------------------------------------------------------------------------*/
393 /* END unintuitive rules for decs (which avoid shift/reduce conflicts) */
394 /*---------------------------------------------------------------------------*/
395
396 typ_decs:
397 | typ_dec { $1 :: [] }
398 | typ_dec typ_decs { $1 :: $2 }
399 ;
400
401 typ_dec:
402 | TYPE ID EQ ID
403 {
404 let type_id_left = $2 in
405 let type_id_right = $4 in
406 let pos = pos () in (* FIXME: rhs id should have its own pos, no? *)
407 Ast.TypeDec
408 { name = Sym.of_string type_id_left
409 ; ty = Ast.NameTy {symbol = Sym.of_string type_id_right; pos}
410 ; pos
411 }
412 }
413 | TYPE ID EQ LBRACE type_fields RBRACE
414 {
415 let type_id = $2 in
416 let type_fields = $5 in
417 Ast.TypeDec
418 { name = Sym.of_string type_id
419 ; ty = Ast.RecordTy type_fields
420 ; pos = pos ()
421 }
422 }
423 | TYPE ID EQ ARRAY OF ID
424 {
425 let type_id = Sym.of_string $2 in
426 let element_type_id = Sym.of_string $6 in
427 let pos = pos () in
428 Ast.TypeDec
429 { name = type_id
430 ; ty = Ast.ArrayTy {symbol = element_type_id; pos}
431 ; pos
432 }
433 }
434 ;
435
436 var_dec:
437 | VAR ID maybe_type_sig ASSIGN exp
438 {
439 let var_id = Sym.of_string $2 in
440 let maybe_type_sig = $3 in
441 let exp = $5 in
442 let pos = pos () in
443 Ast.VarDec
444 { name = var_id
445 ; escape = ref true
446 ; typ = maybe_type_sig
447 ; init = exp
448 ; pos
449 }
450 }
451 ;
452
453 fun_decs:
454 | fun_dec { $1 :: [] }
455 | fun_dec fun_decs { $1 :: $2 }
456 ;
457
458 fun_dec:
459 | FUNCTION ID LPAREN type_fields RPAREN maybe_type_sig EQ exp
460 {
461 let name = Sym.of_string $2 in
462 let params = $4 in
463 let result = $6 in
464 let body = $8 in
465 let pos = pos () in
466 Ast.FunDec {name; params; result; body; pos}
467 }
468 ;
469
470 maybe_type_sig:
471 | { None }
472 | COLON ID { Some (Sym.of_string $2, pos ()) }
473 ;
474
475 type_fields:
476 |
477 { [] }
478 | ID COLON ID
479 {
480 let field =
481 Ast.Field
482 { name = Sym.of_string $1
483 ; escape = ref true
484 ; typ = Sym.of_string $3
485 ; pos = pos ()
486 }
487 in
488 field :: []
489 }
490 | ID COLON ID COMMA type_fields
491 {
492 let field =
493 Ast.Field
494 { name = Sym.of_string $1
495 ; escape = ref true
496 ; typ = Sym.of_string $3
497 ; pos = pos ()
498 }
499 in
500 field :: $5
501 }
502 ;
503
504 fun_args:
505 | { [] }
506 | exp { $1 :: [] }
507 | exp COMMA fun_args { $1 :: $3 }
508 ;
509
510 lvalue:
511 | ID
512 {
513 Ast.SimpleVar
514 { symbol = Sym.of_string $1
515 ; pos = pos ()
516 }
517 }
518 | lvalue LBRACK exp RBRACK
519 {
520 Ast.SubscriptVar
521 { var = $1
522 ; exp = $3
523 ; pos = pos ()
524 }
525 }
526 | lvalue DOT ID
527 {
528 Ast.FieldVar
529 { var = $1
530 ; symbol = Sym.of_string $3
531 ; pos = pos ()
532 }
533 }
534 ;
535
536 %%
This page took 0.065328 seconds and 3 git commands to generate.