Fix shift/reduce conflicts introduced by grouping consecutive typ|fun decs
[tiger.ml.git] / tiger / src / lib / tiger / tiger_parser.mly
1 %{
2 module Ast = Tiger_absyn
3 module Sym = Tiger_symbol
4
5 let pos () =
6 Tiger_position.of_lexing_positions
7 ~pos_start:(Parsing.symbol_start_pos ())
8 ~pos_end:(Parsing.symbol_end_pos ())
9 %}
10
11 /* Declarations */
12 %token AND
13 %token ARRAY
14 %token ASSIGN
15 %token BREAK
16 %token COLON
17 %token COMMA
18 %token DIVIDE
19 %token DO
20 %token DOT
21 %token ELSE
22 %token END
23 %token EOF
24 %token EQ
25 %token FOR
26 %token FUNCTION
27 %token GE
28 %token GT
29 %token <string> ID
30 %token IF
31 %token IN
32 %token <int> INT
33 %token LBRACE
34 %token LBRACK
35 %token LE
36 %token LET
37 %token LPAREN
38 %token LT
39 %token MINUS
40 %token NEQ
41 %token NIL
42 %token OF
43 %token OR
44 %token PLUS
45 %token RBRACE
46 %token RBRACK
47 %token RPAREN
48 %token SEMICOLON
49 %token <string> STRING
50 %token THEN
51 %token TIMES
52 %token TO
53 %token TYPE
54 %token VAR
55 %token WHILE
56
57 /* from lowest precedence */
58 %nonassoc THEN
59 %nonassoc ELSE
60 %nonassoc ASSIGN
61 %nonassoc OF DO
62 %left OR
63 %left AND
64 %nonassoc EQ NEQ GT LT GE LE
65 %left PLUS MINUS
66 %left TIMES DIVIDE
67 %nonassoc HIGHEST
68 /* to highest precedence */
69
70 %type <Tiger_absyn.t> program
71
72 %start program
73
74 %%
75
76 program: exp EOF { $1 };
77
78 exp:
79 | NIL
80 { Ast.NilExp }
81 | INT
82 { Ast.IntExp $1 }
83 | MINUS exp %prec HIGHEST
84 {
85 Ast.OpExp
86 { left = Ast.IntExp 0
87 ; oper = Ast.MinusOp
88 ; right = $2
89 ; pos = pos ()
90 }
91 }
92 | lvalue LBRACK exp RBRACK OF exp
93 {
94 match $1 with
95 | Ast.SimpleVar {symbol=typ; _} ->
96 Ast.ArrayExp
97 { typ
98 ; size = $3
99 ; init = $6
100 ; pos = pos ()
101 }
102 | Ast.SubscriptVar _ | Ast.FieldVar _ ->
103 raise Parse_error
104 }
105 | ID LBRACE rec_fields_bind RBRACE
106 {
107 let type_id = $1 in
108 let fields = $3 in
109 let typ = Sym.of_string type_id in
110 let pos = pos () in
111 Ast.RecordExp {fields; typ; pos}
112 }
113 | lvalue
114 { Ast.VarExp $1 }
115 | lvalue ASSIGN exp
116 {
117 let var = $1 in
118 let exp = $3 in
119 let pos = pos () in
120 Ast.AssignExp {var; exp; pos}
121 }
122 | STRING
123 { Ast.StringExp {string = $1; pos = pos ()} }
124 | ID LPAREN fun_args RPAREN
125 {
126 Ast.CallExp
127 { func = Sym.of_string $1
128 ; args = $3
129 ; pos = pos ()
130 }
131 }
132 | exp PLUS exp
133 {
134 Ast.OpExp
135 { left = $1
136 ; oper = Ast.PlusOp
137 ; right = $3
138 ; pos = pos ()
139 }
140 }
141 | exp MINUS exp
142 {
143 Ast.OpExp
144 { left = $1
145 ; oper = Ast.MinusOp
146 ; right = $3
147 ; pos = pos ()
148 }
149 }
150 | exp TIMES exp
151 {
152 Ast.OpExp
153 { left = $1
154 ; oper = Ast.TimesOp
155 ; right = $3
156 ; pos = pos ()
157 }
158 }
159 | exp DIVIDE exp
160 {
161 Ast.OpExp
162 { left = $1
163 ; oper = Ast.DivideOp
164 ; right = $3
165 ; pos = pos ()
166 }
167 }
168 | exp EQ exp
169 {
170 Ast.OpExp
171 { left = $1
172 ; oper = Ast.EqOp
173 ; right = $3
174 ; pos = pos ()
175 }
176 }
177 | exp NEQ exp
178 {
179 Ast.OpExp
180 { left = $1
181 ; oper = Ast.NeqOp
182 ; right = $3
183 ; pos = pos ()
184 }
185 }
186 | exp GT exp
187 {
188 Ast.OpExp
189 { left = $1
190 ; oper = Ast.GtOp
191 ; right = $3
192 ; pos = pos ()
193 }
194 }
195 | exp LT exp
196 {
197 Ast.OpExp
198 { left = $1
199 ; oper = Ast.LtOp
200 ; right = $3
201 ; pos = pos ()
202 }
203 }
204 | exp GE exp
205 {
206 Ast.OpExp
207 { left = $1
208 ; oper = Ast.GeOp
209 ; right = $3
210 ; pos = pos ()
211 }
212 }
213 | exp LE exp
214 {
215 Ast.OpExp
216 { left = $1
217 ; oper = Ast.LeOp
218 ; right = $3
219 ; pos = pos ()
220 }
221 }
222 | exp AND exp
223 {
224 let e1 = $1 in
225 let e2 = $3 in
226 Ast.IfExp
227 { test = e1
228 ; then' = e2
229 ; else' = Some (Ast.IntExp 0)
230 ; pos = pos ()
231 }
232 }
233 | exp OR exp
234 {
235 let e1 = $1 in
236 let e2 = $3 in
237 Ast.IfExp
238 { test = e1
239 ; then' = Ast.IntExp 1
240 ; else' = Some e2
241 ; pos = pos ()
242 }
243 }
244 | IF exp THEN exp ELSE exp
245 {
246 let e1 = $2 in
247 let e2 = $4 in
248 let e3 = $6 in
249 Ast.IfExp
250 { test = e1
251 ; then' = e2
252 ; else' = Some e3
253 ; pos = pos ()
254 }
255 }
256 | IF exp THEN exp
257 {
258 let e1 = $2 in
259 let e2 = $4 in
260 Ast.IfExp
261 { test = e1
262 ; then' = e2
263 ; else' = None
264 ; pos = pos ()
265 }
266 }
267 | WHILE exp DO exp
268 {
269 let e1 = $2 in
270 let e2 = $4 in
271 Ast.WhileExp
272 { test = e1
273 ; body = e2
274 ; pos = pos ()
275 }
276 }
277 | FOR ID ASSIGN exp TO exp DO exp
278 {
279 let var = $2 in
280 let e1 = $4 in
281 let e2 = $6 in
282 let e3 = $8 in
283 Ast.ForExp
284 { var = Sym.of_string var
285 ; escape = ref true
286 ; lo = e1
287 ; hi = e2
288 ; body = e3
289 ; pos = pos ()
290 }
291 }
292 | BREAK
293 { Ast.BreakExp (pos ()) }
294 | LPAREN exps RPAREN
295 { Ast.SeqExp $2 }
296 | LET decs IN exps END
297 {
298 let decs = $2 in
299 let exps = $4 in
300 Ast.LetExp {decs; body = Ast.SeqExp exps; pos = pos ()}
301 }
302 ;
303
304 exps:
305 | { [] }
306 | exp { ($1, pos ()) :: [] }
307 | exp SEMICOLON exps { ($1, pos ()) :: $3 }
308 ;
309
310 rec_fields_bind:
311 | ID EQ exp { (Sym.of_string $1, $3, pos ()) :: [] }
312 | ID EQ exp COMMA rec_fields_bind { (Sym.of_string $1, $3, pos ()) :: $5 }
313 ;
314
315 /* ------------------------------------------------------------------------- */
316 /* BEGIN unintuitive rules for decs (which avoid shift/reduce conflicts) */
317 /* ------------------------------------------------------------------------- */
318 /*
319 In order to support mutual recursion, we need to group consecutive
320 type and function declarations (see Tiger-book pages 97-99).
321
322 Initially, I defined the rules to do so as:
323
324 decs:
325 | dec { $1 :: [] }
326 | dec decs { $1 :: $2 }
327 ;
328 dec:
329 | var_dec { $1 }
330 | typ_decs { Ast.TypeDecs $1 }
331 | fun_decs { Ast.FunDecs $1 }
332 ;
333
334 which, while straightforward (and working, because ocamlyacc defaults to
335 shift in case of a conflict), nonetheless caused a shift/reduce conflict in
336 each of: typ_decs and fun_decs; where the parser did not know whether to
337 shift and stay in (typ|fun_)_dec state or to reduce and get back to dec
338 state.
339
340 Sadly, tagging the rules with a lower precedence (to explicitly favor
341 shifting) - does not help :(
342
343 %nonassoc LOWEST
344 ...
345 dec:
346 | var_dec { $1 }
347 | typ_decs %prec LOWEST { Ast.TypeDecs $1 }
348 | fun_decs %prec LOWEST { Ast.FunDecs $1 }
349 ;
350
351 The difficulty seems to be in the lack of a separator token which would be
352 able to definitively mark the end of each sequence of consecutive
353 (typ_|fun_) declarations.
354
355 Keeping this in mind, another alternative is to manually capture the possible
356 interspersion patterns in the rules like:
357
358 (N * foo) followed-by (N * not-foo)
359
360 for the exception of var_dec, which, since we do not need to group its
361 consecutive sequences, can be reduced upon first sighting.
362 */
363
364 decs:
365 | var_dec decs_any { $1 :: $2 }
366 | fun_decs decs_any_but_fun { (Ast.FunDecs $1) :: $2 }
367 | typ_decs decs_any_but_typ { (Ast.TypeDecs $1) :: $2 }
368 ;
369
370 decs_any:
371 | { [] }
372 | var_dec decs_any { $1 :: $2 }
373 | fun_decs decs_any_but_fun { (Ast.FunDecs $1) :: $2 }
374 | typ_decs decs_any_but_typ { (Ast.TypeDecs $1) :: $2 }
375 ;
376
377 decs_any_but_fun:
378 | { [] }
379 | var_dec decs_any { $1 :: $2 }
380 | typ_decs decs_any_but_typ { (Ast.TypeDecs $1) :: $2 }
381 ;
382
383 decs_any_but_typ:
384 | { [] }
385 | var_dec decs_any { $1 :: $2 }
386 | fun_decs decs_any_but_fun { (Ast.FunDecs $1) :: $2 }
387 ;
388
389 /*---------------------------------------------------------------------------*/
390 /* END unintuitive rules for decs (which avoid shift/reduce conflicts) */
391 /*---------------------------------------------------------------------------*/
392
393 typ_decs:
394 | typ_dec { $1 :: [] }
395 | typ_dec typ_decs { $1 :: $2 }
396 ;
397
398 typ_dec:
399 | TYPE ID EQ ID
400 {
401 let type_id_left = $2 in
402 let type_id_right = $4 in
403 let pos = pos () in (* FIXME: rhs id should have its own pos, no? *)
404 Ast.TypeDec
405 { name = Sym.of_string type_id_left
406 ; ty = Ast.NameTy {symbol = Sym.of_string type_id_right; pos}
407 ; pos
408 }
409 }
410 | TYPE ID EQ LBRACE type_fields RBRACE
411 {
412 let type_id = $2 in
413 let type_fields = $5 in
414 Ast.TypeDec
415 { name = Sym.of_string type_id
416 ; ty = Ast.RecordTy type_fields
417 ; pos = pos ()
418 }
419 }
420 | TYPE ID EQ ARRAY OF ID
421 {
422 let type_id = Sym.of_string $2 in
423 let element_type_id = Sym.of_string $6 in
424 let pos = pos () in
425 Ast.TypeDec
426 { name = type_id
427 ; ty = Ast.ArrayTy {symbol = element_type_id; pos}
428 ; pos
429 }
430 }
431 ;
432
433 var_dec:
434 | VAR ID maybe_type_sig ASSIGN exp
435 {
436 let var_id = Sym.of_string $2 in
437 let maybe_type_sig = $3 in
438 let exp = $5 in
439 let pos = pos () in
440 Ast.VarDec
441 { name = var_id
442 ; escape = ref true
443 ; typ = maybe_type_sig
444 ; init = exp
445 ; pos
446 }
447 }
448 ;
449
450 fun_decs:
451 | fun_dec { $1 :: [] }
452 | fun_dec fun_decs { $1 :: $2 }
453 ;
454
455 fun_dec:
456 | FUNCTION ID LPAREN type_fields RPAREN maybe_type_sig EQ exp
457 {
458 let name = Sym.of_string $2 in
459 let params = $4 in
460 let result = $6 in
461 let body = $8 in
462 let pos = pos () in
463 Ast.FunDec {name; params; result; body; pos}
464 }
465 ;
466
467 maybe_type_sig:
468 | { None }
469 | COLON ID { Some (Sym.of_string $2, pos ()) }
470 ;
471
472 type_fields:
473 |
474 { [] }
475 | ID COLON ID
476 {
477 let field =
478 Ast.Field
479 { name = Sym.of_string $1
480 ; escape = ref true
481 ; typ = Sym.of_string $3
482 ; pos = pos ()
483 }
484 in
485 field :: []
486 }
487 | ID COLON ID COMMA type_fields
488 {
489 let field =
490 Ast.Field
491 { name = Sym.of_string $1
492 ; escape = ref true
493 ; typ = Sym.of_string $3
494 ; pos = pos ()
495 }
496 in
497 field :: $5
498 }
499 ;
500
501 fun_args:
502 | { [] }
503 | exp { $1 :: [] }
504 | exp COMMA fun_args { $1 :: $3 }
505 ;
506
507 lvalue:
508 | ID
509 {
510 Ast.SimpleVar
511 { symbol = Sym.of_string $1
512 ; pos = pos ()
513 }
514 }
515 | lvalue LBRACK exp RBRACK
516 {
517 Ast.SubscriptVar
518 { var = $1
519 ; exp = $3
520 ; pos = pos ()
521 }
522 }
523 | lvalue DOT ID
524 {
525 Ast.FieldVar
526 { var = $1
527 ; symbol = Sym.of_string $3
528 ; pos = pos ()
529 }
530 }
531 ;
532
533 %%
This page took 0.083766 seconds and 4 git commands to generate.