Fix grammar - allow empty set of record fields
[tiger.ml.git] / compiler / src / lib / tiger / tiger_parser.mly
1 %{
2 module Ast = Tiger_absyn
3 module Err = Tiger_error
4 module Sym = Tiger_symbol
5
6 let pos () =
7 Tiger_position.of_lexing_positions
8 ~pos_start:(Parsing.symbol_start_pos ())
9 ~pos_end:(Parsing.symbol_end_pos ())
10 %}
11
12 /* Declarations */
13 %token AND
14 %token ARRAY
15 %token ASSIGN
16 %token BREAK
17 %token COLON
18 %token COMMA
19 %token DIVIDE
20 %token DO
21 %token DOT
22 %token ELSE
23 %token END
24 %token EOF
25 %token EQ
26 %token FOR
27 %token FUNCTION
28 %token GE
29 %token GT
30 %token <string> ID
31 %token IF
32 %token IN
33 %token <int> INT
34 %token LBRACE
35 %token LBRACK
36 %token LE
37 %token LET
38 %token LPAREN
39 %token LT
40 %token MINUS
41 %token NEQ
42 %token NIL
43 %token OF
44 %token OR
45 %token PLUS
46 %token RBRACE
47 %token RBRACK
48 %token RPAREN
49 %token SEMICOLON
50 %token <string> STRING
51 %token THEN
52 %token TIMES
53 %token TO
54 %token TYPE
55 %token VAR
56 %token WHILE
57
58 /* from lowest precedence */
59 %nonassoc THEN
60 %nonassoc ELSE
61 %nonassoc ASSIGN
62 %nonassoc OF DO
63 %left OR
64 %left AND
65 %nonassoc EQ NEQ GT LT GE LE
66 %left PLUS MINUS
67 %left TIMES DIVIDE
68 %nonassoc HIGHEST
69 /* to highest precedence */
70
71 %type <Tiger_absyn.t> program
72
73 %start program
74
75 %%
76
77 program:
78 | exp EOF { $1 }
79 | error {Err.raise (Err.Invalid_syntax (pos ()))}
80 ;
81
82 exp:
83 | NIL
84 { Ast.NilExp }
85 | INT
86 { Ast.IntExp $1 }
87 | MINUS exp %prec HIGHEST
88 {
89 Ast.OpExp
90 { left = Ast.IntExp 0
91 ; oper = Ast.MinusOp
92 ; right = $2
93 ; pos = pos ()
94 }
95 }
96 | lvalue LBRACK exp RBRACK OF exp
97 {
98 match $1 with
99 | Ast.SimpleVar {symbol=typ; _} ->
100 Ast.ArrayExp
101 { typ
102 ; size = $3
103 ; init = $6
104 ; pos = pos ()
105 }
106 | Ast.SubscriptVar _ | Ast.FieldVar _ ->
107 raise Parse_error
108 }
109 | ID LBRACE rec_fields_bind RBRACE
110 {
111 let type_id = $1 in
112 let fields = $3 in
113 let typ = Sym.of_string type_id in
114 let pos = pos () in
115 Ast.RecordExp {fields; typ; pos}
116 }
117 | lvalue
118 { Ast.VarExp $1 }
119 | lvalue ASSIGN exp
120 {
121 let var = $1 in
122 let exp = $3 in
123 let pos = pos () in
124 Ast.AssignExp {var; exp; pos}
125 }
126 | STRING
127 { Ast.StringExp {string = $1; pos = pos ()} }
128 | ID LPAREN fun_args RPAREN
129 {
130 Ast.CallExp
131 { func = Sym.of_string $1
132 ; args = $3
133 ; pos = pos ()
134 }
135 }
136 | exp PLUS exp
137 {
138 Ast.OpExp
139 { left = $1
140 ; oper = Ast.PlusOp
141 ; right = $3
142 ; pos = pos ()
143 }
144 }
145 | exp MINUS exp
146 {
147 Ast.OpExp
148 { left = $1
149 ; oper = Ast.MinusOp
150 ; right = $3
151 ; pos = pos ()
152 }
153 }
154 | exp TIMES exp
155 {
156 Ast.OpExp
157 { left = $1
158 ; oper = Ast.TimesOp
159 ; right = $3
160 ; pos = pos ()
161 }
162 }
163 | exp DIVIDE exp
164 {
165 Ast.OpExp
166 { left = $1
167 ; oper = Ast.DivideOp
168 ; right = $3
169 ; pos = pos ()
170 }
171 }
172 | exp EQ exp
173 {
174 Ast.OpExp
175 { left = $1
176 ; oper = Ast.EqOp
177 ; right = $3
178 ; pos = pos ()
179 }
180 }
181 | exp NEQ exp
182 {
183 Ast.OpExp
184 { left = $1
185 ; oper = Ast.NeqOp
186 ; right = $3
187 ; pos = pos ()
188 }
189 }
190 | exp GT exp
191 {
192 Ast.OpExp
193 { left = $1
194 ; oper = Ast.GtOp
195 ; right = $3
196 ; pos = pos ()
197 }
198 }
199 | exp LT exp
200 {
201 Ast.OpExp
202 { left = $1
203 ; oper = Ast.LtOp
204 ; right = $3
205 ; pos = pos ()
206 }
207 }
208 | exp GE exp
209 {
210 Ast.OpExp
211 { left = $1
212 ; oper = Ast.GeOp
213 ; right = $3
214 ; pos = pos ()
215 }
216 }
217 | exp LE exp
218 {
219 Ast.OpExp
220 { left = $1
221 ; oper = Ast.LeOp
222 ; right = $3
223 ; pos = pos ()
224 }
225 }
226 | exp AND exp
227 {
228 let e1 = $1 in
229 let e2 = $3 in
230 Ast.IfExp
231 { test = e1
232 ; then' = e2
233 ; else' = Some (Ast.IntExp 0)
234 ; pos = pos ()
235 }
236 }
237 | exp OR exp
238 {
239 let e1 = $1 in
240 let e2 = $3 in
241 Ast.IfExp
242 { test = e1
243 ; then' = Ast.IntExp 1
244 ; else' = Some e2
245 ; pos = pos ()
246 }
247 }
248 | IF exp THEN exp ELSE exp
249 {
250 let e1 = $2 in
251 let e2 = $4 in
252 let e3 = $6 in
253 Ast.IfExp
254 { test = e1
255 ; then' = e2
256 ; else' = Some e3
257 ; pos = pos ()
258 }
259 }
260 | IF exp THEN exp
261 {
262 let e1 = $2 in
263 let e2 = $4 in
264 Ast.IfExp
265 { test = e1
266 ; then' = e2
267 ; else' = None
268 ; pos = pos ()
269 }
270 }
271 | WHILE exp DO exp
272 {
273 let e1 = $2 in
274 let e2 = $4 in
275 Ast.WhileExp
276 { test = e1
277 ; body = e2
278 ; pos = pos ()
279 }
280 }
281 | FOR ID ASSIGN exp TO exp DO exp
282 {
283 let var = $2 in
284 let e1 = $4 in
285 let e2 = $6 in
286 let e3 = $8 in
287 Ast.ForExp
288 { var = Sym.of_string var
289 ; escape = ref true
290 ; lo = e1
291 ; hi = e2
292 ; body = e3
293 ; pos = pos ()
294 }
295 }
296 | BREAK
297 { Ast.BreakExp (pos ()) }
298 | LPAREN exps RPAREN
299 { Ast.SeqExp $2 }
300 | LET decs IN exps END
301 {
302 let decs = $2 in
303 let exps = $4 in
304 Ast.LetExp {decs; body = Ast.SeqExp exps; pos = pos ()}
305 }
306 ;
307
308 exps:
309 | { [] }
310 | exp { ($1, pos ()) :: [] }
311 | exp SEMICOLON exps { ($1, pos ()) :: $3 }
312 ;
313
314 rec_fields_bind:
315 | { [] }
316 | ID EQ exp { (Sym.of_string $1, $3, pos ()) :: [] }
317 | ID EQ exp COMMA rec_fields_bind { (Sym.of_string $1, $3, pos ()) :: $5 }
318 ;
319
320 /* ------------------------------------------------------------------------- */
321 /* BEGIN unintuitive rules for decs (which avoid shift/reduce conflicts) */
322 /* ------------------------------------------------------------------------- */
323 /*
324 In order to support mutual recursion, we need to group consecutive
325 type and function declarations (see Tiger-book pages 97-99).
326
327 Initially, I defined the rules to do so as:
328
329 decs:
330 | dec { $1 :: [] }
331 | dec decs { $1 :: $2 }
332 ;
333 dec:
334 | var_dec { $1 }
335 | typ_decs { Ast.TypeDecs $1 }
336 | fun_decs { Ast.FunDecs $1 }
337 ;
338
339 which, while straightforward (and working, because ocamlyacc defaults to
340 shift in case of a conflict), nonetheless caused a shift/reduce conflict in
341 each of: typ_decs and fun_decs; where the parser did not know whether to
342 shift and stay in (typ|fun_)_dec state or to reduce and get back to dec
343 state.
344
345 Sadly, tagging the rules with a lower precedence (to explicitly favor
346 shifting) - does not help :(
347
348 %nonassoc LOWEST
349 ...
350 dec:
351 | var_dec { $1 }
352 | typ_decs %prec LOWEST { Ast.TypeDecs $1 }
353 | fun_decs %prec LOWEST { Ast.FunDecs $1 }
354 ;
355
356 The difficulty seems to be in the lack of a separator token which would be
357 able to definitively mark the end of each sequence of consecutive
358 (typ_|fun_) declarations.
359
360 Keeping this in mind, another alternative is to manually capture the possible
361 interspersion patterns in the rules like:
362
363 (N * foo) followed-by (N * not-foo)
364
365 for the exception of var_dec, which, since we do not need to group its
366 consecutive sequences, can be reduced upon first sighting.
367 */
368
369 decs:
370 | var_dec decs_any { $1 :: $2 }
371 | fun_decs decs_any_but_fun { (Ast.FunDecs $1) :: $2 }
372 | typ_decs decs_any_but_typ { (Ast.TypeDecs $1) :: $2 }
373 ;
374
375 decs_any:
376 | { [] }
377 | var_dec decs_any { $1 :: $2 }
378 | fun_decs decs_any_but_fun { (Ast.FunDecs $1) :: $2 }
379 | typ_decs decs_any_but_typ { (Ast.TypeDecs $1) :: $2 }
380 ;
381
382 decs_any_but_fun:
383 | { [] }
384 | var_dec decs_any { $1 :: $2 }
385 | typ_decs decs_any_but_typ { (Ast.TypeDecs $1) :: $2 }
386 ;
387
388 decs_any_but_typ:
389 | { [] }
390 | var_dec decs_any { $1 :: $2 }
391 | fun_decs decs_any_but_fun { (Ast.FunDecs $1) :: $2 }
392 ;
393
394 /*---------------------------------------------------------------------------*/
395 /* END unintuitive rules for decs (which avoid shift/reduce conflicts) */
396 /*---------------------------------------------------------------------------*/
397
398 typ_decs:
399 | typ_dec { $1 :: [] }
400 | typ_dec typ_decs { $1 :: $2 }
401 ;
402
403 typ_dec:
404 | TYPE ID EQ ID
405 {
406 let type_id_left = $2 in
407 let type_id_right = $4 in
408 let pos = pos () in (* FIXME: rhs id should have its own pos, no? *)
409 Ast.TypeDec
410 { name = Sym.of_string type_id_left
411 ; ty = Ast.NameTy {symbol = Sym.of_string type_id_right; pos}
412 ; pos
413 }
414 }
415 | TYPE ID EQ LBRACE type_fields RBRACE
416 {
417 let type_id = $2 in
418 let type_fields = $5 in
419 Ast.TypeDec
420 { name = Sym.of_string type_id
421 ; ty = Ast.RecordTy type_fields
422 ; pos = pos ()
423 }
424 }
425 | TYPE ID EQ ARRAY OF ID
426 {
427 let type_id = Sym.of_string $2 in
428 let element_type_id = Sym.of_string $6 in
429 let pos = pos () in
430 Ast.TypeDec
431 { name = type_id
432 ; ty = Ast.ArrayTy {symbol = element_type_id; pos}
433 ; pos
434 }
435 }
436 ;
437
438 var_dec:
439 | VAR ID maybe_type_sig ASSIGN exp
440 {
441 let var_id = Sym.of_string $2 in
442 let maybe_type_sig = $3 in
443 let exp = $5 in
444 let pos = pos () in
445 Ast.VarDec
446 { name = var_id
447 ; escape = ref true
448 ; typ = maybe_type_sig
449 ; init = exp
450 ; pos
451 }
452 }
453 ;
454
455 fun_decs:
456 | fun_dec { $1 :: [] }
457 | fun_dec fun_decs { $1 :: $2 }
458 ;
459
460 fun_dec:
461 | FUNCTION ID LPAREN type_fields RPAREN maybe_type_sig EQ exp
462 {
463 let name = Sym.of_string $2 in
464 let params = $4 in
465 let result = $6 in
466 let body = $8 in
467 let pos = pos () in
468 Ast.FunDec {name; params; result; body; pos}
469 }
470 ;
471
472 maybe_type_sig:
473 | { None }
474 | COLON ID { Some (Sym.of_string $2, pos ()) }
475 ;
476
477 type_fields:
478 |
479 { [] }
480 | ID COLON ID
481 {
482 let field =
483 Ast.Field
484 { name = Sym.of_string $1
485 ; escape = ref true
486 ; typ = Sym.of_string $3
487 ; pos = pos ()
488 }
489 in
490 field :: []
491 }
492 | ID COLON ID COMMA type_fields
493 {
494 let field =
495 Ast.Field
496 { name = Sym.of_string $1
497 ; escape = ref true
498 ; typ = Sym.of_string $3
499 ; pos = pos ()
500 }
501 in
502 field :: $5
503 }
504 ;
505
506 fun_args:
507 | { [] }
508 | exp { $1 :: [] }
509 | exp COMMA fun_args { $1 :: $3 }
510 ;
511
512 lvalue:
513 | ID
514 {
515 Ast.SimpleVar
516 { symbol = Sym.of_string $1
517 ; pos = pos ()
518 }
519 }
520 | lvalue LBRACK exp RBRACK
521 {
522 Ast.SubscriptVar
523 { var = $1
524 ; exp = $3
525 ; pos = pos ()
526 }
527 }
528 | lvalue DOT ID
529 {
530 Ast.FieldVar
531 { var = $1
532 ; symbol = Sym.of_string $3
533 ; pos = pos ()
534 }
535 }
536 ;
537
538 %%
This page took 0.074106 seconds and 4 git commands to generate.