Define (some) semantic errors
[tiger.ml.git] / compiler / src / lib / tiger / tiger_parser.mly
1 %{
2 module Ast = Tiger_absyn
3 module Err = Tiger_error
4 module Sym = Tiger_symbol
5
6 let pos () =
7 Tiger_position.of_lexing_positions
8 ~pos_start:(Parsing.symbol_start_pos ())
9 ~pos_end:(Parsing.symbol_end_pos ())
10 %}
11
12 /* Declarations */
13 %token AND
14 %token ARRAY
15 %token ASSIGN
16 %token BREAK
17 %token COLON
18 %token COMMA
19 %token DIVIDE
20 %token DO
21 %token DOT
22 %token ELSE
23 %token END
24 %token EOF
25 %token EQ
26 %token FOR
27 %token FUNCTION
28 %token GE
29 %token GT
30 %token <string> ID
31 %token IF
32 %token IN
33 %token <int> INT
34 %token LBRACE
35 %token LBRACK
36 %token LE
37 %token LET
38 %token LPAREN
39 %token LT
40 %token MINUS
41 %token NEQ
42 %token NIL
43 %token OF
44 %token OR
45 %token PLUS
46 %token RBRACE
47 %token RBRACK
48 %token RPAREN
49 %token SEMICOLON
50 %token <string> STRING
51 %token THEN
52 %token TIMES
53 %token TO
54 %token TYPE
55 %token VAR
56 %token WHILE
57
58 /* from lowest precedence */
59 %nonassoc THEN
60 %nonassoc ELSE
61 %nonassoc ASSIGN
62 %nonassoc OF DO
63 %left OR
64 %left AND
65 %nonassoc EQ NEQ GT LT GE LE
66 %left PLUS MINUS
67 %left TIMES DIVIDE
68 %nonassoc HIGHEST
69 /* to highest precedence */
70
71 %type <Tiger_absyn.t> program
72
73 %start program
74
75 %%
76
77 program:
78 | exp EOF { $1 }
79 | error {Err.raise (Err.Invalid_syntax (pos ()))}
80 ;
81
82 exp:
83 | NIL
84 { Ast.NilExp }
85 | INT
86 { Ast.IntExp $1 }
87 | MINUS exp %prec HIGHEST
88 {
89 Ast.OpExp
90 { left = Ast.IntExp 0
91 ; oper = Ast.MinusOp
92 ; right = $2
93 ; pos = pos ()
94 }
95 }
96 | lvalue LBRACK exp RBRACK OF exp
97 {
98 match $1 with
99 | Ast.SimpleVar {symbol=typ; _} ->
100 Ast.ArrayExp
101 { typ
102 ; size = $3
103 ; init = $6
104 ; pos = pos ()
105 }
106 | Ast.SubscriptVar _ | Ast.FieldVar _ ->
107 raise Parse_error
108 }
109 | ID LBRACE rec_fields_bind RBRACE
110 {
111 let type_id = $1 in
112 let fields = $3 in
113 let typ = Sym.of_string type_id in
114 let pos = pos () in
115 Ast.RecordExp {fields; typ; pos}
116 }
117 | lvalue
118 { Ast.VarExp $1 }
119 | lvalue ASSIGN exp
120 {
121 let var = $1 in
122 let exp = $3 in
123 let pos = pos () in
124 Ast.AssignExp {var; exp; pos}
125 }
126 | STRING
127 { Ast.StringExp {string = $1; pos = pos ()} }
128 | ID LPAREN fun_args RPAREN
129 {
130 Ast.CallExp
131 { func = Sym.of_string $1
132 ; args = $3
133 ; pos = pos ()
134 }
135 }
136 | exp PLUS exp
137 {
138 Ast.OpExp
139 { left = $1
140 ; oper = Ast.PlusOp
141 ; right = $3
142 ; pos = pos ()
143 }
144 }
145 | exp MINUS exp
146 {
147 Ast.OpExp
148 { left = $1
149 ; oper = Ast.MinusOp
150 ; right = $3
151 ; pos = pos ()
152 }
153 }
154 | exp TIMES exp
155 {
156 Ast.OpExp
157 { left = $1
158 ; oper = Ast.TimesOp
159 ; right = $3
160 ; pos = pos ()
161 }
162 }
163 | exp DIVIDE exp
164 {
165 Ast.OpExp
166 { left = $1
167 ; oper = Ast.DivideOp
168 ; right = $3
169 ; pos = pos ()
170 }
171 }
172 | exp EQ exp
173 {
174 Ast.OpExp
175 { left = $1
176 ; oper = Ast.EqOp
177 ; right = $3
178 ; pos = pos ()
179 }
180 }
181 | exp NEQ exp
182 {
183 Ast.OpExp
184 { left = $1
185 ; oper = Ast.NeqOp
186 ; right = $3
187 ; pos = pos ()
188 }
189 }
190 | exp GT exp
191 {
192 Ast.OpExp
193 { left = $1
194 ; oper = Ast.GtOp
195 ; right = $3
196 ; pos = pos ()
197 }
198 }
199 | exp LT exp
200 {
201 Ast.OpExp
202 { left = $1
203 ; oper = Ast.LtOp
204 ; right = $3
205 ; pos = pos ()
206 }
207 }
208 | exp GE exp
209 {
210 Ast.OpExp
211 { left = $1
212 ; oper = Ast.GeOp
213 ; right = $3
214 ; pos = pos ()
215 }
216 }
217 | exp LE exp
218 {
219 Ast.OpExp
220 { left = $1
221 ; oper = Ast.LeOp
222 ; right = $3
223 ; pos = pos ()
224 }
225 }
226 | exp AND exp
227 {
228 let e1 = $1 in
229 let e2 = $3 in
230 Ast.IfExp
231 { test = e1
232 ; then' = e2
233 ; else' = Some (Ast.IntExp 0)
234 ; pos = pos ()
235 }
236 }
237 | exp OR exp
238 {
239 let e1 = $1 in
240 let e2 = $3 in
241 Ast.IfExp
242 { test = e1
243 ; then' = Ast.IntExp 1
244 ; else' = Some e2
245 ; pos = pos ()
246 }
247 }
248 | IF exp THEN exp ELSE exp
249 {
250 let e1 = $2 in
251 let e2 = $4 in
252 let e3 = $6 in
253 Ast.IfExp
254 { test = e1
255 ; then' = e2
256 ; else' = Some e3
257 ; pos = pos ()
258 }
259 }
260 | IF exp THEN exp
261 {
262 let e1 = $2 in
263 let e2 = $4 in
264 Ast.IfExp
265 { test = e1
266 ; then' = e2
267 ; else' = None
268 ; pos = pos ()
269 }
270 }
271 | WHILE exp DO exp
272 {
273 let e1 = $2 in
274 let e2 = $4 in
275 Ast.WhileExp
276 { test = e1
277 ; body = e2
278 ; pos = pos ()
279 }
280 }
281 | FOR ID ASSIGN exp TO exp DO exp
282 {
283 let var = $2 in
284 let e1 = $4 in
285 let e2 = $6 in
286 let e3 = $8 in
287 Ast.ForExp
288 { var = Sym.of_string var
289 ; escape = ref true
290 ; lo = e1
291 ; hi = e2
292 ; body = e3
293 ; pos = pos ()
294 }
295 }
296 | BREAK
297 { Ast.BreakExp (pos ()) }
298 | LPAREN exps RPAREN
299 { Ast.SeqExp $2 }
300 | LET decs IN exps END
301 {
302 let decs = $2 in
303 let exps = $4 in
304 Ast.LetExp {decs; body = Ast.SeqExp exps; pos = pos ()}
305 }
306 ;
307
308 exps:
309 | { [] }
310 | exp { ($1, pos ()) :: [] }
311 | exp SEMICOLON exps { ($1, pos ()) :: $3 }
312 ;
313
314 rec_fields_bind:
315 | ID EQ exp { (Sym.of_string $1, $3, pos ()) :: [] }
316 | ID EQ exp COMMA rec_fields_bind { (Sym.of_string $1, $3, pos ()) :: $5 }
317 ;
318
319 /* ------------------------------------------------------------------------- */
320 /* BEGIN unintuitive rules for decs (which avoid shift/reduce conflicts) */
321 /* ------------------------------------------------------------------------- */
322 /*
323 In order to support mutual recursion, we need to group consecutive
324 type and function declarations (see Tiger-book pages 97-99).
325
326 Initially, I defined the rules to do so as:
327
328 decs:
329 | dec { $1 :: [] }
330 | dec decs { $1 :: $2 }
331 ;
332 dec:
333 | var_dec { $1 }
334 | typ_decs { Ast.TypeDecs $1 }
335 | fun_decs { Ast.FunDecs $1 }
336 ;
337
338 which, while straightforward (and working, because ocamlyacc defaults to
339 shift in case of a conflict), nonetheless caused a shift/reduce conflict in
340 each of: typ_decs and fun_decs; where the parser did not know whether to
341 shift and stay in (typ|fun_)_dec state or to reduce and get back to dec
342 state.
343
344 Sadly, tagging the rules with a lower precedence (to explicitly favor
345 shifting) - does not help :(
346
347 %nonassoc LOWEST
348 ...
349 dec:
350 | var_dec { $1 }
351 | typ_decs %prec LOWEST { Ast.TypeDecs $1 }
352 | fun_decs %prec LOWEST { Ast.FunDecs $1 }
353 ;
354
355 The difficulty seems to be in the lack of a separator token which would be
356 able to definitively mark the end of each sequence of consecutive
357 (typ_|fun_) declarations.
358
359 Keeping this in mind, another alternative is to manually capture the possible
360 interspersion patterns in the rules like:
361
362 (N * foo) followed-by (N * not-foo)
363
364 for the exception of var_dec, which, since we do not need to group its
365 consecutive sequences, can be reduced upon first sighting.
366 */
367
368 decs:
369 | var_dec decs_any { $1 :: $2 }
370 | fun_decs decs_any_but_fun { (Ast.FunDecs $1) :: $2 }
371 | typ_decs decs_any_but_typ { (Ast.TypeDecs $1) :: $2 }
372 ;
373
374 decs_any:
375 | { [] }
376 | var_dec decs_any { $1 :: $2 }
377 | fun_decs decs_any_but_fun { (Ast.FunDecs $1) :: $2 }
378 | typ_decs decs_any_but_typ { (Ast.TypeDecs $1) :: $2 }
379 ;
380
381 decs_any_but_fun:
382 | { [] }
383 | var_dec decs_any { $1 :: $2 }
384 | typ_decs decs_any_but_typ { (Ast.TypeDecs $1) :: $2 }
385 ;
386
387 decs_any_but_typ:
388 | { [] }
389 | var_dec decs_any { $1 :: $2 }
390 | fun_decs decs_any_but_fun { (Ast.FunDecs $1) :: $2 }
391 ;
392
393 /*---------------------------------------------------------------------------*/
394 /* END unintuitive rules for decs (which avoid shift/reduce conflicts) */
395 /*---------------------------------------------------------------------------*/
396
397 typ_decs:
398 | typ_dec { $1 :: [] }
399 | typ_dec typ_decs { $1 :: $2 }
400 ;
401
402 typ_dec:
403 | TYPE ID EQ ID
404 {
405 let type_id_left = $2 in
406 let type_id_right = $4 in
407 let pos = pos () in (* FIXME: rhs id should have its own pos, no? *)
408 Ast.TypeDec
409 { name = Sym.of_string type_id_left
410 ; ty = Ast.NameTy {symbol = Sym.of_string type_id_right; pos}
411 ; pos
412 }
413 }
414 | TYPE ID EQ LBRACE type_fields RBRACE
415 {
416 let type_id = $2 in
417 let type_fields = $5 in
418 Ast.TypeDec
419 { name = Sym.of_string type_id
420 ; ty = Ast.RecordTy type_fields
421 ; pos = pos ()
422 }
423 }
424 | TYPE ID EQ ARRAY OF ID
425 {
426 let type_id = Sym.of_string $2 in
427 let element_type_id = Sym.of_string $6 in
428 let pos = pos () in
429 Ast.TypeDec
430 { name = type_id
431 ; ty = Ast.ArrayTy {symbol = element_type_id; pos}
432 ; pos
433 }
434 }
435 ;
436
437 var_dec:
438 | VAR ID maybe_type_sig ASSIGN exp
439 {
440 let var_id = Sym.of_string $2 in
441 let maybe_type_sig = $3 in
442 let exp = $5 in
443 let pos = pos () in
444 Ast.VarDec
445 { name = var_id
446 ; escape = ref true
447 ; typ = maybe_type_sig
448 ; init = exp
449 ; pos
450 }
451 }
452 ;
453
454 fun_decs:
455 | fun_dec { $1 :: [] }
456 | fun_dec fun_decs { $1 :: $2 }
457 ;
458
459 fun_dec:
460 | FUNCTION ID LPAREN type_fields RPAREN maybe_type_sig EQ exp
461 {
462 let name = Sym.of_string $2 in
463 let params = $4 in
464 let result = $6 in
465 let body = $8 in
466 let pos = pos () in
467 Ast.FunDec {name; params; result; body; pos}
468 }
469 ;
470
471 maybe_type_sig:
472 | { None }
473 | COLON ID { Some (Sym.of_string $2, pos ()) }
474 ;
475
476 type_fields:
477 |
478 { [] }
479 | ID COLON ID
480 {
481 let field =
482 Ast.Field
483 { name = Sym.of_string $1
484 ; escape = ref true
485 ; typ = Sym.of_string $3
486 ; pos = pos ()
487 }
488 in
489 field :: []
490 }
491 | ID COLON ID COMMA type_fields
492 {
493 let field =
494 Ast.Field
495 { name = Sym.of_string $1
496 ; escape = ref true
497 ; typ = Sym.of_string $3
498 ; pos = pos ()
499 }
500 in
501 field :: $5
502 }
503 ;
504
505 fun_args:
506 | { [] }
507 | exp { $1 :: [] }
508 | exp COMMA fun_args { $1 :: $3 }
509 ;
510
511 lvalue:
512 | ID
513 {
514 Ast.SimpleVar
515 { symbol = Sym.of_string $1
516 ; pos = pos ()
517 }
518 }
519 | lvalue LBRACK exp RBRACK
520 {
521 Ast.SubscriptVar
522 { var = $1
523 ; exp = $3
524 ; pos = pos ()
525 }
526 }
527 | lvalue DOT ID
528 {
529 Ast.FieldVar
530 { var = $1
531 ; symbol = Sym.of_string $3
532 ; pos = pos ()
533 }
534 }
535 ;
536
537 %%
This page took 0.069428 seconds and 4 git commands to generate.