Define (some) semantic errors
[tiger.ml.git] / compiler / src / lib / tiger / tiger_parser.mly
CommitLineData
543d3420 1%{
28875fec 2 module Ast = Tiger_absyn
c16dd441 3 module Err = Tiger_error
28875fec
SK
4 module Sym = Tiger_symbol
5
6 let pos () =
7 Tiger_position.of_lexing_positions
8 ~pos_start:(Parsing.symbol_start_pos ())
9 ~pos_end:(Parsing.symbol_end_pos ())
543d3420
SK
10%}
11
12/* Declarations */
13%token AND
14%token ARRAY
15%token ASSIGN
16%token BREAK
17%token COLON
18%token COMMA
19%token DIVIDE
20%token DO
21%token DOT
22%token ELSE
23%token END
24%token EOF
25%token EQ
26%token FOR
27%token FUNCTION
28%token GE
29%token GT
30%token <string> ID
31%token IF
32%token IN
33%token <int> INT
34%token LBRACE
35%token LBRACK
36%token LE
37%token LET
38%token LPAREN
39%token LT
40%token MINUS
41%token NEQ
42%token NIL
43%token OF
44%token OR
45%token PLUS
46%token RBRACE
47%token RBRACK
48%token RPAREN
49%token SEMICOLON
50%token <string> STRING
51%token THEN
52%token TIMES
53%token TO
54%token TYPE
55%token VAR
56%token WHILE
57
58/* from lowest precedence */
28875fec
SK
59%nonassoc THEN
60%nonassoc ELSE
c7598faf 61%nonassoc ASSIGN
28875fec 62%nonassoc OF DO
543d3420
SK
63%left OR
64%left AND
65%nonassoc EQ NEQ GT LT GE LE
66%left PLUS MINUS
67%left TIMES DIVIDE
28875fec 68%nonassoc HIGHEST
543d3420
SK
69/* to highest precedence */
70
28875fec 71%type <Tiger_absyn.t> program
543d3420
SK
72
73%start program
74
75%%
76
7c14a966
SK
77program:
78 | exp EOF { $1 }
c16dd441 79 | error {Err.raise (Err.Invalid_syntax (pos ()))}
7c14a966 80 ;
543d3420
SK
81
82exp:
83 | NIL
28875fec 84 { Ast.NilExp }
543d3420 85 | INT
28875fec
SK
86 { Ast.IntExp $1 }
87 | MINUS exp %prec HIGHEST
88 {
89 Ast.OpExp
90 { left = Ast.IntExp 0
91 ; oper = Ast.MinusOp
92 ; right = $2
93 ; pos = pos ()
94 }
95 }
96 | lvalue LBRACK exp RBRACK OF exp
97 {
98 match $1 with
99 | Ast.SimpleVar {symbol=typ; _} ->
100 Ast.ArrayExp
101 { typ
102 ; size = $3
103 ; init = $6
104 ; pos = pos ()
105 }
106 | Ast.SubscriptVar _ | Ast.FieldVar _ ->
107 raise Parse_error
108 }
109 | ID LBRACE rec_fields_bind RBRACE
543d3420 110 {
ef945634 111 let type_id = $1 in
28875fec
SK
112 let fields = $3 in
113 let typ = Sym.of_string type_id in
114 let pos = pos () in
115 Ast.RecordExp {fields; typ; pos}
543d3420
SK
116 }
117 | lvalue
28875fec 118 { Ast.VarExp $1 }
543d3420
SK
119 | lvalue ASSIGN exp
120 {
28875fec
SK
121 let var = $1 in
122 let exp = $3 in
123 let pos = pos () in
124 Ast.AssignExp {var; exp; pos}
543d3420
SK
125 }
126 | STRING
28875fec 127 { Ast.StringExp {string = $1; pos = pos ()} }
29de275c
SK
128 | ID LPAREN fun_args RPAREN
129 {
28875fec
SK
130 Ast.CallExp
131 { func = Sym.of_string $1
132 ; args = $3
133 ; pos = pos ()
134 }
543d3420 135 }
6a74bf44 136 | exp PLUS exp
543d3420 137 {
28875fec
SK
138 Ast.OpExp
139 { left = $1
140 ; oper = Ast.PlusOp
141 ; right = $3
142 ; pos = pos ()
143 }
6a74bf44
SK
144 }
145 | exp MINUS exp
146 {
28875fec
SK
147 Ast.OpExp
148 { left = $1
149 ; oper = Ast.MinusOp
150 ; right = $3
151 ; pos = pos ()
152 }
6a74bf44
SK
153 }
154 | exp TIMES exp
155 {
28875fec
SK
156 Ast.OpExp
157 { left = $1
158 ; oper = Ast.TimesOp
159 ; right = $3
160 ; pos = pos ()
161 }
6a74bf44
SK
162 }
163 | exp DIVIDE exp
164 {
28875fec
SK
165 Ast.OpExp
166 { left = $1
167 ; oper = Ast.DivideOp
168 ; right = $3
169 ; pos = pos ()
170 }
6a74bf44
SK
171 }
172 | exp EQ exp
173 {
28875fec
SK
174 Ast.OpExp
175 { left = $1
176 ; oper = Ast.EqOp
177 ; right = $3
178 ; pos = pos ()
179 }
6a74bf44
SK
180 }
181 | exp NEQ exp
182 {
28875fec
SK
183 Ast.OpExp
184 { left = $1
185 ; oper = Ast.NeqOp
186 ; right = $3
187 ; pos = pos ()
188 }
6a74bf44
SK
189 }
190 | exp GT exp
191 {
28875fec
SK
192 Ast.OpExp
193 { left = $1
194 ; oper = Ast.GtOp
195 ; right = $3
196 ; pos = pos ()
197 }
6a74bf44
SK
198 }
199 | exp LT exp
200 {
28875fec
SK
201 Ast.OpExp
202 { left = $1
203 ; oper = Ast.LtOp
204 ; right = $3
205 ; pos = pos ()
206 }
6a74bf44
SK
207 }
208 | exp GE exp
209 {
28875fec
SK
210 Ast.OpExp
211 { left = $1
212 ; oper = Ast.GeOp
213 ; right = $3
214 ; pos = pos ()
215 }
6a74bf44
SK
216 }
217 | exp LE exp
218 {
28875fec
SK
219 Ast.OpExp
220 { left = $1
221 ; oper = Ast.LeOp
222 ; right = $3
223 ; pos = pos ()
224 }
6a74bf44
SK
225 }
226 | exp AND exp
227 {
28875fec
SK
228 let e1 = $1 in
229 let e2 = $3 in
230 Ast.IfExp
231 { test = e1
232 ; then' = e2
233 ; else' = Some (Ast.IntExp 0)
234 ; pos = pos ()
235 }
6a74bf44
SK
236 }
237 | exp OR exp
238 {
28875fec
SK
239 let e1 = $1 in
240 let e2 = $3 in
241 Ast.IfExp
242 { test = e1
243 ; then' = Ast.IntExp 1
244 ; else' = Some e2
245 ; pos = pos ()
246 }
543d3420
SK
247 }
248 | IF exp THEN exp ELSE exp
249 {
250 let e1 = $2 in
251 let e2 = $4 in
252 let e3 = $6 in
28875fec
SK
253 Ast.IfExp
254 { test = e1
255 ; then' = e2
256 ; else' = Some e3
257 ; pos = pos ()
258 }
543d3420
SK
259 }
260 | IF exp THEN exp
261 {
28875fec
SK
262 let e1 = $2 in
263 let e2 = $4 in
264 Ast.IfExp
265 { test = e1
266 ; then' = e2
267 ; else' = None
268 ; pos = pos ()
269 }
543d3420
SK
270 }
271 | WHILE exp DO exp
272 {
28875fec
SK
273 let e1 = $2 in
274 let e2 = $4 in
275 Ast.WhileExp
276 { test = e1
277 ; body = e2
278 ; pos = pos ()
279 }
543d3420 280 }
ffff6f35 281 | FOR ID ASSIGN exp TO exp DO exp
543d3420 282 {
28875fec
SK
283 let var = $2 in
284 let e1 = $4 in
285 let e2 = $6 in
286 let e3 = $8 in
287 Ast.ForExp
288 { var = Sym.of_string var
289 ; escape = ref true
290 ; lo = e1
291 ; hi = e2
292 ; body = e3
293 ; pos = pos ()
294 }
543d3420
SK
295 }
296 | BREAK
28875fec 297 { Ast.BreakExp (pos ()) }
e7dfac93 298 | LPAREN exps RPAREN
28875fec 299 { Ast.SeqExp $2 }
e7dfac93 300 | LET decs IN exps END
543d3420
SK
301 {
302 let decs = $2 in
e7dfac93 303 let exps = $4 in
28875fec 304 Ast.LetExp {decs; body = Ast.SeqExp exps; pos = pos ()}
543d3420 305 }
a0db5523 306 ;
543d3420 307
28875fec
SK
308exps:
309 | { [] }
310 | exp { ($1, pos ()) :: [] }
311 | exp SEMICOLON exps { ($1, pos ()) :: $3 }
a0db5523 312 ;
b3c9d54d 313
28875fec
SK
314rec_fields_bind:
315 | ID EQ exp { (Sym.of_string $1, $3, pos ()) :: [] }
316 | ID EQ exp COMMA rec_fields_bind { (Sym.of_string $1, $3, pos ()) :: $5 }
a0db5523 317 ;
543d3420 318
46486dc8
SK
319/* ------------------------------------------------------------------------- */
320/* BEGIN unintuitive rules for decs (which avoid shift/reduce conflicts) */
321/* ------------------------------------------------------------------------- */
322/*
323 In order to support mutual recursion, we need to group consecutive
324 type and function declarations (see Tiger-book pages 97-99).
325
326 Initially, I defined the rules to do so as:
327
328 decs:
329 | dec { $1 :: [] }
330 | dec decs { $1 :: $2 }
331 ;
332 dec:
333 | var_dec { $1 }
334 | typ_decs { Ast.TypeDecs $1 }
335 | fun_decs { Ast.FunDecs $1 }
336 ;
337
338 which, while straightforward (and working, because ocamlyacc defaults to
339 shift in case of a conflict), nonetheless caused a shift/reduce conflict in
340 each of: typ_decs and fun_decs; where the parser did not know whether to
341 shift and stay in (typ|fun_)_dec state or to reduce and get back to dec
342 state.
343
344 Sadly, tagging the rules with a lower precedence (to explicitly favor
345 shifting) - does not help :(
346
347 %nonassoc LOWEST
348 ...
349 dec:
350 | var_dec { $1 }
351 | typ_decs %prec LOWEST { Ast.TypeDecs $1 }
352 | fun_decs %prec LOWEST { Ast.FunDecs $1 }
353 ;
354
355 The difficulty seems to be in the lack of a separator token which would be
356 able to definitively mark the end of each sequence of consecutive
357 (typ_|fun_) declarations.
358
359 Keeping this in mind, another alternative is to manually capture the possible
360 interspersion patterns in the rules like:
361
362 (N * foo) followed-by (N * not-foo)
363
364 for the exception of var_dec, which, since we do not need to group its
365 consecutive sequences, can be reduced upon first sighting.
366*/
367
543d3420 368decs:
46486dc8
SK
369 | var_dec decs_any { $1 :: $2 }
370 | fun_decs decs_any_but_fun { (Ast.FunDecs $1) :: $2 }
371 | typ_decs decs_any_but_typ { (Ast.TypeDecs $1) :: $2 }
372 ;
373
374decs_any:
375 | { [] }
376 | var_dec decs_any { $1 :: $2 }
377 | fun_decs decs_any_but_fun { (Ast.FunDecs $1) :: $2 }
378 | typ_decs decs_any_but_typ { (Ast.TypeDecs $1) :: $2 }
a0db5523 379 ;
543d3420 380
46486dc8
SK
381decs_any_but_fun:
382 | { [] }
383 | var_dec decs_any { $1 :: $2 }
384 | typ_decs decs_any_but_typ { (Ast.TypeDecs $1) :: $2 }
28875fec
SK
385 ;
386
46486dc8
SK
387decs_any_but_typ:
388 | { [] }
389 | var_dec decs_any { $1 :: $2 }
390 | fun_decs decs_any_but_fun { (Ast.FunDecs $1) :: $2 }
391 ;
392
393/*---------------------------------------------------------------------------*/
394/* END unintuitive rules for decs (which avoid shift/reduce conflicts) */
395/*---------------------------------------------------------------------------*/
396
28875fec 397typ_decs:
46486dc8
SK
398 | typ_dec { $1 :: [] }
399 | typ_dec typ_decs { $1 :: $2 }
28875fec
SK
400 ;
401
402typ_dec:
3fbeb7c1 403 | TYPE ID EQ ID
ef945634 404 {
28875fec
SK
405 let type_id_left = $2 in
406 let type_id_right = $4 in
407 let pos = pos () in (* FIXME: rhs id should have its own pos, no? *)
408 Ast.TypeDec
409 { name = Sym.of_string type_id_left
410 ; ty = Ast.NameTy {symbol = Sym.of_string type_id_right; pos}
411 ; pos
412 }
ef945634 413 }
28875fec 414 | TYPE ID EQ LBRACE type_fields RBRACE
543d3420 415 {
3fbeb7c1 416 let type_id = $2 in
28875fec
SK
417 let type_fields = $5 in
418 Ast.TypeDec
419 { name = Sym.of_string type_id
420 ; ty = Ast.RecordTy type_fields
421 ; pos = pos ()
422 }
3fbeb7c1
SK
423 }
424 | TYPE ID EQ ARRAY OF ID
425 {
28875fec
SK
426 let type_id = Sym.of_string $2 in
427 let element_type_id = Sym.of_string $6 in
428 let pos = pos () in
429 Ast.TypeDec
430 { name = type_id
431 ; ty = Ast.ArrayTy {symbol = element_type_id; pos}
432 ; pos
433 }
543d3420 434 }
28875fec 435 ;
543d3420 436
28875fec
SK
437var_dec:
438 | VAR ID maybe_type_sig ASSIGN exp
439 {
440 let var_id = Sym.of_string $2 in
441 let maybe_type_sig = $3 in
442 let exp = $5 in
443 let pos = pos () in
444 Ast.VarDec
445 { name = var_id
446 ; escape = ref true
447 ; typ = maybe_type_sig
448 ; init = exp
449 ; pos
450 }
543d3420 451 }
28875fec 452 ;
543d3420 453
28875fec 454fun_decs:
46486dc8
SK
455 | fun_dec { $1 :: [] }
456 | fun_dec fun_decs { $1 :: $2 }
28875fec
SK
457 ;
458
459fun_dec:
460 | FUNCTION ID LPAREN type_fields RPAREN maybe_type_sig EQ exp
543d3420 461 {
28875fec
SK
462 let name = Sym.of_string $2 in
463 let params = $4 in
464 let result = $6 in
465 let body = $8 in
466 let pos = pos () in
467 Ast.FunDec {name; params; result; body; pos}
543d3420 468 }
a0db5523 469 ;
543d3420 470
28875fec
SK
471maybe_type_sig:
472 | { None }
473 | COLON ID { Some (Sym.of_string $2, pos ()) }
474 ;
475
476type_fields:
477 |
478 { [] }
a87678f0 479 | ID COLON ID
543d3420 480 {
28875fec
SK
481 let field =
482 Ast.Field
483 { name = Sym.of_string $1
484 ; escape = ref true
485 ; typ = Sym.of_string $3
486 ; pos = pos ()
487 }
488 in
489 field :: []
490 }
491 | ID COLON ID COMMA type_fields
492 {
493 let field =
494 Ast.Field
495 { name = Sym.of_string $1
496 ; escape = ref true
497 ; typ = Sym.of_string $3
498 ; pos = pos ()
499 }
500 in
501 field :: $5
543d3420 502 }
a0db5523 503 ;
543d3420 504
543d3420 505fun_args:
28875fec
SK
506 | { [] }
507 | exp { $1 :: [] }
508 | exp COMMA fun_args { $1 :: $3 }
a0db5523 509 ;
543d3420 510
543d3420 511lvalue:
28875fec 512 | ID
543d3420 513 {
28875fec
SK
514 Ast.SimpleVar
515 { symbol = Sym.of_string $1
516 ; pos = pos ()
517 }
543d3420 518 }
28875fec 519 | lvalue LBRACK exp RBRACK
543d3420 520 {
28875fec
SK
521 Ast.SubscriptVar
522 { var = $1
523 ; exp = $3
524 ; pos = pos ()
525 }
543d3420 526 }
28875fec 527 | lvalue DOT ID
543d3420 528 {
28875fec
SK
529 Ast.FieldVar
530 { var = $1
531 ; symbol = Sym.of_string $3
532 ; pos = pos ()
533 }
543d3420 534 }
a0db5523 535 ;
543d3420
SK
536
537%%
This page took 0.068025 seconds and 4 git commands to generate.