Fix grammar - allow empty set of record fields
[tiger.ml.git] / compiler / src / lib / tiger / tiger_parser.mly
CommitLineData
543d3420 1%{
28875fec 2 module Ast = Tiger_absyn
c16dd441 3 module Err = Tiger_error
28875fec
SK
4 module Sym = Tiger_symbol
5
6 let pos () =
7 Tiger_position.of_lexing_positions
8 ~pos_start:(Parsing.symbol_start_pos ())
9 ~pos_end:(Parsing.symbol_end_pos ())
543d3420
SK
10%}
11
12/* Declarations */
13%token AND
14%token ARRAY
15%token ASSIGN
16%token BREAK
17%token COLON
18%token COMMA
19%token DIVIDE
20%token DO
21%token DOT
22%token ELSE
23%token END
24%token EOF
25%token EQ
26%token FOR
27%token FUNCTION
28%token GE
29%token GT
30%token <string> ID
31%token IF
32%token IN
33%token <int> INT
34%token LBRACE
35%token LBRACK
36%token LE
37%token LET
38%token LPAREN
39%token LT
40%token MINUS
41%token NEQ
42%token NIL
43%token OF
44%token OR
45%token PLUS
46%token RBRACE
47%token RBRACK
48%token RPAREN
49%token SEMICOLON
50%token <string> STRING
51%token THEN
52%token TIMES
53%token TO
54%token TYPE
55%token VAR
56%token WHILE
57
58/* from lowest precedence */
28875fec
SK
59%nonassoc THEN
60%nonassoc ELSE
c7598faf 61%nonassoc ASSIGN
28875fec 62%nonassoc OF DO
543d3420
SK
63%left OR
64%left AND
65%nonassoc EQ NEQ GT LT GE LE
66%left PLUS MINUS
67%left TIMES DIVIDE
28875fec 68%nonassoc HIGHEST
543d3420
SK
69/* to highest precedence */
70
28875fec 71%type <Tiger_absyn.t> program
543d3420
SK
72
73%start program
74
75%%
76
7c14a966
SK
77program:
78 | exp EOF { $1 }
c16dd441 79 | error {Err.raise (Err.Invalid_syntax (pos ()))}
7c14a966 80 ;
543d3420
SK
81
82exp:
83 | NIL
28875fec 84 { Ast.NilExp }
543d3420 85 | INT
28875fec
SK
86 { Ast.IntExp $1 }
87 | MINUS exp %prec HIGHEST
88 {
89 Ast.OpExp
90 { left = Ast.IntExp 0
91 ; oper = Ast.MinusOp
92 ; right = $2
93 ; pos = pos ()
94 }
95 }
96 | lvalue LBRACK exp RBRACK OF exp
97 {
98 match $1 with
99 | Ast.SimpleVar {symbol=typ; _} ->
100 Ast.ArrayExp
101 { typ
102 ; size = $3
103 ; init = $6
104 ; pos = pos ()
105 }
106 | Ast.SubscriptVar _ | Ast.FieldVar _ ->
107 raise Parse_error
108 }
109 | ID LBRACE rec_fields_bind RBRACE
543d3420 110 {
ef945634 111 let type_id = $1 in
28875fec
SK
112 let fields = $3 in
113 let typ = Sym.of_string type_id in
114 let pos = pos () in
115 Ast.RecordExp {fields; typ; pos}
543d3420
SK
116 }
117 | lvalue
28875fec 118 { Ast.VarExp $1 }
543d3420
SK
119 | lvalue ASSIGN exp
120 {
28875fec
SK
121 let var = $1 in
122 let exp = $3 in
123 let pos = pos () in
124 Ast.AssignExp {var; exp; pos}
543d3420
SK
125 }
126 | STRING
28875fec 127 { Ast.StringExp {string = $1; pos = pos ()} }
29de275c
SK
128 | ID LPAREN fun_args RPAREN
129 {
28875fec
SK
130 Ast.CallExp
131 { func = Sym.of_string $1
132 ; args = $3
133 ; pos = pos ()
134 }
543d3420 135 }
6a74bf44 136 | exp PLUS exp
543d3420 137 {
28875fec
SK
138 Ast.OpExp
139 { left = $1
140 ; oper = Ast.PlusOp
141 ; right = $3
142 ; pos = pos ()
143 }
6a74bf44
SK
144 }
145 | exp MINUS exp
146 {
28875fec
SK
147 Ast.OpExp
148 { left = $1
149 ; oper = Ast.MinusOp
150 ; right = $3
151 ; pos = pos ()
152 }
6a74bf44
SK
153 }
154 | exp TIMES exp
155 {
28875fec
SK
156 Ast.OpExp
157 { left = $1
158 ; oper = Ast.TimesOp
159 ; right = $3
160 ; pos = pos ()
161 }
6a74bf44
SK
162 }
163 | exp DIVIDE exp
164 {
28875fec
SK
165 Ast.OpExp
166 { left = $1
167 ; oper = Ast.DivideOp
168 ; right = $3
169 ; pos = pos ()
170 }
6a74bf44
SK
171 }
172 | exp EQ exp
173 {
28875fec
SK
174 Ast.OpExp
175 { left = $1
176 ; oper = Ast.EqOp
177 ; right = $3
178 ; pos = pos ()
179 }
6a74bf44
SK
180 }
181 | exp NEQ exp
182 {
28875fec
SK
183 Ast.OpExp
184 { left = $1
185 ; oper = Ast.NeqOp
186 ; right = $3
187 ; pos = pos ()
188 }
6a74bf44
SK
189 }
190 | exp GT exp
191 {
28875fec
SK
192 Ast.OpExp
193 { left = $1
194 ; oper = Ast.GtOp
195 ; right = $3
196 ; pos = pos ()
197 }
6a74bf44
SK
198 }
199 | exp LT exp
200 {
28875fec
SK
201 Ast.OpExp
202 { left = $1
203 ; oper = Ast.LtOp
204 ; right = $3
205 ; pos = pos ()
206 }
6a74bf44
SK
207 }
208 | exp GE exp
209 {
28875fec
SK
210 Ast.OpExp
211 { left = $1
212 ; oper = Ast.GeOp
213 ; right = $3
214 ; pos = pos ()
215 }
6a74bf44
SK
216 }
217 | exp LE exp
218 {
28875fec
SK
219 Ast.OpExp
220 { left = $1
221 ; oper = Ast.LeOp
222 ; right = $3
223 ; pos = pos ()
224 }
6a74bf44
SK
225 }
226 | exp AND exp
227 {
28875fec
SK
228 let e1 = $1 in
229 let e2 = $3 in
230 Ast.IfExp
231 { test = e1
232 ; then' = e2
233 ; else' = Some (Ast.IntExp 0)
234 ; pos = pos ()
235 }
6a74bf44
SK
236 }
237 | exp OR exp
238 {
28875fec
SK
239 let e1 = $1 in
240 let e2 = $3 in
241 Ast.IfExp
242 { test = e1
243 ; then' = Ast.IntExp 1
244 ; else' = Some e2
245 ; pos = pos ()
246 }
543d3420
SK
247 }
248 | IF exp THEN exp ELSE exp
249 {
250 let e1 = $2 in
251 let e2 = $4 in
252 let e3 = $6 in
28875fec
SK
253 Ast.IfExp
254 { test = e1
255 ; then' = e2
256 ; else' = Some e3
257 ; pos = pos ()
258 }
543d3420
SK
259 }
260 | IF exp THEN exp
261 {
28875fec
SK
262 let e1 = $2 in
263 let e2 = $4 in
264 Ast.IfExp
265 { test = e1
266 ; then' = e2
267 ; else' = None
268 ; pos = pos ()
269 }
543d3420
SK
270 }
271 | WHILE exp DO exp
272 {
28875fec
SK
273 let e1 = $2 in
274 let e2 = $4 in
275 Ast.WhileExp
276 { test = e1
277 ; body = e2
278 ; pos = pos ()
279 }
543d3420 280 }
ffff6f35 281 | FOR ID ASSIGN exp TO exp DO exp
543d3420 282 {
28875fec
SK
283 let var = $2 in
284 let e1 = $4 in
285 let e2 = $6 in
286 let e3 = $8 in
287 Ast.ForExp
288 { var = Sym.of_string var
289 ; escape = ref true
290 ; lo = e1
291 ; hi = e2
292 ; body = e3
293 ; pos = pos ()
294 }
543d3420
SK
295 }
296 | BREAK
28875fec 297 { Ast.BreakExp (pos ()) }
e7dfac93 298 | LPAREN exps RPAREN
28875fec 299 { Ast.SeqExp $2 }
e7dfac93 300 | LET decs IN exps END
543d3420
SK
301 {
302 let decs = $2 in
e7dfac93 303 let exps = $4 in
28875fec 304 Ast.LetExp {decs; body = Ast.SeqExp exps; pos = pos ()}
543d3420 305 }
a0db5523 306 ;
543d3420 307
28875fec
SK
308exps:
309 | { [] }
310 | exp { ($1, pos ()) :: [] }
311 | exp SEMICOLON exps { ($1, pos ()) :: $3 }
a0db5523 312 ;
b3c9d54d 313
28875fec 314rec_fields_bind:
50641804 315 | { [] }
28875fec
SK
316 | ID EQ exp { (Sym.of_string $1, $3, pos ()) :: [] }
317 | ID EQ exp COMMA rec_fields_bind { (Sym.of_string $1, $3, pos ()) :: $5 }
a0db5523 318 ;
543d3420 319
46486dc8
SK
320/* ------------------------------------------------------------------------- */
321/* BEGIN unintuitive rules for decs (which avoid shift/reduce conflicts) */
322/* ------------------------------------------------------------------------- */
323/*
324 In order to support mutual recursion, we need to group consecutive
325 type and function declarations (see Tiger-book pages 97-99).
326
327 Initially, I defined the rules to do so as:
328
329 decs:
330 | dec { $1 :: [] }
331 | dec decs { $1 :: $2 }
332 ;
333 dec:
334 | var_dec { $1 }
335 | typ_decs { Ast.TypeDecs $1 }
336 | fun_decs { Ast.FunDecs $1 }
337 ;
338
339 which, while straightforward (and working, because ocamlyacc defaults to
340 shift in case of a conflict), nonetheless caused a shift/reduce conflict in
341 each of: typ_decs and fun_decs; where the parser did not know whether to
342 shift and stay in (typ|fun_)_dec state or to reduce and get back to dec
343 state.
344
345 Sadly, tagging the rules with a lower precedence (to explicitly favor
346 shifting) - does not help :(
347
348 %nonassoc LOWEST
349 ...
350 dec:
351 | var_dec { $1 }
352 | typ_decs %prec LOWEST { Ast.TypeDecs $1 }
353 | fun_decs %prec LOWEST { Ast.FunDecs $1 }
354 ;
355
356 The difficulty seems to be in the lack of a separator token which would be
357 able to definitively mark the end of each sequence of consecutive
358 (typ_|fun_) declarations.
359
360 Keeping this in mind, another alternative is to manually capture the possible
361 interspersion patterns in the rules like:
362
363 (N * foo) followed-by (N * not-foo)
364
365 for the exception of var_dec, which, since we do not need to group its
366 consecutive sequences, can be reduced upon first sighting.
367*/
368
543d3420 369decs:
46486dc8
SK
370 | var_dec decs_any { $1 :: $2 }
371 | fun_decs decs_any_but_fun { (Ast.FunDecs $1) :: $2 }
372 | typ_decs decs_any_but_typ { (Ast.TypeDecs $1) :: $2 }
373 ;
374
375decs_any:
376 | { [] }
377 | var_dec decs_any { $1 :: $2 }
378 | fun_decs decs_any_but_fun { (Ast.FunDecs $1) :: $2 }
379 | typ_decs decs_any_but_typ { (Ast.TypeDecs $1) :: $2 }
a0db5523 380 ;
543d3420 381
46486dc8
SK
382decs_any_but_fun:
383 | { [] }
384 | var_dec decs_any { $1 :: $2 }
385 | typ_decs decs_any_but_typ { (Ast.TypeDecs $1) :: $2 }
28875fec
SK
386 ;
387
46486dc8
SK
388decs_any_but_typ:
389 | { [] }
390 | var_dec decs_any { $1 :: $2 }
391 | fun_decs decs_any_but_fun { (Ast.FunDecs $1) :: $2 }
392 ;
393
394/*---------------------------------------------------------------------------*/
395/* END unintuitive rules for decs (which avoid shift/reduce conflicts) */
396/*---------------------------------------------------------------------------*/
397
28875fec 398typ_decs:
46486dc8
SK
399 | typ_dec { $1 :: [] }
400 | typ_dec typ_decs { $1 :: $2 }
28875fec
SK
401 ;
402
403typ_dec:
3fbeb7c1 404 | TYPE ID EQ ID
ef945634 405 {
28875fec
SK
406 let type_id_left = $2 in
407 let type_id_right = $4 in
408 let pos = pos () in (* FIXME: rhs id should have its own pos, no? *)
409 Ast.TypeDec
410 { name = Sym.of_string type_id_left
411 ; ty = Ast.NameTy {symbol = Sym.of_string type_id_right; pos}
412 ; pos
413 }
ef945634 414 }
28875fec 415 | TYPE ID EQ LBRACE type_fields RBRACE
543d3420 416 {
3fbeb7c1 417 let type_id = $2 in
28875fec
SK
418 let type_fields = $5 in
419 Ast.TypeDec
420 { name = Sym.of_string type_id
421 ; ty = Ast.RecordTy type_fields
422 ; pos = pos ()
423 }
3fbeb7c1
SK
424 }
425 | TYPE ID EQ ARRAY OF ID
426 {
28875fec
SK
427 let type_id = Sym.of_string $2 in
428 let element_type_id = Sym.of_string $6 in
429 let pos = pos () in
430 Ast.TypeDec
431 { name = type_id
432 ; ty = Ast.ArrayTy {symbol = element_type_id; pos}
433 ; pos
434 }
543d3420 435 }
28875fec 436 ;
543d3420 437
28875fec
SK
438var_dec:
439 | VAR ID maybe_type_sig ASSIGN exp
440 {
441 let var_id = Sym.of_string $2 in
442 let maybe_type_sig = $3 in
443 let exp = $5 in
444 let pos = pos () in
445 Ast.VarDec
446 { name = var_id
447 ; escape = ref true
448 ; typ = maybe_type_sig
449 ; init = exp
450 ; pos
451 }
543d3420 452 }
28875fec 453 ;
543d3420 454
28875fec 455fun_decs:
46486dc8
SK
456 | fun_dec { $1 :: [] }
457 | fun_dec fun_decs { $1 :: $2 }
28875fec
SK
458 ;
459
460fun_dec:
461 | FUNCTION ID LPAREN type_fields RPAREN maybe_type_sig EQ exp
543d3420 462 {
28875fec
SK
463 let name = Sym.of_string $2 in
464 let params = $4 in
465 let result = $6 in
466 let body = $8 in
467 let pos = pos () in
468 Ast.FunDec {name; params; result; body; pos}
543d3420 469 }
a0db5523 470 ;
543d3420 471
28875fec
SK
472maybe_type_sig:
473 | { None }
474 | COLON ID { Some (Sym.of_string $2, pos ()) }
475 ;
476
477type_fields:
478 |
479 { [] }
a87678f0 480 | ID COLON ID
543d3420 481 {
28875fec
SK
482 let field =
483 Ast.Field
484 { name = Sym.of_string $1
485 ; escape = ref true
486 ; typ = Sym.of_string $3
487 ; pos = pos ()
488 }
489 in
490 field :: []
491 }
492 | ID COLON ID COMMA type_fields
493 {
494 let field =
495 Ast.Field
496 { name = Sym.of_string $1
497 ; escape = ref true
498 ; typ = Sym.of_string $3
499 ; pos = pos ()
500 }
501 in
502 field :: $5
543d3420 503 }
a0db5523 504 ;
543d3420 505
543d3420 506fun_args:
28875fec
SK
507 | { [] }
508 | exp { $1 :: [] }
509 | exp COMMA fun_args { $1 :: $3 }
a0db5523 510 ;
543d3420 511
543d3420 512lvalue:
28875fec 513 | ID
543d3420 514 {
28875fec
SK
515 Ast.SimpleVar
516 { symbol = Sym.of_string $1
517 ; pos = pos ()
518 }
543d3420 519 }
28875fec 520 | lvalue LBRACK exp RBRACK
543d3420 521 {
28875fec
SK
522 Ast.SubscriptVar
523 { var = $1
524 ; exp = $3
525 ; pos = pos ()
526 }
543d3420 527 }
28875fec 528 | lvalue DOT ID
543d3420 529 {
28875fec
SK
530 Ast.FieldVar
531 { var = $1
532 ; symbol = Sym.of_string $3
533 ; pos = pos ()
534 }
543d3420 535 }
a0db5523 536 ;
543d3420
SK
537
538%%
This page took 0.073552 seconds and 4 git commands to generate.