X-Git-Url: https://git.xandkar.net/?a=blobdiff_plain;f=tiger%2Fsrc%2Flib%2Ftiger%2Ftiger_parser.mly;h=1bb9388b7a3f5ac509bce3226a7a3fa3a16fcaad;hb=46486dc8836fede7e00aaa614d99d98ed7646bdc;hp=35cae281f4aa6a6cd981bc9b4723e94829bff29d;hpb=ef94563423aae9f1560d78a172770b3bf944a227;p=tiger.ml.git diff --git a/tiger/src/lib/tiger/tiger_parser.mly b/tiger/src/lib/tiger/tiger_parser.mly index 35cae28..1bb9388 100644 --- a/tiger/src/lib/tiger/tiger_parser.mly +++ b/tiger/src/lib/tiger/tiger_parser.mly @@ -1,5 +1,11 @@ %{ - open Printf + module Ast = Tiger_absyn + module Sym = Tiger_symbol + + let pos () = + Tiger_position.of_lexing_positions + ~pos_start:(Parsing.symbol_start_pos ()) + ~pos_end:(Parsing.symbol_end_pos ()) %} /* Declarations */ @@ -49,298 +55,479 @@ %token WHILE /* from lowest precedence */ +%nonassoc THEN +%nonassoc ELSE +%nonassoc ASSIGN +%nonassoc OF DO %left OR %left AND %nonassoc EQ NEQ GT LT GE LE %left PLUS MINUS %left TIMES DIVIDE -%nonassoc UMINUS +%nonassoc HIGHEST /* to highest precedence */ -%type program +%type program %start program %% -program: - | exp EOF - { - sprintf "program[%s]" $1 - } +program: exp EOF { $1 }; exp: | NIL - { - "nil[]" - } + { Ast.NilExp } | INT - { - sprintf "int[%d]" $1 - } - | MINUS exp %prec UMINUS - { - sprintf "negation[%s]" $2 - } - | ID LBRACK exp RBRACK OF exp - { - let type_id = $1 in - let exp_1 = $3 in - let exp_2 = $6 in - sprintf "array[type[%s], size[%s], val[%s]]" type_id exp_1 exp_2 - } - | ID LBRACE rec_field_assignments RBRACE + { Ast.IntExp $1 } + | MINUS exp %prec HIGHEST + { + Ast.OpExp + { left = Ast.IntExp 0 + ; oper = Ast.MinusOp + ; right = $2 + ; pos = pos () + } + } + | lvalue LBRACK exp RBRACK OF exp + { + match $1 with + | Ast.SimpleVar {symbol=typ; _} -> + Ast.ArrayExp + { typ + ; size = $3 + ; init = $6 + ; pos = pos () + } + | Ast.SubscriptVar _ | Ast.FieldVar _ -> + raise Parse_error + } + | ID LBRACE rec_fields_bind RBRACE { let type_id = $1 in - let rec_field_assignments = $3 in - sprintf - "record[type[%s], rec_field_assignments[%s]]" - type_id rec_field_assignments + let fields = $3 in + let typ = Sym.of_string type_id in + let pos = pos () in + Ast.RecordExp {fields; typ; pos} } | lvalue - { - $1 - } + { Ast.VarExp $1 } | lvalue ASSIGN exp { - sprintf "assign[%s := %s]" $1 $3 + let var = $1 in + let exp = $3 in + let pos = pos () in + Ast.AssignExp {var; exp; pos} } | STRING - { - sprintf "string[%S]" $1 - } - | fun_call - { - $1 - } - | exp op exp - { - sprintf "op[%s %s %s]" $1 $2 $3 + { Ast.StringExp {string = $1; pos = pos ()} } + | ID LPAREN fun_args RPAREN + { + Ast.CallExp + { func = Sym.of_string $1 + ; args = $3 + ; pos = pos () + } + } + | exp PLUS exp + { + Ast.OpExp + { left = $1 + ; oper = Ast.PlusOp + ; right = $3 + ; pos = pos () + } + } + | exp MINUS exp + { + Ast.OpExp + { left = $1 + ; oper = Ast.MinusOp + ; right = $3 + ; pos = pos () + } + } + | exp TIMES exp + { + Ast.OpExp + { left = $1 + ; oper = Ast.TimesOp + ; right = $3 + ; pos = pos () + } + } + | exp DIVIDE exp + { + Ast.OpExp + { left = $1 + ; oper = Ast.DivideOp + ; right = $3 + ; pos = pos () + } + } + | exp EQ exp + { + Ast.OpExp + { left = $1 + ; oper = Ast.EqOp + ; right = $3 + ; pos = pos () + } + } + | exp NEQ exp + { + Ast.OpExp + { left = $1 + ; oper = Ast.NeqOp + ; right = $3 + ; pos = pos () + } + } + | exp GT exp + { + Ast.OpExp + { left = $1 + ; oper = Ast.GtOp + ; right = $3 + ; pos = pos () + } + } + | exp LT exp + { + Ast.OpExp + { left = $1 + ; oper = Ast.LtOp + ; right = $3 + ; pos = pos () + } + } + | exp GE exp + { + Ast.OpExp + { left = $1 + ; oper = Ast.GeOp + ; right = $3 + ; pos = pos () + } + } + | exp LE exp + { + Ast.OpExp + { left = $1 + ; oper = Ast.LeOp + ; right = $3 + ; pos = pos () + } + } + | exp AND exp + { + let e1 = $1 in + let e2 = $3 in + Ast.IfExp + { test = e1 + ; then' = e2 + ; else' = Some (Ast.IntExp 0) + ; pos = pos () + } + } + | exp OR exp + { + let e1 = $1 in + let e2 = $3 in + Ast.IfExp + { test = e1 + ; then' = Ast.IntExp 1 + ; else' = Some e2 + ; pos = pos () + } } | IF exp THEN exp ELSE exp { let e1 = $2 in let e2 = $4 in let e3 = $6 in - sprintf "if_then_else[%s, then[%s], else[%s]]" e1 e2 e3 + Ast.IfExp + { test = e1 + ; then' = e2 + ; else' = Some e3 + ; pos = pos () + } } | IF exp THEN exp { - sprintf "if_then[%s, then[%s]]" $2 $4 + let e1 = $2 in + let e2 = $4 in + Ast.IfExp + { test = e1 + ; then' = e2 + ; else' = None + ; pos = pos () + } } | WHILE exp DO exp { - sprintf "while[%s, do[%s]]" $2 $4 - } - | FOR id ASSIGN exp TO exp DO exp - { - let id = $2 in - let e1 = $4 in - let e2 = $6 in - let e3 = $8 in - sprintf "for[%s := %s, to[%s], do[%s]]" id e1 e2 e3 + let e1 = $2 in + let e2 = $4 in + Ast.WhileExp + { test = e1 + ; body = e2 + ; pos = pos () + } + } + | FOR ID ASSIGN exp TO exp DO exp + { + let var = $2 in + let e1 = $4 in + let e2 = $6 in + let e3 = $8 in + Ast.ForExp + { var = Sym.of_string var + ; escape = ref true + ; lo = e1 + ; hi = e2 + ; body = e3 + ; pos = pos () + } } | BREAK - { - "break[]" - } - | LPAREN seq RPAREN - { - sprintf "seq[%s]" $2 - } - | LET decs IN seq END + { Ast.BreakExp (pos ()) } + | LPAREN exps RPAREN + { Ast.SeqExp $2 } + | LET decs IN exps END { let decs = $2 in - let seq = $4 in - sprintf "let[decs[%s], in[seq[%s]]]" decs seq - } - | unit - { - $1 + let exps = $4 in + Ast.LetExp {decs; body = Ast.SeqExp exps; pos = pos ()} } + ; -seq: - | exp - { - sprintf "%s" $1 - } - | exp SEMICOLON seq - { - sprintf "%s; %s" $1 $3 - } +exps: + | { [] } + | exp { ($1, pos ()) :: [] } + | exp SEMICOLON exps { ($1, pos ()) :: $3 } + ; -decs: - | dec - { - sprintf "%s" $1 - } - | dec decs - { - sprintf "%s %s" $1 $2 - } +rec_fields_bind: + | ID EQ exp { (Sym.of_string $1, $3, pos ()) :: [] } + | ID EQ exp COMMA rec_fields_bind { (Sym.of_string $1, $3, pos ()) :: $5 } + ; -dec: - | tydec {$1} - | vardec {$1} - | fundec {$1} +/* ------------------------------------------------------------------------- */ +/* BEGIN unintuitive rules for decs (which avoid shift/reduce conflicts) */ +/* ------------------------------------------------------------------------- */ +/* + In order to support mutual recursion, we need to group consecutive + type and function declarations (see Tiger-book pages 97-99). -fundec: - | FUNCTION id unit EQ exp - { - let id = $2 in - let exp = $5 in - sprintf "fundec[%s, exp[%s]]" id exp - } - | FUNCTION id LPAREN tyfields RPAREN EQ exp - { - let id = $2 in - let tyfields = $4 in - let exp = $7 in - sprintf "fundec[%s, tyfields[%s], exp[%s]]" id tyfields exp - } - | FUNCTION id LPAREN tyfields RPAREN COLON ID EQ exp - { - let id = $2 in - let tyfields = $4 in - let type_id = $7 in - let exp = $9 in - sprintf - "fundec[%s, tyfields[%s], type_id[%s], exp[%s]]" - id tyfields type_id exp - } + Initially, I defined the rules to do so as: -vardec: - | VAR id ASSIGN exp - { - let id = $2 in - let exp = $4 in - sprintf "vardec[%s, exp[%s]]" id exp - } - | VAR id COLON ID ASSIGN exp - { - let id = $2 in - let type_id = $4 in - let exp = $6 in - sprintf "vardec[%s, type_id[%s], exp[%s]]" id type_id exp - } + decs: + | dec { $1 :: [] } + | dec decs { $1 :: $2 } + ; + dec: + | var_dec { $1 } + | typ_decs { Ast.TypeDecs $1 } + | fun_decs { Ast.FunDecs $1 } + ; -tydec: - | TYPE ID EQ ty - { - let type_id = $2 in - let ty = $4 in - sprintf "tydec[%s, %s]" type_id ty - } + which, while straightforward (and working, because ocamlyacc defaults to + shift in case of a conflict), nonetheless caused a shift/reduce conflict in + each of: typ_decs and fun_decs; where the parser did not know whether to + shift and stay in (typ|fun_)_dec state or to reduce and get back to dec + state. -ty: - | ID - { - let type_id = $1 in - sprintf "type[type_id[%S]]" type_id - } - | LBRACE RBRACE - { - "record[]" - } - | LBRACE tyfields RBRACE - { - let tyfields = $2 in - sprintf "record[%s]" tyfields - } - | ARRAY OF ID - { - let type_id = $3 in - sprintf "array_of_type[%s]" type_id - } + Sadly, tagging the rules with a lower precedence (to explicitly favor + shifting) - does not help :( -tyfields: -/*| epsilon */ - | tyfield - {$1} - | tyfield COMMA tyfields - { - let tyfield = $1 in - let tyfields = $3 in - sprintf "%s, %s" tyfield tyfields - } + %nonassoc LOWEST + ... + dec: + | var_dec { $1 } + | typ_decs %prec LOWEST { Ast.TypeDecs $1 } + | fun_decs %prec LOWEST { Ast.FunDecs $1 } + ; -tyfield: - | id COLON ID - { - let id = $1 in - let type_id = $3 in - sprintf "tyfield[%s, %s]" id type_id - } + The difficulty seems to be in the lack of a separator token which would be + able to definitively mark the end of each sequence of consecutive + (typ_|fun_) declarations. -id: - | ID - { - sprintf "id[%S]" $1 - } + Keeping this in mind, another alternative is to manually capture the possible + interspersion patterns in the rules like: -/* Perhaps "void"? */ -unit: - | LPAREN RPAREN - { - "unit[]" - } + (N * foo) followed-by (N * not-foo) -rec_field_assignments: - | id EQ exp - { - sprintf "%S = %s" $1 $3 - } - | id EQ exp COMMA rec_field_assignments - { - sprintf "%S = %s, %s" $1 $3 $5 - } + for the exception of var_dec, which, since we do not need to group its + consecutive sequences, can be reduced upon first sighting. +*/ + +decs: + | var_dec decs_any { $1 :: $2 } + | fun_decs decs_any_but_fun { (Ast.FunDecs $1) :: $2 } + | typ_decs decs_any_but_typ { (Ast.TypeDecs $1) :: $2 } + ; -fun_call: - | id unit +decs_any: + | { [] } + | var_dec decs_any { $1 :: $2 } + | fun_decs decs_any_but_fun { (Ast.FunDecs $1) :: $2 } + | typ_decs decs_any_but_typ { (Ast.TypeDecs $1) :: $2 } + ; + +decs_any_but_fun: + | { [] } + | var_dec decs_any { $1 :: $2 } + | typ_decs decs_any_but_typ { (Ast.TypeDecs $1) :: $2 } + ; + +decs_any_but_typ: + | { [] } + | var_dec decs_any { $1 :: $2 } + | fun_decs decs_any_but_fun { (Ast.FunDecs $1) :: $2 } + ; + +/*---------------------------------------------------------------------------*/ +/* END unintuitive rules for decs (which avoid shift/reduce conflicts) */ +/*---------------------------------------------------------------------------*/ + +typ_decs: + | typ_dec { $1 :: [] } + | typ_dec typ_decs { $1 :: $2 } + ; + +typ_dec: + | TYPE ID EQ ID { - sprintf "fun_call[%s, %s]" $1 $2 + let type_id_left = $2 in + let type_id_right = $4 in + let pos = pos () in (* FIXME: rhs id should have its own pos, no? *) + Ast.TypeDec + { name = Sym.of_string type_id_left + ; ty = Ast.NameTy {symbol = Sym.of_string type_id_right; pos} + ; pos + } } - | id LPAREN fun_args RPAREN + | TYPE ID EQ LBRACE type_fields RBRACE { - sprintf "fun_call[%s, %s]" $1 $3 - } + let type_id = $2 in + let type_fields = $5 in + Ast.TypeDec + { name = Sym.of_string type_id + ; ty = Ast.RecordTy type_fields + ; pos = pos () + } + } + | TYPE ID EQ ARRAY OF ID + { + let type_id = Sym.of_string $2 in + let element_type_id = Sym.of_string $6 in + let pos = pos () in + Ast.TypeDec + { name = type_id + ; ty = Ast.ArrayTy {symbol = element_type_id; pos} + ; pos + } + } + ; -fun_args: - | exp - { - $1 - } - | exp COMMA fun_args +var_dec: + | VAR ID maybe_type_sig ASSIGN exp + { + let var_id = Sym.of_string $2 in + let maybe_type_sig = $3 in + let exp = $5 in + let pos = pos () in + Ast.VarDec + { name = var_id + ; escape = ref true + ; typ = maybe_type_sig + ; init = exp + ; pos + } + } + ; + +fun_decs: + | fun_dec { $1 :: [] } + | fun_dec fun_decs { $1 :: $2 } + ; + +fun_dec: + | FUNCTION ID LPAREN type_fields RPAREN maybe_type_sig EQ exp { - sprintf "%s, %s" $1 $3 + let name = Sym.of_string $2 in + let params = $4 in + let result = $6 in + let body = $8 in + let pos = pos () in + Ast.FunDec {name; params; result; body; pos} } + ; -op: - | PLUS {"+"} - | MINUS {"-"} - | TIMES {"*"} - | DIVIDE {"/"} - | EQ {"="} - | NEQ {"<>"} - | GT {">"} - | LT {"<"} - | GE {">="} - | LE {"<="} - | AND {"&"} - | OR {"|"} +maybe_type_sig: + | { None } + | COLON ID { Some (Sym.of_string $2, pos ()) } + ; + +type_fields: + | + { [] } + | ID COLON ID + { + let field = + Ast.Field + { name = Sym.of_string $1 + ; escape = ref true + ; typ = Sym.of_string $3 + ; pos = pos () + } + in + field :: [] + } + | ID COLON ID COMMA type_fields + { + let field = + Ast.Field + { name = Sym.of_string $1 + ; escape = ref true + ; typ = Sym.of_string $3 + ; pos = pos () + } + in + field :: $5 + } + ; + +fun_args: + | { [] } + | exp { $1 :: [] } + | exp COMMA fun_args { $1 :: $3 } + ; lvalue: - | id + | ID { - sprintf "lvalue[%s]" $1 + Ast.SimpleVar + { symbol = Sym.of_string $1 + ; pos = pos () + } } - | lvalue DOT id + | lvalue LBRACK exp RBRACK { - sprintf "get_record_field[%s, %s]" $1 $3 + Ast.SubscriptVar + { var = $1 + ; exp = $3 + ; pos = pos () + } } - | lvalue LBRACK exp RBRACK + | lvalue DOT ID { - sprintf "get_array_subscript[%s, %s]" $1 $3 + Ast.FieldVar + { var = $1 + ; symbol = Sym.of_string $3 + ; pos = pos () + } } + ; %%