FIXME: 85 shift/reduce conflicts, 1 reduce/reduce conflict.
+open Printf
+
module List = ListLabels
let test_01 =
"
in
let tokens =
- let open Tiger.Parser.Token in
+ let open Tiger.Parser in
[ LET;
TYPE; ID "arrtype"; EQ; ARRAY; OF; ID "int";
VAR; ID "arr1"; COLON; ID "arrtype"; ASSIGN;
"
in
let tokens =
- let open Tiger.Parser.Token in
+ let open Tiger.Parser in
[ LET;
TYPE; ID "myint"; EQ; ID "int";
TYPE; ID "arrtype"; EQ; ARRAY; OF; ID "myint";
"
in
let tokens =
- let open Tiger.Parser.Token in
+ let open Tiger.Parser in
[ LET;
TYPE; ID "rectype"; EQ;
LBRACE; ID "name"; COLON; ID "string";
"
in
let tokens =
- let open Tiger.Parser.Token in
+ let open Tiger.Parser in
[ LET;
FUNCTION; ID "nfactor"; LPAREN; ID "n"; COLON; ID "int"; RPAREN; COLON; ID "int"; EQ;
IF; ID "n"; EQ; INT 0;
let test_09 =
let name = "error : types of then - else differ" in
let code =
- "
- /* "^name^" */
-
- if (5>4) then 13 else \" \"
+ " \
+ /* "^name^" */ \
+ if (5>4) then 13 else \" \" \
"
in
let tokens =
- let open Tiger.Parser.Token in
+ let open Tiger.Parser in
[ IF; LPAREN; INT 5; GT; INT 4; RPAREN; THEN; INT 13; ELSE; STRING " "
]
in
let tokens_of_code code =
let lexbuf = Lexing.from_string code in
let rec tokens () =
- match Tiger.Lexer.token lexbuf with
- | None -> []
- | Some token -> token :: tokens ()
+ let token = Tiger.Lexer.token lexbuf in
+ (* Avoiding fragile pattern-matching *)
+ if token = Tiger.Parser.EOF then [] else token :: tokens ()
in
tokens ()
+let parsetree_of_code code =
+ let lb = Lexing.from_string code in
+ (match Tiger.Parser.program Tiger.Lexer.token lb with
+ | exception Parsing.Parse_error ->
+ let module L = Lexing in
+ let L.({lex_curr_p = {pos_lnum=l; pos_bol=b; pos_cnum=c; _}; _}) = lb in
+ let msg = sprintf "Syntax error around line: %d, column: %d" l (c - b) in
+ Error msg
+ | parsetree ->
+ Ok parsetree
+ )
+
let tests =
[ test_01
; test_02
let () =
let bar_sep = String.make 80 '-' in
let bar_end = String.make 80 '=' in
+ let indent n = String.make (2 * n) ' ' in
+ let color_on_green = "\027[0;32m" in
+ let color_on_red = "\027[1;31m" in
+ let color_off = "\027[0m" in
List.iteri tests ~f:(fun i (name, code, tokens_expected) ->
let i = i + 1 in (* Because iteri starts with 0 *)
- let open Printf in
- printf "%s\n Test %d : %S\n" bar_sep i name;
+ printf "%s\n%sTest %d : %S\n" bar_sep (indent 0) i name;
+
+ printf "%sLexing : " (indent 1);
let tokens_emitted = tokens_of_code code in
(try
assert (tokens_emitted = tokens_expected);
- print_endline " ---> OK";
+ printf "%sOK%s\n" color_on_green color_off;
with Assert_failure _ ->
let tokens_to_string tokens =
- String.concat "; " (List.map ~f:Tiger.Parser.Token.to_string tokens)
+ String.concat "; " (List.map ~f:Tiger.Parser_token.to_string tokens)
in
printf
- " ---> ERROR\n Expected: %s\n Emitted : %s\n\n"
+ "%sERROR%s\n%sExpected: %s\n%sEmitted : %s\n\n"
+ color_on_red
+ color_off
+ (indent 2)
(tokens_to_string tokens_expected)
+ (indent 2)
(tokens_to_string tokens_emitted)
);
+
+ printf "%sParsing: " (indent 1);
+ (match parsetree_of_code code with
+ | Error errmsg -> printf "%sERROR:%s %s\n" color_on_red color_off errmsg
+ | Ok parsetree -> printf "%sOK:%s %s\n" color_on_green color_off parsetree
+ );
+
);
print_endline bar_end;
-open Printf
-
let () =
let path_to_program_file = Sys.argv.(1) in
let ic = open_in path_to_program_file in
let lexbuf = Lexing.from_channel ic in
- let rec parse_and_print () =
- match Tiger.Lexer.token lexbuf with
- | None ->
- ()
- | Some token ->
- printf "%s\n" (Tiger.Parser.Token.to_string token);
- parse_and_print ()
- in
- parse_and_print ();
+ (match Tiger.Parser.program Tiger.Lexer.token lexbuf with
+ | exception Parsing.Parse_error ->
+ let
+ Lexing.({lex_curr_p = {pos_lnum; pos_bol; pos_cnum; _}; _}) = lexbuf
+ in
+ Printf.printf
+ "Syntax error in file %S, around line: %d, column: %d\n"
+ path_to_program_file pos_lnum (pos_cnum - pos_bol)
+ | program ->
+ print_endline program
+ );
close_in ic;
module Lexer = Tiger_lexer
module Parser = Tiger_parser
+module Parser_token = Tiger_parser_token
-val token : Lexing.lexbuf -> Tiger_parser.Token.t option
+val token : Lexing.lexbuf -> Tiger_parser.token
{
- open Tiger_parser.Token
+ open Tiger_parser
let comment_level = ref 0
let string_buf = Buffer.create 100
let newline = '\n' | '\r' | "\n\r"
rule token = parse
- | eof {
- None
- }
+ | eof {EOF}
(* Track line number *)
| newline {
comment lexbuf
}
- | ":=" {Some ASSIGN}
- | "<=" {Some LE}
- | ">=" {Some GE}
- | "<>" {Some NEQ}
- | '&' {Some AND}
- | '(' {Some LPAREN}
- | ')' {Some RPAREN}
- | '*' {Some TIMES}
- | '+' {Some PLUS}
- | '-' {Some MINUS}
- | '/' {Some DIVIDE}
- | ',' {Some COMMA}
- | '.' {Some DOT}
- | ':' {Some COLON}
- | ';' {Some SEMICOLON}
- | '>' {Some GT}
- | '<' {Some LT}
- | '=' {Some EQ}
- | '[' {Some LBRACK}
- | ']' {Some RBRACK}
- | '{' {Some LBRACE}
- | '}' {Some RBRACE}
- | '|' {Some OR}
+ | ":=" {ASSIGN}
+ | "<=" {LE}
+ | ">=" {GE}
+ | "<>" {NEQ}
+ | '&' {AND}
+ | '(' {LPAREN}
+ | ')' {RPAREN}
+ | '*' {TIMES}
+ | '+' {PLUS}
+ | '-' {MINUS}
+ | '/' {DIVIDE}
+ | ',' {COMMA}
+ | '.' {DOT}
+ | ':' {COLON}
+ | ';' {SEMICOLON}
+ | '>' {GT}
+ | '<' {LT}
+ | '=' {EQ}
+ | '[' {LBRACK}
+ | ']' {RBRACK}
+ | '{' {LBRACE}
+ | '}' {RBRACE}
+ | '|' {OR}
(* String literal *)
| '"' {
}
| (num+ as int) {
- Some (INT (int_of_string int))
+ INT (int_of_string int)
}
| (alpha (alpha | num | '_')* as id) {
match id with
- | "array" -> Some ARRAY
- | "break" -> Some BREAK
- | "do" -> Some DO
- | "else" -> Some ELSE
- | "end" -> Some END
- | "for" -> Some FOR
- | "function" -> Some FUNCTION
- | "if" -> Some IF
- | "in" -> Some IN
- | "let" -> Some LET
- | "nil" -> Some NIL
- | "of" -> Some OF
- | "then" -> Some THEN
- | "to" -> Some TO
- | "type" -> Some TYPE
- | "var" -> Some VAR
- | "while" -> Some WHILE
- | _ -> Some (ID id)
+ | "array" -> ARRAY
+ | "break" -> BREAK
+ | "do" -> DO
+ | "else" -> ELSE
+ | "end" -> END
+ | "for" -> FOR
+ | "function" -> FUNCTION
+ | "if" -> IF
+ | "in" -> IN
+ | "let" -> LET
+ | "nil" -> NIL
+ | "of" -> OF
+ | "then" -> THEN
+ | "to" -> TO
+ | "type" -> TYPE
+ | "var" -> VAR
+ | "while" -> WHILE
+ | _ -> (ID id)
}
and string_literal = parse
(* Keep escaped quote marks as part of the string literal *)
| '"' {
let string = Buffer.contents string_buf in
Buffer.reset string_buf;
- Some (STRING string)
+ STRING string
}
string_literal lexbuf
}
and comment = parse
- | eof {
- (* TODO: Error: unterminated comment? or we don't care? *)
- None
- }
+ (* TODO: Error: unterminated comment? or we don't care? *)
+ | eof {EOF}
(* Track line number *)
| newline {
+++ /dev/null
-open Printf
-
-module Token = struct
- type t =
- | AND
- | ARRAY
- | ASSIGN
- | BREAK
- | COLON
- | COMMA
- | DIVIDE
- | DO
- | DOT
- | ELSE
- | END
- | EOF
- | EQ
- | FOR
- | FUNCTION
- | GE
- | GT
- | ID of string
- | IF
- | IN
- | INT of int
- | LBRACE
- | LBRACK
- | LE
- | LET
- | LPAREN
- | LT
- | MINUS
- | NEQ
- | NIL
- | OF
- | OR
- | PLUS
- | RBRACE
- | RBRACK
- | RPAREN
- | SEMICOLON
- | STRING of string
- | THEN
- | TIMES
- | TO
- | TYPE
- | VAR
- | WHILE
-
- let to_string = function
- | TYPE -> "TYPE"
- | VAR -> "VAR"
- | FUNCTION -> "FUNCTION"
- | BREAK -> "BREAK"
- | OF -> "OF"
- | END -> "END"
- | IN -> "IN"
- | NIL -> "NIL"
- | LET -> "LET"
- | DO -> "DO"
- | TO -> "TO"
- | FOR -> "FOR"
- | WHILE -> "WHILE"
- | ELSE -> "ELSE"
- | THEN -> "THEN"
- | IF -> "IF"
- | ARRAY -> "ARRAY"
- | ASSIGN -> "ASSIGN"
- | OR -> "OR"
- | AND -> "AND"
- | GE -> "GE"
- | GT -> "GT"
- | LE -> "LE"
- | LT -> "LT"
- | NEQ -> "NEQ"
- | EQ -> "EQ"
- | DIVIDE -> "DIVIDE"
- | TIMES -> "TIMES"
- | MINUS -> "MINUS"
- | PLUS -> "PLUS"
- | DOT -> "DOT"
- | RBRACE -> "RBRACE"
- | LBRACE -> "LBRACE"
- | RBRACK -> "RBRACK"
- | LBRACK -> "LBRACK"
- | RPAREN -> "RPAREN"
- | LPAREN -> "LPAREN"
- | SEMICOLON -> "SEMICOLON"
- | COLON -> "COLON"
- | COMMA -> "COMMA"
- | STRING s -> sprintf "STRING (%S)" s
- | INT i -> sprintf "INT (%d)" i
- | ID id -> sprintf "ID (%s)" id
- | EOF -> "EOF"
-end
+++ /dev/null
-module Token : sig
- type t =
- | AND
- | ARRAY
- | ASSIGN
- | BREAK
- | COLON
- | COMMA
- | DIVIDE
- | DO
- | DOT
- | ELSE
- | END
- | EOF
- | EQ
- | FOR
- | FUNCTION
- | GE
- | GT
- | ID of string
- | IF
- | IN
- | INT of int
- | LBRACE
- | LBRACK
- | LE
- | LET
- | LPAREN
- | LT
- | MINUS
- | NEQ
- | NIL
- | OF
- | OR
- | PLUS
- | RBRACE
- | RBRACK
- | RPAREN
- | SEMICOLON
- | STRING of string
- | THEN
- | TIMES
- | TO
- | TYPE
- | VAR
- | WHILE
-
- val to_string : t -> string
-end
--- /dev/null
+%{
+ open Printf
+%}
+
+/* Declarations */
+%token AND
+%token ARRAY
+%token ASSIGN
+%token BREAK
+%token COLON
+%token COMMA
+%token DIVIDE
+%token DO
+%token DOT
+%token ELSE
+%token END
+%token EOF
+%token EQ
+%token FOR
+%token FUNCTION
+%token GE
+%token GT
+%token <string> ID
+%token IF
+%token IN
+%token <int> INT
+%token LBRACE
+%token LBRACK
+%token LE
+%token LET
+%token LPAREN
+%token LT
+%token MINUS
+%token NEQ
+%token NIL
+%token OF
+%token OR
+%token PLUS
+%token RBRACE
+%token RBRACK
+%token RPAREN
+%token SEMICOLON
+%token <string> STRING
+%token THEN
+%token TIMES
+%token TO
+%token TYPE
+%token VAR
+%token WHILE
+
+/* from lowest precedence */
+%left OR
+%left AND
+%nonassoc EQ NEQ GT LT GE LE
+%left PLUS MINUS
+%left TIMES DIVIDE
+%nonassoc UMINUS
+/* to highest precedence */
+
+%type <string> program
+
+%start program
+
+%%
+
+program:
+ | exp EOF
+ {
+ sprintf "program[%s]" $1
+ }
+
+exp:
+ | NIL
+ {
+ "nil[]"
+ }
+ | INT
+ {
+ sprintf "int[%d]" $1
+ }
+ | MINUS exp %prec UMINUS
+ {
+ sprintf "negation[%s]" $2
+ }
+ | type_id LBRACK exp RBRACK OF exp
+ {
+ sprintf "array[type[%s], size[%s], val[%s]]" $1 $3 $6
+ }
+ | type_id LBRACE rec_field_assignments RBRACE
+ {
+ sprintf "record[type[%s], rec_field_assignments[%s]]" $1 $3
+ }
+ | lvalue
+ {
+ $1
+ }
+ | lvalue ASSIGN exp
+ {
+ sprintf "assign[%s := %s]" $1 $3
+ }
+ | STRING
+ {
+ sprintf "string[%S]" $1
+ }
+ | fun_call
+ {
+ $1
+ }
+ | exp op exp
+ {
+ sprintf "op[%s %s %s]" $1 $2 $3
+ }
+ | IF exp THEN exp ELSE exp
+ {
+ let e1 = $2 in
+ let e2 = $4 in
+ let e3 = $6 in
+ sprintf "if_then_else[%s, then[%s], else[%s]]" e1 e2 e3
+ }
+ | IF exp THEN exp
+ {
+ sprintf "if_then[%s, then[%s]]" $2 $4
+ }
+ | WHILE exp DO exp
+ {
+ sprintf "while[%s, do[%s]]" $2 $4
+ }
+ | FOR id ASSIGN exp TO exp DO exp
+ {
+ let id = $2 in
+ let e1 = $4 in
+ let e2 = $6 in
+ let e3 = $8 in
+ sprintf "for[%s := %s, to[%s], do[%s]]" id e1 e2 e3
+ }
+ | BREAK
+ {
+ "break[]"
+ }
+ | LPAREN seq RPAREN
+ {
+ sprintf "seq[%s]" $2
+ }
+ | LET decs IN seq END
+ {
+ let decs = $2 in
+ let seq = $4 in
+ sprintf "let[decs[%s], in[seq[%s]]]" decs seq
+ }
+ | unit
+ {
+ $1
+ }
+
+seq:
+ | exp
+ {
+ sprintf "%s" $1
+ }
+ | exp SEMICOLON seq
+ {
+ sprintf "%s; %s" $1 $3
+ }
+
+decs:
+ | dec
+ {
+ sprintf "%s" $1
+ }
+ | dec decs
+ {
+ sprintf "%s %s" $1 $2
+ }
+
+dec:
+ | tydec {$1}
+ | vardec {$1}
+ | fundec {$1}
+
+fundec:
+ | FUNCTION id LPAREN tyfields RPAREN EQ exp
+ {
+ let id = $2 in
+ let tyfields = $4 in
+ let exp = $7 in
+ sprintf "fundec[%s, tyfields[%s], exp[%s]]" id tyfields exp
+ }
+ | FUNCTION id LPAREN tyfields RPAREN COLON type_id EQ exp
+ {
+ let id = $2 in
+ let tyfields = $4 in
+ let type_id = $7 in
+ let exp = $9 in
+ sprintf
+ "fundec[%s, tyfields[%s], type_id[%s], exp[%s]]"
+ id tyfields type_id exp
+ }
+
+vardec:
+ | VAR id ASSIGN exp
+ {
+ let id = $2 in
+ let exp = $4 in
+ sprintf "vardec[%s, exp[%s]]" id exp
+ }
+ | VAR id COLON type_id ASSIGN exp
+ {
+ let id = $2 in
+ let tyid = $4 in
+ let exp = $6 in
+ sprintf "vardec[%s, type_id[%s], exp[%s]]" id tyid exp
+ }
+
+tydec:
+ | TYPE type_id EQ ty
+ {
+ sprintf "tydec[%s, %s]" $2 $4
+ }
+
+ty:
+ | type_id
+ {$1}
+ | LBRACE RBRACE
+ {
+ "record[]"
+ }
+ | LBRACE tyfields RBRACE
+ {
+ let tyfields = $2 in
+ sprintf "record[%s]" tyfields
+ }
+ | ARRAY OF type_id
+ {
+ let type_id = $3 in
+ sprintf "array_of_type[%s]" type_id
+ }
+
+tyfields:
+/*| epsilon */
+ | tyfield
+ {$1}
+ | tyfield COMMA tyfields
+ {
+ let tyfield = $1 in
+ let tyfields = $3 in
+ sprintf "%s, %s" tyfield tyfields
+ }
+
+tyfield:
+ | id COLON type_id
+ {
+ let id = $1 in
+ let type_id = $3 in
+ sprintf "tyfield[%s, %s]" id type_id
+ }
+
+id:
+ | ID
+ {
+ sprintf "id[%S]" $1
+ }
+
+/* Perhaps "void"? */
+unit:
+ | LPAREN RPAREN
+ {
+ "unit[]"
+ }
+
+type_id:
+ | id
+ {
+ sprintf "type_id[%S]" $1
+ }
+
+rec_field_assignments:
+ | id EQ exp
+ {
+ sprintf "%S = %s" $1 $3
+ }
+ | id EQ exp COMMA rec_field_assignments
+ {
+ sprintf "%S = %s, %s" $1 $3 $5
+ }
+
+fun_call:
+ | id unit
+ {
+ sprintf "fun_call[%s, %s]" $1 $2
+ }
+ | id LPAREN fun_args RPAREN
+ {
+ sprintf "fun_call[%s, %s]" $1 $3
+ }
+
+fun_args:
+ | exp
+ {
+ $1
+ }
+ | exp COMMA fun_args
+ {
+ sprintf "%s, %s" $1 $3
+ }
+
+op:
+ | PLUS {"+"}
+ | MINUS {"-"}
+ | TIMES {"*"}
+ | DIVIDE {"/"}
+ | EQ {"="}
+ | NEQ {"<>"}
+ | GT {">"}
+ | LT {"<"}
+ | GE {">="}
+ | LE {"<="}
+ | AND {"&"}
+ | OR {"|"}
+
+lvalue:
+ | id
+ {
+ sprintf "lvalue[%s]" $1
+ }
+ | lvalue DOT id
+ {
+ sprintf "get_record_field[%s, %s]" $1 $3
+ }
+ | lvalue LBRACK exp RBRACK
+ {
+ sprintf "get_array_subscript[%s, %s]" $1 $3
+ }
+
+%%
--- /dev/null
+open Printf
+
+module T = Tiger_parser
+
+type t = T.token
+
+let to_string = function
+ | T.TYPE -> "TYPE"
+ | T.VAR -> "VAR"
+ | T.FUNCTION -> "FUNCTION"
+ | T.BREAK -> "BREAK"
+ | T.OF -> "OF"
+ | T.END -> "END"
+ | T.IN -> "IN"
+ | T.NIL -> "NIL"
+ | T.LET -> "LET"
+ | T.DO -> "DO"
+ | T.TO -> "TO"
+ | T.FOR -> "FOR"
+ | T.WHILE -> "WHILE"
+ | T.ELSE -> "ELSE"
+ | T.THEN -> "THEN"
+ | T.IF -> "IF"
+ | T.ARRAY -> "ARRAY"
+ | T.ASSIGN -> "ASSIGN"
+ | T.OR -> "OR"
+ | T.AND -> "AND"
+ | T.GE -> "GE"
+ | T.GT -> "GT"
+ | T.LE -> "LE"
+ | T.LT -> "LT"
+ | T.NEQ -> "NEQ"
+ | T.EQ -> "EQ"
+ | T.DIVIDE -> "DIVIDE"
+ | T.TIMES -> "TIMES"
+ | T.MINUS -> "MINUS"
+ | T.PLUS -> "PLUS"
+ | T.DOT -> "DOT"
+ | T.RBRACE -> "RBRACE"
+ | T.LBRACE -> "LBRACE"
+ | T.RBRACK -> "RBRACK"
+ | T.LBRACK -> "LBRACK"
+ | T.RPAREN -> "RPAREN"
+ | T.LPAREN -> "LPAREN"
+ | T.SEMICOLON -> "SEMICOLON"
+ | T.COLON -> "COLON"
+ | T.COMMA -> "COMMA"
+ | T.STRING s -> sprintf "STRING (%S)" s
+ | T.INT i -> sprintf "INT (%d)" i
+ | T.ID id -> sprintf "ID (%s)" id
+ | T.EOF -> "EOF"
--- /dev/null
+type t = Tiger_parser.token
+
+val to_string : t -> string