| [x] | 1.01.e.1.c | ---- demo unbalanced behaviour | --- | -- | -- | 2018-04-18 | 2018-04-18 |
| [x] | 1.01.e.1.d | ---- find functional balanced tree | --- | -- | -- | 2018-04-19 | 2018-04-20 |
| ------ | ---------- | ---------------------------------------- | ----- | -------- | ------ | ---------- | ---------- |
-| [ ] | 1.02 | -- Lexical Analysis | 024 | 01 | -- | ---------- | ---------- |
+| [ ] | 1.02 | -- Lexical Analysis | 024 | 01 | 01 | 2018-05-22 | 2018-05-22 |
| [ ] | 1.02.1 | --- Lexical tokens | 001 | -- | -- | ---------- | ---------- |
| [ ] | 1.02.2 | --- Regular expressions | 003 | -- | -- | ---------- | ---------- |
| [ ] | 1.02.3 | --- Finite automata | 003 | -- | -- | ---------- | ---------- |
| [ ] | 1.02.4 | --- Nondeterministic finite automata | 006 | -- | -- | ---------- | ---------- |
| [ ] | 1.02.5 | --- ML-Lex: a lexical analyzer generator | 003 | -- | -- | ---------- | ---------- |
| [ ] | 1.02.p | --- Program | 002 | -- | -- | ---------- | ---------- |
-| [ ] | 1.02.p.1 | ---- Tiger lexer | --- | -- | -- | ---------- | ---------- |
+| [ ] | 1.02.p.1 | ---- Tiger lexer | --- | 01 | 01 | 2018-05-22 | 2018-05-22 |
| [ ] | 1.02.e | --- Exercises | 004 | -- | -- | ---------- | ---------- |
| [ ] | 1.02.e.01 | ---- regexes | --- | -- | -- | ---------- | ---------- |
| [ ] | 1.02.e.02 | ---- why no regexes | --- | -- | -- | ---------- | ---------- |
MAKEFLAGS := --no-builtin-rules
-EXE_TYPE := byte # byte | native
-EXECUTABLES := tiger
+EXE_TYPE := byte # byte | native
+EXECUTABLES := tigerc tiger_tests
OCAMLBUILD_FLAGS := -I src/exe -I src/lib/tiger
-OCAMLBUILD := ocamlbuild $(OCAMLBUILD_FLAGS)
+OCAMLBUILD := ocamlbuild $(OCAMLBUILD_FLAGS)
.PHONY: \
all \
+build \
+clean \
test
+# Full cycle: clean, then build, then test, each in its own silent (-s) sub-make.
all:
- $(MAKE) clean
- $(MAKE) build
- $(MAKE) test
+ @$(MAKE) -s clean
+ @$(MAKE) -s build
+ @$(MAKE) -s test
+# Compile every name in EXECUTABLES with ocamlbuild, copy each result from
+# _build/src/exe into bin/exe under its bare name, then remove the
+# <name>.$(EXE_TYPE) files from the current directory (presumably the links
+# ocamlbuild leaves there -- confirm).
build:
- $(OCAMLBUILD) $(addsuffix _main.$(EXE_TYPE),$(EXECUTABLES))
- mkdir -p bin/exe
- $(foreach exe,$(EXECUTABLES),cp _build/src/exe/$(exe)_main.$(EXE_TYPE) bin/exe/$(exe))
- rm $(addsuffix _main.$(EXE_TYPE),$(EXECUTABLES))
+ @$(OCAMLBUILD) $(addsuffix .$(EXE_TYPE),$(EXECUTABLES))
+ @mkdir -p bin/exe
+ $(foreach exe,$(EXECUTABLES),cp _build/src/exe/$(exe).$(EXE_TYPE) bin/exe/$(exe); )
+ @rm $(addsuffix .$(EXE_TYPE),$(EXECUTABLES))
+# Run ocamlbuild's own clean step and delete the ./bin directory.
clean:
- $(OCAMLBUILD) -clean
- rm -rf ./bin
+ @$(OCAMLBUILD) -clean
+ @rm -rf ./bin
-test: bin/exe/tiger
- ./$<
+# Run the test executable that the build target copies into bin/exe.
+test: bin/exe/tiger_tests
+ @./$<
+++ /dev/null
-let () =
- let bar = String.make 80 '-' in
- Printf.printf "%s\nTiger says: %S\n%s\n" bar Tiger.Growl.text bar;
--- /dev/null
+open Printf
+
+module List = ListLabels
+
+(* Test case 01: a Tiger program paired with the token sequence the lexer is
+   expected to produce for it.  The expected list has no entry for the
+   source comment and does not end with EOF. *)
+let test_01 =
+ let code =
+ "
+ /* an array type and an array variable */
+ let
+ type arrtype = array of int
+ var arr1:arrtype := arrtype [10] of 0
+ in
+ arr1
+ end
+ "
+ in
+ let tokens =
+ let open Tiger.Parser.Token in
+ [ LET;
+ TYPE; ID "arrtype"; EQ; ARRAY; OF; ID "int";
+ VAR; ID "arr1"; COLON; ID "arrtype"; ASSIGN;
+ ID "arrtype"; LBRACK; INT 10; RBRACK; OF; INT 0;
+ IN;
+ ID "arr1";
+ END
+ ]
+ in
+ (code, tokens)
+
+(* [tokens_of_code code] lexes the string [code] with [Tiger.Lexer.token] and
+   returns every token up to, but not including, the first [EOF].
+   Tokens are accumulated in reverse and flipped once at the end, which keeps
+   the loop tail-recursive so that long inputs cannot exhaust the stack. *)
+let tokens_of_code code =
+  let lexbuf = Lexing.from_string code in
+  let rec collect acc =
+    match Tiger.Lexer.token lexbuf with
+    | Tiger.Parser.Token.EOF -> List.rev acc
+    | token -> collect (token :: acc)
+  in
+  collect []
+
+(* All lexer test cases, as (source code, expected tokens) pairs. *)
+let tests = [test_01]
+
+(* Lex every test program and stop with an assertion failure on the first one
+   whose token list differs from the expected list. *)
+let () =
+  let check (code, tokens_expected) =
+    let tokens_actual = tokens_of_code code in
+    assert (tokens_actual = tokens_expected)
+  in
+  List.iter ~f:check tests
--- /dev/null
+open Printf
+
+(* Entry point: lex the Tiger program whose path is the first command-line
+   argument and print one token per line, ending with EOF.
+
+   - A missing argument prints a usage message on stderr and exits with
+     status 1 instead of failing on an out-of-bounds [Sys.argv] access.
+   - The input channel is closed on both the normal path and when the lexer
+     raises; in the latter case the exception is re-raised unchanged. *)
+let () =
+  if Array.length Sys.argv < 2 then begin
+    eprintf "Usage: %s <path-to-program-file>\n" Sys.argv.(0);
+    exit 1
+  end;
+  let path_to_program_file = Sys.argv.(1) in
+  let ic = open_in path_to_program_file in
+  let lexbuf = Lexing.from_channel ic in
+  let rec parse_and_print () =
+    let token = Tiger.Lexer.token lexbuf in
+    printf "%s\n" (Tiger.Parser.Token.to_string token);
+    match token with
+    | Tiger.Parser.Token.EOF -> ()
+    | _ -> parse_and_print ()
+  in
+  match parse_and_print () with
+  | () -> close_in ic
+  | exception e ->
+    (* Do not let a failure while closing mask the original error. *)
+    close_in_noerr ic;
+    raise e
-module Growl = struct let text = "Grrrrrrrrr!" end
+(* Short aliases so that the lexer and the token definitions can be reached
+   as [Tiger.Lexer] and [Tiger.Parser]. *)
+module Lexer = Tiger_lexer
+module Parser = Tiger_parser
--- /dev/null
+(** [token lexbuf] reads and returns the next token from [lexbuf].
+    Blanks, line breaks and comments produce no token; [EOF] is returned
+    once the end of the input is reached. *)
+val token : Lexing.lexbuf -> Tiger_parser.Token.t
--- /dev/null
+{
+ open Tiger_parser.Token
+
+ (* Current comment nesting depth: incremented on every comment opener and
+    decremented on every comment closer seen by the rules below. *)
+ let comment_level = ref 0
+ (* Collects the characters of the string literal currently being lexed;
+    reset each time a STRING token is produced. *)
+ let string_buf = Buffer.create 100
+}
+
+(* ASCII letters. *)
+let alpha = ['a'-'z' 'A'-'Z']
+(* Decimal digits. *)
+let num = ['0'-'9']
+(* One line terminator: LF, CR, CRLF, or LF CR.  CRLF has to be listed as a
+   unit: ocamllex takes the longest match, so a CRLF line ending is then
+   consumed in one step and counted as one line, instead of being seen as a
+   CR followed by an LF and counted twice. *)
+let newline = '\n' | '\r' | "\r\n" | "\n\r"
+
+(* Main entry point: returns the next token of the input.  Line breaks,
+   blanks, comments and (for now) any unrecognised character produce no
+   token; in those cases the rule calls itself again to keep scanning. *)
+rule token = parse
+ | eof {
+ EOF
+ }
+
+ (* Track line number *)
+ | newline {
+ Lexing.new_line lexbuf;
+ token lexbuf
+ }
+
+ (* Comment: record one more level of nesting, then skip the comment body
+    with the [comment] rule *)
+ | "/*" {
+ incr comment_level;
+ comment lexbuf
+ }
+
+ (* Operators and punctuation *)
+ | ":=" {ASSIGN}
+ | "<=" {LE}
+ | ">=" {GE}
+ | "<>" {NEQ}
+ | '&' {AND}
+ | '(' {LPAREN}
+ | ')' {RPAREN}
+ | '*' {TIMES}
+ | '+' {PLUS}
+ | '-' {MINUS}
+ | '/' {DIVIDE}
+ | ',' {COMMA}
+ | '.' {DOT}
+ | ':' {COLON}
+ | ';' {SEMICOLON}
+ | '>' {GT}
+ | '<' {LT}
+ | '=' {EQ}
+ | '[' {LBRACK}
+ | ']' {RBRACK}
+ | '{' {LBRACE}
+ | '}' {RBRACE}
+ | '|' {OR}
+
+ (* String literal: the opening quote has been consumed; the
+    [string_literal] rule reads the rest and returns the STRING token *)
+ | '"' {
+ string_literal lexbuf
+ }
+
+ (* Drop whitespace *)
+ | [' ' '\t'] {
+ token lexbuf
+ }
+
+ (* Integer literal.  NOTE(review): int_of_string raises Failure when the
+    digits do not fit in an int *)
+ | (num+ as int) {
+ INT (int_of_string int)
+ }
+
+ (* Identifiers and keywords: a word is matched as a whole first and only
+    then compared against the reserved words, so anything else is an ID *)
+ | (alpha (alpha | num | '_')* as id) {
+ match id with
+ | "array" -> ARRAY
+ | "break" -> BREAK
+ | "do" -> DO
+ | "else" -> ELSE
+ | "end" -> END
+ | "for" -> FOR
+ | "function" -> FUNCTION
+ | "if" -> IF
+ | "in" -> IN
+ | "let" -> LET
+ | "nil" -> NIL
+ | "of" -> OF
+ | "then" -> THEN
+ | "to" -> TO
+ | "type" -> TYPE
+ | "var" -> VAR
+ | "while" -> WHILE
+ | _ -> ID id
+ }
+
+ (* Eat unimplemented. FIXME: stop indiscriminate eating *)
+ | _ {
+ token lexbuf
+ }
+(* Scans the inside of a string literal; entered from [token] once the
+   opening quote has been consumed.  Characters are collected in
+   [string_buf] until the closing quote, at which point the buffer is emptied
+   and a STRING token carrying its contents is returned. *)
+and string_literal = parse
+  (* Unterminated literal: throw away the partial contents so they cannot
+     leak into the next string, then report the problem.  Without this case
+     the generated lexer fails with the uninformative "lexing: empty token"
+     and leaves the stale characters in the shared buffer. *)
+  | eof {
+    Buffer.reset string_buf;
+    failwith "unterminated string literal"
+  }
+
+  (* Keep escaped quote marks as part of the string literal *)
+  | '\\' '"' {
+    Buffer.add_char string_buf '"';
+    string_literal lexbuf
+  }
+
+  (* An escaped backslash is consumed as a pair and stored as one backslash.
+     Otherwise its second half would be read as the start of an escaped
+     quote whenever the pair sits right before the closing quote, and the
+     literal would never be closed. *)
+  | '\\' '\\' {
+    Buffer.add_char string_buf '\\';
+    string_literal lexbuf
+  }
+
+  (* Closing quote: hand back the collected text and empty the buffer for
+     the next literal *)
+  | '"' {
+    let string = Buffer.contents string_buf in
+    Buffer.reset string_buf;
+    STRING string
+  }
+
+  (* Line breaks inside a literal are kept verbatim but still counted, so
+     that positions after a multi-line string stay correct.  This case must
+     come before the catch-all below: on a tie in length the first rule
+     wins. *)
+  | (newline as nl) {
+    Lexing.new_line lexbuf;
+    Buffer.add_string string_buf nl;
+    string_literal lexbuf
+  }
+
+  (* Any other character is part of the literal *)
+  | (_ as c) {
+    Buffer.add_char string_buf c;
+    string_literal lexbuf
+  }
+(* Skips the body of a comment, which may contain nested comments.  Entered
+   from [token] after an opener has raised [comment_level]; scanning goes
+   back to [token] once a closer brings the level down to zero. *)
+and comment = parse
+  | eof {
+    (* TODO: Error: unterminated comment? or we don't care? *)
+    (* Reset the depth before giving up: [comment_level] is shared by every
+       use of this lexer, so leaving it non-zero would break nesting
+       tracking for the next input lexed in the same process. *)
+    comment_level := 0;
+    EOF
+  }
+
+  (* Track line number *)
+  | newline {
+    Lexing.new_line lexbuf;
+    comment lexbuf
+  }
+
+  (* Nested opener: one level deeper *)
+  | "/*" {
+    incr comment_level;
+    comment lexbuf
+  }
+
+  (* Closer: one level up; resume normal lexing when the outermost comment
+     has been closed *)
+  | "*/" {
+    decr comment_level;
+    match !comment_level with
+    | 0 -> token lexbuf
+    | n when n > 0 -> comment lexbuf
+    (* This rule is only entered after an increment, so the level cannot
+       drop below zero here *)
+    | _ -> assert false
+  }
+
+  (* Drop comment contents *)
+  | _ {
+    comment lexbuf
+  }
--- /dev/null
+open Printf
+
+(* Lexical tokens and their printable form. *)
+module Token = struct
+  (* One constructor per token kind; ID, INT and STRING carry the identifier
+     name, the integer value and the string contents respectively. *)
+  type t =
+    | AND
+    | ARRAY
+    | ASSIGN
+    | BREAK
+    | COLON
+    | COMMA
+    | DIVIDE
+    | DO
+    | DOT
+    | ELSE
+    | END
+    | EOF
+    | EQ
+    | FOR
+    | FUNCTION
+    | GE
+    | GT
+    | ID of string
+    | IF
+    | IN
+    | INT of int
+    | LBRACE
+    | LBRACK
+    | LE
+    | LET
+    | LPAREN
+    | LT
+    | MINUS
+    | NEQ
+    | NIL
+    | OF
+    | OR
+    | PLUS
+    | RBRACE
+    | RBRACK
+    | RPAREN
+    | SEMICOLON
+    | STRING of string
+    | THEN
+    | TIMES
+    | TO
+    | TYPE
+    | VAR
+    | WHILE
+
+  (* [to_string token] is the constructor name of [token], followed by the
+     payload in parentheses for ID, INT and STRING.  Cases are listed in the
+     same order as the type declaration. *)
+  let to_string token =
+    match token with
+    | AND -> "AND"
+    | ARRAY -> "ARRAY"
+    | ASSIGN -> "ASSIGN"
+    | BREAK -> "BREAK"
+    | COLON -> "COLON"
+    | COMMA -> "COMMA"
+    | DIVIDE -> "DIVIDE"
+    | DO -> "DO"
+    | DOT -> "DOT"
+    | ELSE -> "ELSE"
+    | END -> "END"
+    | EOF -> "EOF"
+    | EQ -> "EQ"
+    | FOR -> "FOR"
+    | FUNCTION -> "FUNCTION"
+    | GE -> "GE"
+    | GT -> "GT"
+    | ID name -> Printf.sprintf "ID (%s)" name
+    | IF -> "IF"
+    | IN -> "IN"
+    | INT value -> Printf.sprintf "INT (%d)" value
+    | LBRACE -> "LBRACE"
+    | LBRACK -> "LBRACK"
+    | LE -> "LE"
+    | LET -> "LET"
+    | LPAREN -> "LPAREN"
+    | LT -> "LT"
+    | MINUS -> "MINUS"
+    | NEQ -> "NEQ"
+    | NIL -> "NIL"
+    | OF -> "OF"
+    | OR -> "OR"
+    | PLUS -> "PLUS"
+    | RBRACE -> "RBRACE"
+    | RBRACK -> "RBRACK"
+    | RPAREN -> "RPAREN"
+    | SEMICOLON -> "SEMICOLON"
+    | STRING contents -> Printf.sprintf "STRING (%S)" contents
+    | THEN -> "THEN"
+    | TIMES -> "TIMES"
+    | TO -> "TO"
+    | TYPE -> "TYPE"
+    | VAR -> "VAR"
+    | WHILE -> "WHILE"
+end
--- /dev/null
+(** Lexical tokens produced by the Tiger lexer. *)
+module Token : sig
+ (** A token.  [ID], [INT] and [STRING] carry the identifier name, the
+     integer value and the string contents respectively. *)
+ type t =
+ | AND
+ | ARRAY
+ | ASSIGN
+ | BREAK
+ | COLON
+ | COMMA
+ | DIVIDE
+ | DO
+ | DOT
+ | ELSE
+ | END
+ | EOF
+ | EQ
+ | FOR
+ | FUNCTION
+ | GE
+ | GT
+ | ID of string
+ | IF
+ | IN
+ | INT of int
+ | LBRACE
+ | LBRACK
+ | LE
+ | LET
+ | LPAREN
+ | LT
+ | MINUS
+ | NEQ
+ | NIL
+ | OF
+ | OR
+ | PLUS
+ | RBRACE
+ | RBRACK
+ | RPAREN
+ | SEMICOLON
+ | STRING of string
+ | THEN
+ | TIMES
+ | TO
+ | TYPE
+ | VAR
+ | WHILE
+
+ (** [to_string token] is a printable form of [token]: its constructor
+     name, followed by the payload in parentheses for [ID], [INT] and
+     [STRING]. *)
+ val to_string : t -> string
+end