Complete 1.02.p.1: Tiger lexer

author Siraaj Khandkar <siraaj@khandkar.net>

Wed, 23 May 2018 03:46:42 +0000 (23:46 -0400)

committer Siraaj Khandkar <siraaj@khandkar.net>

Wed, 23 May 2018 03:46:42 +0000 (23:46 -0400)
author Siraaj Khandkar <siraaj@khandkar.net>
Wed, 23 May 2018 03:46:42 +0000 (23:46 -0400)
committer Siraaj Khandkar <siraaj@khandkar.net>
Wed, 23 May 2018 03:46:42 +0000 (23:46 -0400)
diff --git a/README.md b/README.md

index abe952e..cda2fd7 100644 (file)
--- a/README.md
+++ b/README.md
@@ -22,14 +22,14 @@ Project Plan
  | [x]    | 1.01.e.1.c | ---- demo unbalanced behaviour           | ---   | --       | --     | 2018-04-18 | 2018-04-18 |
  | [x]    | 1.01.e.1.d | ---- find functional balanced tree       | ---   | --       | --     | 2018-04-19 | 2018-04-20 |
  | ------ | ---------- | ---------------------------------------- | ----- | -------- | ------ | ---------- | ---------- |
-| [ ]    | 1.02       | -- Lexical Analysis                      | 024   | 01       | --     | ---------- | ---------- |
+| [ ]    | 1.02       | -- Lexical Analysis                      | 024   | 01       | 01     | 2018-05-22 | 2018-05-22 |
  | [ ]    | 1.02.1     | --- Lexical tokens                       | 001   | --       | --     | ---------- | ---------- |
  | [ ]    | 1.02.2     | --- Regular expressions                  | 003   | --       | --     | ---------- | ---------- |
  | [ ]    | 1.02.3     | --- Finite automata                      | 003   | --       | --     | ---------- | ---------- |
  | [ ]    | 1.02.4     | --- Nondeterministic finite automata     | 006   | --       | --     | ---------- | ---------- |
  | [ ]    | 1.02.5     | --- ML-Lex: a lexical analyzer generator | 003   | --       | --     | ---------- | ---------- |
  | [ ]    | 1.02.p     | --- Program                              | 002   | --       | --     | ---------- | ---------- |
-| [ ]    | 1.02.p.1   | ---- Tiger lexer                         | ---   | --       | --     | ---------- | ---------- |
+| [ ]    | 1.02.p.1   | ---- Tiger lexer                         | ---   | 01       | 01     | 2018-05-22 | 2018-05-22 |
  | [ ]    | 1.02.e     | --- Exercises                            | 004   | --       | --     | ---------- | ---------- |
  | [ ]    | 1.02.e.01  | ---- regexes                             | ---   | --       | --     | ---------- | ---------- |
  | [ ]    | 1.02.e.02  | ---- why no regexes                      | ---   | --       | --     | ---------- | ---------- |
diff --git a/tiger/Makefile b/tiger/Makefile

index 9917366..20eedaa 100644 (file)
--- a/tiger/Makefile
+++ b/tiger/Makefile
@@ -1,9 +1,9 @@
  MAKEFLAGS := --no-builtin-rules
  
-EXE_TYPE := byte # byte | native
-EXECUTABLES := tiger
+EXE_TYPE         := byte  # byte | native
+EXECUTABLES      := tigerc tiger_tests
  OCAMLBUILD_FLAGS := -I src/exe -I src/lib/tiger
-OCAMLBUILD := ocamlbuild $(OCAMLBUILD_FLAGS)
+OCAMLBUILD       := ocamlbuild $(OCAMLBUILD_FLAGS)
  
  .PHONY: \
   all \
@@ -12,19 +12,19 @@ OCAMLBUILD := ocamlbuild $(OCAMLBUILD_FLAGS)
   test
  
  all:
-       $(MAKE) clean
-       $(MAKE) build
-       $(MAKE) test
+       @$(MAKE) -s clean
+       @$(MAKE) -s build
+       @$(MAKE) -s test
  
  build:
-       $(OCAMLBUILD) $(addsuffix _main.$(EXE_TYPE),$(EXECUTABLES))
-       mkdir -p bin/exe
-       $(foreach exe,$(EXECUTABLES),cp _build/src/exe/$(exe)_main.$(EXE_TYPE) bin/exe/$(exe))
-       rm $(addsuffix _main.$(EXE_TYPE),$(EXECUTABLES))
+       @$(OCAMLBUILD) $(addsuffix .$(EXE_TYPE),$(EXECUTABLES))
+       @mkdir -p bin/exe
+       $(foreach exe,$(EXECUTABLES),cp _build/src/exe/$(exe).$(EXE_TYPE) bin/exe/$(exe); )
+       @rm $(addsuffix .$(EXE_TYPE),$(EXECUTABLES))
  
  clean:
-       $(OCAMLBUILD) -clean
-       rm -rf ./bin
+       @$(OCAMLBUILD) -clean
+       @rm -rf ./bin
  
-test: bin/exe/tiger
-       ./$<
+test: bin/exe/tiger_tests
+       @./$<
diff --git a/tiger/src/exe/tiger_main.ml b/tiger/src/exe/tiger_main.ml

deleted file mode 100644 (file)

index bd1a443..0000000
--- a/tiger/src/exe/tiger_main.ml
+++ /dev/null
@@ -1,3 +0,0 @@
-let () =
-  let bar = String.make 80 '-' in
-  Printf.printf "%s\nTiger says: %S\n%s\n" bar Tiger.Growl.text bar;
diff --git a/tiger/src/exe/tiger_tests.ml b/tiger/src/exe/tiger_tests.ml

new file mode 100644 (file)

index 0000000..314eece
--- /dev/null
+++ b/tiger/src/exe/tiger_tests.ml
@@ -0,0 +1,46 @@
+open Printf
+
+module List = ListLabels
+
+let test_01 =
+  let code =
+    "
+    /* an array type and an array variable */
+    let
+        type  arrtype = array of int
+        var arr1:arrtype := arrtype [10] of 0
+    in
+        arr1
+    end
+    "
+  in
+  let tokens =
+    let open Tiger.Parser.Token in
+    [ LET;
+        TYPE; ID "arrtype"; EQ; ARRAY; OF; ID "int";
+        VAR; ID "arr1"; COLON; ID "arrtype"; ASSIGN;
+          ID "arrtype"; LBRACK; INT 10; RBRACK; OF; INT 0;
+      IN;
+        ID "arr1";
+      END
+    ]
+  in
+  (code, tokens)
+
+let tokens_of_code code =
+  let lexbuf = Lexing.from_string code in
+  let rec tokens () =
+    match Tiger.Lexer.token lexbuf with
+    | Tiger.Parser.Token.EOF -> []
+    | token -> token :: tokens ()
+  in
+  tokens ()
+
+let tests =
+  [ test_01
+  ]
+
+let () =
+  List.iter tests ~f:(fun (code, tokens_expected) ->
+    assert ((tokens_of_code code) = tokens_expected)
+  )
diff --git a/tiger/src/exe/tiger_main.mli b/tiger/src/exe/tiger_tests.mli

similarity index 100%

copy from tiger/src/exe/tiger_main.mli

copy to tiger/src/exe/tiger_tests.mli
diff --git a/tiger/src/exe/tigerc.ml b/tiger/src/exe/tigerc.ml

new file mode 100644 (file)

index 0000000..77eafb4
--- /dev/null
+++ b/tiger/src/exe/tigerc.ml
@@ -0,0 +1,15 @@
+open Printf
+
+let () =
+  let path_to_program_file = Sys.argv.(1) in
+  let ic = open_in path_to_program_file in
+  let lexbuf = Lexing.from_channel ic in
+  let rec parse_and_print () =
+    let token = Tiger.Lexer.token lexbuf in
+    printf "%s\n" (Tiger.Parser.Token.to_string token);
+    match token with
+    | Tiger.Parser.Token.EOF -> ()
+    | _ -> parse_and_print ()
+  in
+  parse_and_print ();
+  close_in ic;
diff --git a/tiger/src/exe/tiger_main.mli b/tiger/src/exe/tigerc.mli

similarity index 100%

rename from tiger/src/exe/tiger_main.mli

rename to tiger/src/exe/tigerc.mli
diff --git a/tiger/src/lib/tiger/tiger.ml b/tiger/src/lib/tiger/tiger.ml

index 9844f26..a9a2799 100644 (file)
--- a/tiger/src/lib/tiger/tiger.ml
+++ b/tiger/src/lib/tiger/tiger.ml
@@ -1 +1,2 @@
-module Growl = struct let text = "Grrrrrrrrr!" end
+module Lexer  = Tiger_lexer
+module Parser = Tiger_parser
diff --git a/tiger/src/lib/tiger/tiger_lexer.mli b/tiger/src/lib/tiger/tiger_lexer.mli

new file mode 100644 (file)

index 0000000..fd30dce
--- /dev/null
+++ b/tiger/src/lib/tiger/tiger_lexer.mli
@@ -0,0 +1 @@
+val token : Lexing.lexbuf -> Tiger_parser.Token.t
diff --git a/tiger/src/lib/tiger/tiger_lexer.mll b/tiger/src/lib/tiger/tiger_lexer.mll

new file mode 100644 (file)

index 0000000..86bf0f8
--- /dev/null
+++ b/tiger/src/lib/tiger/tiger_lexer.mll
@@ -0,0 +1,139 @@
+{
+  open Tiger_parser.Token
+
+  let comment_level = ref 0
+  let string_buf    = Buffer.create 100
+}
+
+let alpha = ['a'-'z' 'A'-'Z']
+let num = ['0'-'9']
+let newline = '\n' | '\r' | "\n\r"
+
+rule token = parse
+  | eof {
+      EOF
+  }
+
+  (* Track line number *)
+  | newline {
+      Lexing.new_line lexbuf;
+      token lexbuf
+  }
+
+  (* Comment *)
+  | "/*" {
+      incr comment_level;
+      comment lexbuf
+  }
+
+  | ":=" {ASSIGN}
+  | "<=" {LE}
+  | ">=" {GE}
+  | "<>" {NEQ}
+  | '&'  {AND}
+  | '('  {LPAREN}
+  | ')'  {RPAREN}
+  | '*'  {TIMES}
+  | '+'  {PLUS}
+  | '-'  {MINUS}
+  | '/'  {DIVIDE}
+  | ','  {COMMA}
+  | '.'  {DOT}
+  | ':'  {COLON}
+  | ';'  {SEMICOLON}
+  | '>'  {GT}
+  | '<'  {LT}
+  | '='  {EQ}
+  | '['  {LBRACK}
+  | ']'  {RBRACK}
+  | '{'  {LBRACE}
+  | '}'  {RBRACE}
+  | '|'  {OR}
+
+  (* String literal *)
+  | '"' {
+      string_literal lexbuf
+  }
+
+  (* Drop whitespace *)
+  | [' ' '\t'] {
+      token lexbuf
+  }
+
+  | (num+ as int) {
+    INT (int_of_string int)
+  }
+
+  | (alpha (alpha | num | '_')* as id) {
+      match id with
+      | "array"    -> ARRAY
+      | "break"    -> BREAK
+      | "do"       -> DO
+      | "else"     -> ELSE
+      | "end"      -> END
+      | "for"      -> FOR
+      | "function" -> FUNCTION
+      | "if"       -> IF
+      | "in"       -> IN
+      | "let"      -> LET
+      | "nil"      -> NIL
+      | "of"       -> OF
+      | "then"     -> THEN
+      | "to"       -> TO
+      | "type"     -> TYPE
+      | "var"      -> VAR
+      | "while"    -> WHILE
+      | _          -> ID id
+  }
+
+  (* Eat unimplemented. FIXME: stop indiscriminate eating *)
+  | _ {
+      token lexbuf
+  }
+and string_literal = parse
+  (* Keep escaped quote marks as part of the string literal *)
+  | '\\' '"' {
+      Buffer.add_char string_buf '"';
+      string_literal lexbuf
+  }
+
+  | '"' {
+      let string = Buffer.contents string_buf in
+      Buffer.reset string_buf;
+      STRING string
+  }
+
+
+  | (_ as c) {
+      Buffer.add_char string_buf c;
+      string_literal lexbuf
+  }
+and comment = parse
+  | eof {
+      (* TODO: Error: unterminated comment? or we don't care? *)
+      EOF
+  }
+
+  (* Track line number *)
+  | newline {
+      Lexing.new_line lexbuf;
+      comment lexbuf
+  }
+
+  | "/*" {
+      incr comment_level;
+      comment lexbuf
+  }
+
+  | "*/" {
+      decr comment_level;
+      match !comment_level with
+      | 0            -> token lexbuf
+      | n when n > 0 -> comment lexbuf
+      | _            -> assert false
+  }
+
+  (* Drop comment contents *)
+  | _ {
+      comment lexbuf
+  }
diff --git a/tiger/src/lib/tiger/tiger_parser.ml b/tiger/src/lib/tiger/tiger_parser.ml

new file mode 100644 (file)

index 0000000..191f8a3
--- /dev/null
+++ b/tiger/src/lib/tiger/tiger_parser.ml
@@ -0,0 +1,95 @@
+open Printf
+
+module Token = struct
+  type t =
+    | AND
+    | ARRAY
+    | ASSIGN
+    | BREAK
+    | COLON
+    | COMMA
+    | DIVIDE
+    | DO
+    | DOT
+    | ELSE
+    | END
+    | EOF
+    | EQ
+    | FOR
+    | FUNCTION
+    | GE
+    | GT
+    | ID of string
+    | IF
+    | IN
+    | INT of int
+    | LBRACE
+    | LBRACK
+    | LE
+    | LET
+    | LPAREN
+    | LT
+    | MINUS
+    | NEQ
+    | NIL
+    | OF
+    | OR
+    | PLUS
+    | RBRACE
+    | RBRACK
+    | RPAREN
+    | SEMICOLON
+    | STRING of string
+    | THEN
+    | TIMES
+    | TO
+    | TYPE
+    | VAR
+    | WHILE
+
+  let to_string = function
+    | TYPE      -> "TYPE"
+    | VAR       -> "VAR"
+    | FUNCTION  -> "FUNCTION"
+    | BREAK     -> "BREAK"
+    | OF        -> "OF"
+    | END       -> "END"
+    | IN        -> "IN"
+    | NIL       -> "NIL"
+    | LET       -> "LET"
+    | DO        -> "DO"
+    | TO        -> "TO"
+    | FOR       -> "FOR"
+    | WHILE     -> "WHILE"
+    | ELSE      -> "ELSE"
+    | THEN      -> "THEN"
+    | IF        -> "IF"
+    | ARRAY     -> "ARRAY"
+    | ASSIGN    -> "ASSIGN"
+    | OR        -> "OR"
+    | AND       -> "AND"
+    | GE        -> "GE"
+    | GT        -> "GT"
+    | LE        -> "LE"
+    | LT        -> "LT"
+    | NEQ       -> "NEQ"
+    | EQ        -> "EQ"
+    | DIVIDE    -> "DIVIDE"
+    | TIMES     -> "TIMES"
+    | MINUS     -> "MINUS"
+    | PLUS      -> "PLUS"
+    | DOT       -> "DOT"
+    | RBRACE    -> "RBRACE"
+    | LBRACE    -> "LBRACE"
+    | RBRACK    -> "RBRACK"
+    | LBRACK    -> "LBRACK"
+    | RPAREN    -> "RPAREN"
+    | LPAREN    -> "LPAREN"
+    | SEMICOLON -> "SEMICOLON"
+    | COLON     -> "COLON"
+    | COMMA     -> "COMMA"
+    | STRING s  -> sprintf "STRING (%S)" s
+    | INT    i  -> sprintf "INT (%d)" i
+    | ID     id -> sprintf "ID (%s)" id
+    | EOF       -> "EOF"
+end
diff --git a/tiger/src/lib/tiger/tiger_parser.mli b/tiger/src/lib/tiger/tiger_parser.mli

new file mode 100644 (file)

index 0000000..3eb635e
--- /dev/null
+++ b/tiger/src/lib/tiger/tiger_parser.mli
@@ -0,0 +1,49 @@
+module Token : sig
+  type t =
+    | AND
+    | ARRAY
+    | ASSIGN
+    | BREAK
+    | COLON
+    | COMMA
+    | DIVIDE
+    | DO
+    | DOT
+    | ELSE
+    | END
+    | EOF
+    | EQ
+    | FOR
+    | FUNCTION
+    | GE
+    | GT
+    | ID of string
+    | IF
+    | IN
+    | INT of int
+    | LBRACE
+    | LBRACK
+    | LE
+    | LET
+    | LPAREN
+    | LT
+    | MINUS
+    | NEQ
+    | NIL
+    | OF
+    | OR
+    | PLUS
+    | RBRACE
+    | RBRACK
+    | RPAREN
+    | SEMICOLON
+    | STRING of string
+    | THEN
+    | TIMES
+    | TO
+    | TYPE
+    | VAR
+    | WHILE
+
+  val to_string : t -> string
+end
author	Siraaj Khandkar <siraaj@khandkar.net>
	Wed, 23 May 2018 03:46:42 +0000 (23:46 -0400)
committer	Siraaj Khandkar <siraaj@khandkar.net>
	Wed, 23 May 2018 03:46:42 +0000 (23:46 -0400)
README.md		patch \| blob \| blame \| history
tiger/Makefile		patch \| blob \| blame \| history
tiger/src/exe/tiger_main.ml	[deleted file]	patch \| blob \| blame \| history
tiger/src/exe/tiger_tests.ml	[new file with mode: 0644]	patch \| blob
tiger/src/exe/tiger_tests.mli	[copied from tiger/src/exe/tiger_main.mli with 100% similarity]	patch \| blob \| blame \| history
tiger/src/exe/tigerc.ml	[new file with mode: 0644]	patch \| blob
tiger/src/exe/tigerc.mli	[moved from tiger/src/exe/tiger_main.mli with 100% similarity]	patch \| blob \| blame \| history
tiger/src/lib/tiger/tiger.ml		patch \| blob \| blame \| history
tiger/src/lib/tiger/tiger_lexer.mli	[new file with mode: 0644]	patch \| blob
tiger/src/lib/tiger/tiger_lexer.mll	[new file with mode: 0644]	patch \| blob
tiger/src/lib/tiger/tiger_parser.ml	[new file with mode: 0644]	patch \| blob
tiger/src/lib/tiger/tiger_parser.mli	[new file with mode: 0644]	patch \| blob