Rename "tiger" dir to "compiler"
[tiger.ml.git] / compiler / src / lib / tiger / tiger_lexer.mll
diff --git a/compiler/src/lib/tiger/tiger_lexer.mll b/compiler/src/lib/tiger/tiger_lexer.mll
new file mode 100644 (file)
index 0000000..359366f
--- /dev/null
@@ -0,0 +1,130 @@
+{
+  open Tiger_parser
+
+  let comment_level = ref 0
+  let string_buf    = Buffer.create 100
+}
+
+let alpha = ['a'-'z' 'A'-'Z']
+let num = ['0'-'9']
+let newline = '\n' | '\r' | "\n\r"
+
+rule token = parse
+  | eof {EOF}
+
+  (* Track line number *)
+  | newline {
+      Lexing.new_line lexbuf;
+      token lexbuf
+  }
+
+  (* Comment *)
+  | "/*" {
+      incr comment_level;
+      comment lexbuf
+  }
+
+  | ":=" {ASSIGN}
+  | "<=" {LE}
+  | ">=" {GE}
+  | "<>" {NEQ}
+  | '&'  {AND}
+  | '('  {LPAREN}
+  | ')'  {RPAREN}
+  | '*'  {TIMES}
+  | '+'  {PLUS}
+  | '-'  {MINUS}
+  | '/'  {DIVIDE}
+  | ','  {COMMA}
+  | '.'  {DOT}
+  | ':'  {COLON}
+  | ';'  {SEMICOLON}
+  | '>'  {GT}
+  | '<'  {LT}
+  | '='  {EQ}
+  | '['  {LBRACK}
+  | ']'  {RBRACK}
+  | '{'  {LBRACE}
+  | '}'  {RBRACE}
+  | '|'  {OR}
+
+  (* String literal *)
+  | '"' {
+      string_literal lexbuf
+  }
+
+  (* Drop whitespace *)
+  | [' ' '\t'] {
+      token lexbuf
+  }
+
+  | (num+ as int) {
+    INT (int_of_string int)
+  }
+
+  | (alpha (alpha | num | '_')* as id) {
+      match id with
+      | "array"    -> ARRAY
+      | "break"    -> BREAK
+      | "do"       -> DO
+      | "else"     -> ELSE
+      | "end"      -> END
+      | "for"      -> FOR
+      | "function" -> FUNCTION
+      | "if"       -> IF
+      | "in"       -> IN
+      | "let"      -> LET
+      | "nil"      -> NIL
+      | "of"       -> OF
+      | "then"     -> THEN
+      | "to"       -> TO
+      | "type"     -> TYPE
+      | "var"      -> VAR
+      | "while"    -> WHILE
+      | _          -> (ID id)
+  }
+and string_literal = parse
+  (* Keep escaped quote marks as part of the string literal *)
+  | '\\' '"' {
+      Buffer.add_char string_buf '"';
+      string_literal lexbuf
+  }
+
+  | '"' {
+      let string = Buffer.contents string_buf in
+      Buffer.reset string_buf;
+      STRING string
+  }
+
+
+  | (_ as c) {
+      Buffer.add_char string_buf c;
+      string_literal lexbuf
+  }
+and comment = parse
+  (* TODO: Error: unterminated comment? or we don't care? *)
+  | eof {EOF}
+
+  (* Track line number *)
+  | newline {
+      Lexing.new_line lexbuf;
+      comment lexbuf
+  }
+
+  | "/*" {
+      incr comment_level;
+      comment lexbuf
+  }
+
+  | "*/" {
+      decr comment_level;
+      match !comment_level with
+      | 0            -> token lexbuf
+      | n when n > 0 -> comment lexbuf
+      | _            -> assert false
+  }
+
+  (* Drop comment contents *)
+  | _ {
+      comment lexbuf
+  }
This page took 0.032052 seconds and 4 git commands to generate.