Complete 1.02.p.1: Tiger lexer
[tiger.ml.git] / tiger / src / lib / tiger / tiger_lexer.mll
{
open Tiger_parser.Token

(* Nesting depth of the block comment currently being skipped. The rules
   below increment it on every comment opener and decrement it on every
   comment closer; 0 means the lexer is not inside a comment. *)
let comment_level : int ref = ref 0

(* Scratch buffer in which the string_literal rule accumulates the
   characters of the literal being read. 100 is only the initial capacity;
   Buffer grows on demand. *)
let string_buf : Buffer.t = Buffer.create 100
}
7
(* ASCII letters: the only characters allowed to start an identifier. *)
let alpha = ['a'-'z' 'A'-'Z']

(* Decimal digits. *)
let num = ['0'-'9']

(* One line terminator: CRLF, bare LF or bare CR.
   CRLF is listed as a two-character alternative so that, by the
   longest-match rule, a Windows line ending is consumed as a single
   newline and Lexing.new_line is called once per line instead of twice. *)
let newline = "\r\n" | '\n' | '\r'
11
(* Main entry point of the lexer: returns the next token of the input.
   Whitespace, newlines and comments are skipped; string literals and
   comments are delegated to the string_literal and comment rules.
   Raises Failure when an integer literal does not fit in a native int. *)
rule token = parse
  | eof { EOF }

  (* Track line number *)
  | newline {
      Lexing.new_line lexbuf;
      token lexbuf
    }

  (* Comment. A comment opened from this rule is always the outermost one,
     so the depth is set to 1 rather than incremented: a stale depth left
     behind by an earlier unterminated comment must not leak into this one. *)
  | "/*" {
      comment_level := 1;
      comment lexbuf
    }

  (* Multi-character operators *)
  | ":=" { ASSIGN }
  | "<=" { LE }
  | ">=" { GE }
  | "<>" { NEQ }

  (* Single-character operators and punctuation *)
  | '&' { AND }
  | '(' { LPAREN }
  | ')' { RPAREN }
  | '*' { TIMES }
  | '+' { PLUS }
  | '-' { MINUS }
  | '/' { DIVIDE }
  | ',' { COMMA }
  | '.' { DOT }
  | ':' { COLON }
  | ';' { SEMICOLON }
  | '>' { GT }
  | '<' { LT }
  | '=' { EQ }
  | '[' { LBRACK }
  | ']' { RBRACK }
  | '{' { LBRACE }
  | '}' { RBRACE }
  | '|' { OR }

  (* String literal. The shared buffer is emptied first so that text left
     over from a literal whose lexing was aborted cannot end up in this one. *)
  | '"' {
      Buffer.clear string_buf;
      string_literal lexbuf
    }

  (* Drop whitespace *)
  | [' ' '\t'] {
      token lexbuf
    }

  (* Integer literal. int_of_string fails on literals larger than max_int;
     re-raise the same exception type with a message naming the literal. *)
  | (num+ as digits) {
      match int_of_string digits with
      | n -> INT n
      | exception Failure _ ->
        failwith ("integer literal out of range: " ^ digits)
    }

  (* Keyword or identifier: a letter followed by letters, digits and
     underscores. Reserved words are recognised after the fact so that the
     longest-match rule applies uniformly. *)
  | (alpha (alpha | num | '_')* as id) {
      match id with
      | "array" -> ARRAY
      | "break" -> BREAK
      | "do" -> DO
      | "else" -> ELSE
      | "end" -> END
      | "for" -> FOR
      | "function" -> FUNCTION
      | "if" -> IF
      | "in" -> IN
      | "let" -> LET
      | "nil" -> NIL
      | "of" -> OF
      | "then" -> THEN
      | "to" -> TO
      | "type" -> TYPE
      | "var" -> VAR
      | "while" -> WHILE
      | _ -> ID id
    }

  (* Eat unimplemented. FIXME: stop indiscriminate eating *)
  | _ {
      token lexbuf
    }
(* Reads the remainder of a string literal, the opening quote having already
   been consumed by the token rule, and returns it as a STRING token.
   Characters are accumulated in string_buf. An escaped quote is stored as a
   plain quote; every other character, including the other backslash
   escapes, is stored verbatim.
   Raises Failure if the input ends before the closing quote. *)
and string_literal = parse
  (* Keep escaped quote marks as part of the string literal *)
  | '\\' '"' {
      Buffer.add_char string_buf '"';
      string_literal lexbuf
    }

  (* An escaped backslash must be consumed as a pair. Otherwise its second
     backslash would combine with a following closing quote and be misread
     as an escaped quote, leaving the literal unterminated. Both characters
     are stored verbatim, exactly as the catch-all case stored them. *)
  | '\\' '\\' {
      Buffer.add_string string_buf "\\\\";
      string_literal lexbuf
    }

  (* Closing quote: hand over the accumulated text and empty the buffer. *)
  | '"' {
      let contents = Buffer.contents string_buf in
      Buffer.reset string_buf;
      STRING contents
    }

  (* A line break inside the literal is kept in the string, but must also be
     reported so that positions after the literal stay correct. *)
  | newline {
      Lexing.new_line lexbuf;
      Buffer.add_string string_buf (Lexing.lexeme lexbuf);
      string_literal lexbuf
    }

  (* End of input before the closing quote. Without this case the generated
     lexer fails with a generic empty-token message and leaves the partial
     literal in the shared buffer. *)
  | eof {
      Buffer.reset string_buf;
      failwith "unterminated string literal"
    }

  (* Any other character is part of the literal. *)
  | (_ as c) {
      Buffer.add_char string_buf c;
      string_literal lexbuf
    }
(* Skips the body of a block comment, honouring nesting, and resumes normal
   tokenisation once the outermost comment is closed. The nesting depth is
   kept in comment_level, which the caller has already raised for the
   opener it consumed. Returns the first token found after the comment, or
   EOF if the input ends inside it. *)
and comment = parse
  | eof {
      (* TODO: Error: unterminated comment? or we don't care? *)
      (* Forget the open comments so that a later run of the lexer in the
         same process does not start with a stale nesting depth. *)
      comment_level := 0;
      EOF
    }

  (* Track line number *)
  | newline {
      Lexing.new_line lexbuf;
      comment lexbuf
    }

  (* Nested opener: one more level to close. *)
  | "/*" {
      incr comment_level;
      comment lexbuf
    }

  (* Closer: leave one level, and return to the token rule once the
     outermost comment has been closed. *)
  | "*/" {
      decr comment_level;
      match !comment_level with
      | 0 -> token lexbuf
      | depth when depth > 0 -> comment lexbuf
      | _ -> assert false (* this rule is only entered with depth >= 1 *)
    }

  (* Drop comment contents *)
  | _ {
      comment lexbuf
    }
This page took 0.073521 seconds and 4 git commands to generate.