9e08685c5c788a718b9165f294f13c279a34e837
[tiger.ml.git] / tiger / src / lib / tiger / tiger_lexer.mll
{
open Tiger_parser.Token

(* Scratch buffer in which the string_literal rule accumulates the
   characters of the literal it is currently scanning. *)
let string_buf : Buffer.t = Buffer.create 100

(* Nesting depth of block comments.  Starts at 0; the token and comment
   rules increment it on every comment opener and the comment rule
   decrements it on every comment closer. *)
let comment_level : int ref = ref 0
}
7
(* ASCII letters; identifiers must start with one of these. *)
let alpha = ['a'-'z' 'A'-'Z']

(* Decimal digits. *)
let num = ['0'-'9']

(* One line terminator: CRLF, LF, or a bare CR.  CRLF must be listed as a
   two-character alternative so that longest-match consumes it as a single
   terminator; otherwise the CR and the LF are matched separately and every
   Windows-style line end advances the line counter twice. *)
let newline = "\r\n" | '\n' | '\r'
11
(* Main lexer entry point.  Yields [Some tok] for the next token of the
   input, or [None] once the end of input is reached.  Whitespace, line
   terminators and comments produce no token: the rule simply recurses. *)
rule token = parse
  | eof { None }

  (* Line terminator: record it so positions stay accurate, then go on. *)
  | newline { Lexing.new_line lexbuf; token lexbuf }

  (* Blanks carry no meaning. *)
  | [' ' '\t'] { token lexbuf }

  (* Comment opener: note the new nesting level and hand over to the
     comment rule, which resumes tokenising once the comment is closed. *)
  | "/*" { incr comment_level; comment lexbuf }

  (* Opening quote: the string_literal rule reads up to the closing one. *)
  | '"' { string_literal lexbuf }

  (* Two-character operators *)
  | ":=" { Some ASSIGN }
  | "<>" { Some NEQ }
  | "<=" { Some LE }
  | ">=" { Some GE }

  (* Comparison and logical operators *)
  | '<' { Some LT }
  | '>' { Some GT }
  | '=' { Some EQ }
  | '&' { Some AND }
  | '|' { Some OR }

  (* Arithmetic operators *)
  | '+' { Some PLUS }
  | '-' { Some MINUS }
  | '*' { Some TIMES }
  | '/' { Some DIVIDE }

  (* Brackets *)
  | '(' { Some LPAREN }
  | ')' { Some RPAREN }
  | '[' { Some LBRACK }
  | ']' { Some RBRACK }
  | '{' { Some LBRACE }
  | '}' { Some RBRACE }

  (* Punctuation *)
  | ',' { Some COMMA }
  | '.' { Some DOT }
  | ':' { Some COLON }
  | ';' { Some SEMICOLON }

  (* Integer literal: a non-empty run of decimal digits. *)
  | (num+ as digits) { Some (INT (int_of_string digits)) }

  (* A letter followed by letters, digits or underscores: a reserved word
     if it appears in the table below, an identifier otherwise. *)
  | (alpha (alpha | num | '_')* as word) {
      Some
        (match word with
         | "array" -> ARRAY
         | "break" -> BREAK
         | "do" -> DO
         | "else" -> ELSE
         | "end" -> END
         | "for" -> FOR
         | "function" -> FUNCTION
         | "if" -> IF
         | "in" -> IN
         | "let" -> LET
         | "nil" -> NIL
         | "of" -> OF
         | "then" -> THEN
         | "to" -> TO
         | "type" -> TYPE
         | "var" -> VAR
         | "while" -> WHILE
         | name -> ID name)
    }
(* Scans the body of a string literal; entered by the token rule right after
   the opening quote mark has been consumed.  Characters are accumulated in
   string_buf until the closing quote mark, at which point the rule returns
   [Some (STRING contents)] and empties the buffer for the next literal.

   Only the escaped quote mark is decoded (to a bare quote mark); every
   other character, including any other backslash sequence, is stored
   verbatim.

   Raises [Failure] if the input ends before the closing quote mark. *)
and string_literal = parse
  (* Escaped quote mark: part of the literal, not its end. *)
  | '\\' '"' {
      Buffer.add_char string_buf '"';
      string_literal lexbuf
    }

  (* Escaped backslash: consume both characters together and store them
     verbatim.  Without this case the second backslash could pair up with
     a following quote mark and be misread as an escaped quote, so a
     literal ending in an escaped backslash would never be closed. *)
  | '\\' '\\' {
      Buffer.add_string string_buf (Lexing.lexeme lexbuf);
      string_literal lexbuf
    }

  (* Closing quote mark: emit the token and leave the buffer empty. *)
  | '"' {
      let contents = Buffer.contents string_buf in
      Buffer.clear string_buf;
      Some (STRING contents)
    }

  (* A line terminator inside the literal is kept as is, but must still be
     recorded so that positions after the literal stay correct. *)
  | newline {
      Lexing.new_line lexbuf;
      Buffer.add_string string_buf (Lexing.lexeme lexbuf);
      string_literal lexbuf
    }

  (* Input ended inside the literal.  Empty the shared buffer first so the
     partial contents cannot leak into a later literal, then fail with a
     message that names the actual problem. *)
  | eof {
      Buffer.clear string_buf;
      failwith "unterminated string literal"
    }

  (* Any other character belongs to the literal verbatim. *)
  | (_ as c) {
      Buffer.add_char string_buf c;
      string_literal lexbuf
    }
(* Skips the body of a block comment; entered by the token rule after a
   comment opener has been consumed and comment_level incremented.
   Comments nest: each further opener raises comment_level and each closer
   lowers it.  When the level returns to 0 the rule resumes normal
   tokenising and returns whatever the token rule yields next.  Returns
   [None] if the input ends while still inside a comment. *)
and comment = parse
  | eof {
      (* TODO: Error: unterminated comment? or we don't care? *)
      (* comment_level is module-level state: reset it so that a later
         lexing run does not start out believing it is inside a comment. *)
      comment_level := 0;
      None
    }

  (* Track line number *)
  | newline {
      Lexing.new_line lexbuf;
      comment lexbuf
    }

  (* Nested comment opener: one level deeper. *)
  | "/*" {
      incr comment_level;
      comment lexbuf
    }

  (* Comment closer: one level up; back to tokenising once fully closed. *)
  | "*/" {
      decr comment_level;
      match !comment_level with
      | 0 -> token lexbuf
      | depth when depth > 0 -> comment lexbuf
      (* This rule is only entered after an increment, so the level cannot
         drop below zero here. *)
      | _ -> assert false
    }

  (* Drop comment contents *)
  | _ {
      comment lexbuf
    }
This page took 0.067674 seconds and 3 git commands to generate.