[tiger.ml.git] / tiger_lexer.mll

{
  open Tiger_parser.Token

  (* Scratch buffer in which the [string_literal] rule accumulates the
     characters of the string currently being scanned.  The initial
     capacity is only a hint; the buffer grows on demand. *)
  let string_buf : Buffer.t = Buffer.create 100

  (* Nesting depth of the block comment currently being skipped by the
     [comment] rule; 0 when the lexer is not inside a comment. *)
  let comment_level : int ref = ref 0
}

(* Character classes used to build identifiers and integer literals. *)
let alpha = ['a'-'z' 'A'-'Z']
let num = ['0'-'9']

(* One line terminator: CRLF, a lone LF, or a lone CR.  CRLF must be a
   single alternative: ocamllex picks the longest match, so without it a
   CRLF pair is lexed as CR followed by LF and every rule that calls
   [Lexing.new_line] on [newline] counts the line twice.  An LF
   immediately followed by a CR is now seen as two terminators. *)
let newline = "\r\n" | '\n' | '\r'

(* Main entry point of the lexer: returns the next token of the input.

   Whitespace, line terminators and block comments are skipped; line
   terminators are reported to [Lexing.new_line] so positions stay
   accurate.  Comments and string literals are delegated to the
   [comment] and [string_literal] rules, which share the mutable state
   declared in the header ([comment_level] and [string_buf]).

   Raises [Failure] when an integer literal does not fit in an OCaml
   [int].  No rule matches any other character, so such input is left
   to the generated lexer's default failure. *)
rule token = parse
  | eof {
      EOF
  }

  (* Track line number *)
  | newline {
      Lexing.new_line lexbuf;
      token lexbuf
  }

  (* Comment *)
  | "/*" {
      (* Within this file [token] is only re-entered from [comment] once
         the depth is back to 0, so the depth after this opener is 1.
         Assigning instead of incrementing discards any stale depth left
         behind by an earlier comment that hit end of input. *)
      comment_level := 1;
      comment lexbuf
  }

  | ":=" {ASSIGN}
  | "<=" {LE}
  | ">=" {GE}
  | "<>" {NEQ}
  | '&'  {AND}
  | '('  {LPAREN}
  | ')'  {RPAREN}
  | '*'  {TIMES}
  | '+'  {PLUS}
  | '-'  {MINUS}
  | '/'  {DIVIDE}
  | ','  {COMMA}
  | '.'  {DOT}
  | ':'  {COLON}
  | ';'  {SEMICOLON}
  | '>'  {GT}
  | '<'  {LT}
  | '='  {EQ}
  | '['  {LBRACK}
  | ']'  {RBRACK}
  | '{'  {LBRACE}
  | '}'  {RBRACE}
  | '|'  {OR}

  (* String literal *)
  | '"' {
      (* Every recursive call of [string_literal] moves the lexeme start
         forward, so remember where the opening quote was and restore it
         afterwards: the STRING token then starts at its opening quote
         rather than at its closing one. *)
      let start_p = lexbuf.Lexing.lex_start_p in
      (* Drop leftovers from a previous scan that was aborted by an
         exception before the buffer could be emptied. *)
      Buffer.clear string_buf;
      let tok = string_literal lexbuf in
      lexbuf.Lexing.lex_start_p <- start_p;
      tok
  }

  (* Drop whitespace *)
  | [' ' '\t'] {
      token lexbuf
  }

  (* Integer literal: one or more decimal digits. *)
  | (num+ as digits) {
      let n =
        try int_of_string digits
        with Failure _ ->
          (* Same exception type as before, but the message now names
             the offending literal. *)
          failwith ("integer literal out of range: " ^ digits)
      in
      INT n
  }

  (* Identifier or reserved word: a letter followed by letters, digits
     and underscores.  Reserved words get their own token. *)
  | (alpha (alpha | num | '_')* as id) {
      match id with
      | "array"    -> ARRAY
      | "break"    -> BREAK
      | "do"       -> DO
      | "else"     -> ELSE
      | "end"      -> END
      | "for"      -> FOR
      | "function" -> FUNCTION
      | "if"       -> IF
      | "in"       -> IN
      | "let"      -> LET
      | "nil"      -> NIL
      | "of"       -> OF
      | "then"     -> THEN
      | "to"       -> TO
      | "type"     -> TYPE
      | "var"      -> VAR
      | "while"    -> WHILE
      | _          -> ID id
  }
(* Scans the body of a string literal, the opening quote having already
   been consumed by [token].  Characters are accumulated in [string_buf]
   until the closing quote, at which point a STRING token is returned
   and the buffer is emptied.

   Only the backslash-quote escape is decoded (to a bare quote); every
   other character, including the backslash of any other escape
   sequence, is copied verbatim.

   Raises [Failure] if the input ends before the closing quote. *)
and string_literal = parse
  (* Keep escaped quote marks as part of the string literal *)
  | '\\' '"' {
      Buffer.add_char string_buf '"';
      string_literal lexbuf
  }

  (* An escaped backslash must be consumed as a pair; otherwise its
     second backslash would pair up with a following quote and a string
     ending in an escaped backslash would never terminate.  Both
     characters are copied unchanged, exactly as they were when they
     went through the catch-all case one at a time. *)
  | '\\' '\\' {
      Buffer.add_string string_buf (Lexing.lexeme lexbuf);
      string_literal lexbuf
  }

  (* Closing quote: emit the token and empty the shared buffer. *)
  | '"' {
      let string = Buffer.contents string_buf in
      Buffer.reset string_buf;
      STRING string
  }

  (* A raw line terminator inside the literal is kept as part of the
     string, but must still be counted so that later positions are
     right.  This case has to precede the catch-all below. *)
  | newline {
      Lexing.new_line lexbuf;
      Buffer.add_string string_buf (Lexing.lexeme lexbuf);
      string_literal lexbuf
  }

  (* End of input before the closing quote.  Without this case the
     generated lexer fails with a generic message and leaves the partial
     contents in the shared buffer. *)
  | eof {
      Buffer.reset string_buf;
      failwith "unterminated string literal"
  }

  (* Any other character is part of the string. *)
  | (_ as c) {
      Buffer.add_char string_buf c;
      string_literal lexbuf
  }
(* Skips the body of a block comment, which may contain nested block
   comments.  It is entered from [token] once the opening delimiter has
   been consumed, with [comment_level] already counting that opener.
   When the outermost comment is closed, lexing resumes with [token] and
   the token found there is returned.  If the input ends first, EOF is
   returned. *)
and comment = parse
  | eof {
      (* TODO: Error: unterminated comment? or we don't care? *)
      (* Still reported as a plain EOF, but the depth counter is global:
         clear it so that a later use of the lexer does not start out
         believing it is inside a comment. *)
      comment_level := 0;
      EOF
  }

  (* Track line number *)
  | newline {
      Lexing.new_line lexbuf;
      comment lexbuf
  }

  (* Nested opener: one more level to close. *)
  | "/*" {
      incr comment_level;
      comment lexbuf
  }

  (* Closer: leave comment mode only when the outermost level closes. *)
  | "*/" {
      decr comment_level;
      match !comment_level with
      | 0            -> token lexbuf
      | n when n > 0 -> comment lexbuf
      (* A negative depth would mean a closer was seen with no opener
         counted, which cannot happen while inside this rule. *)
      | _            -> assert false
  }

  (* Drop comment contents *)
  | _ {
      comment lexbuf
  }
Commit	Line	Data
	1	{
	2	open Tiger_parser.Token
	3
	4	let comment_level = ref 0
	5	let string_buf = Buffer.create 100
	6	}
	7
	8	let alpha = ['a'-'z' 'A'-'Z']
	9	let num = ['0'-'9']
	10	let newline = '\n' \| '\r' \| "\n\r"
	11
	12	rule token = parse
	13	\| eof {
	14	EOF
	15	}
	16
	17	(* Track line number *)
	18	\| newline {
	19	Lexing.new_line lexbuf;
	20	token lexbuf
	21	}
	22
	23	(* Comment *)
	24	\| "/*" {
	25	incr comment_level;
	26	comment lexbuf
	27	}
	28
	29	\| ":=" {ASSIGN}
	30	\| "<=" {LE}
	31	\| ">=" {GE}
	32	\| "<>" {NEQ}
	33	\| '&' {AND}
	34	\| '(' {LPAREN}
	35	\| ')' {RPAREN}
	36	\| '*' {TIMES}
	37	\| '+' {PLUS}
	38	\| '-' {MINUS}
	39	\| '/' {DIVIDE}
	40	\| ',' {COMMA}
	41	\| '.' {DOT}
	42	\| ':' {COLON}
	43	\| ';' {SEMICOLON}
	44	\| '>' {GT}
	45	\| '<' {LT}
	46	\| '=' {EQ}
	47	\| '[' {LBRACK}
	48	\| ']' {RBRACK}
	49	\| '{' {LBRACE}
	50	\| '}' {RBRACE}
	51	\| '\|' {OR}
	52
	53	(* String literal *)
	54	\| '"' {
	55	string_literal lexbuf
	56	}
	57
	58	(* Drop whitespace *)
	59	\| [' ' '\t'] {
	60	token lexbuf
	61	}
	62
	63	\| (num+ as int) {
	64	INT (int_of_string int)
	65	}
	66
	67	\| (alpha (alpha \| num \| '_')* as id) {
	68	match id with
	69	\| "array" -> ARRAY
	70	\| "break" -> BREAK
	71	\| "do" -> DO
	72	\| "else" -> ELSE
	73	\| "end" -> END
	74	\| "for" -> FOR
	75	\| "function" -> FUNCTION
	76	\| "if" -> IF
	77	\| "in" -> IN
	78	\| "let" -> LET
	79	\| "nil" -> NIL
	80	\| "of" -> OF
	81	\| "then" -> THEN
	82	\| "to" -> TO
	83	\| "type" -> TYPE
	84	\| "var" -> VAR
	85	\| "while" -> WHILE
	86	\| _ -> ID id
	87	}
	88	and string_literal = parse
	89	(* Keep escaped quote marks as part of the string literal *)
	90	\| '\\' '"' {
	91	Buffer.add_char string_buf '"';
	92	string_literal lexbuf
	93	}
	94
	95	\| '"' {
	96	let string = Buffer.contents string_buf in
	97	Buffer.reset string_buf;
	98	STRING string
	99	}
	100
	101
	102	\| (_ as c) {
	103	Buffer.add_char string_buf c;
	104	string_literal lexbuf
	105	}
	106	and comment = parse
	107	\| eof {
	108	(* TODO: Error: unterminated comment? or we don't care? *)
	109	EOF
	110	}
	111
	112	(* Track line number *)
	113	\| newline {
	114	Lexing.new_line lexbuf;
	115	comment lexbuf
	116	}
	117
	118	\| "/*" {
	119	incr comment_level;
	120	comment lexbuf
	121	}
	122
	123	\| "*/" {
	124	decr comment_level;
	125	match !comment_level with
	126	\| 0 -> token lexbuf
	127	\| n when n > 0 -> comment lexbuf
	128	\| _ -> assert false
	129	}
	130
	131	(* Drop comment contents *)
	132	\| _ {
	133	comment lexbuf
	134	}