(* 
   OCamllex lexer for banal's simple language.

   Copyright (C) 2011 Antoine Miné
*)

{
open Lexing
open Abstract_syntax
open Parser

(* When set, a preprocessor line mark found at the start of a line
   overrides the line number (and the file name, if one is given) stored
   in the current lexer position.  When unset, which is the default, line
   marks are still skipped but leave the recorded position untouched
   apart from the ordinary line increment. *)
let use_line_directives = ref false

(* Reserved words of the language, mapped to the parser token each one
   stands for.  The token rule consults this table for every word it
   reads; words that are absent are plain identifiers. *)
let kwd_table = Hashtbl.create 10

(* Fill the table once, when the module is initialised. *)
let () =
  let register (word, tok) = Hashtbl.add kwd_table word tok in
  List.iter register
    [
     (* types and type qualifiers *)
     "bool", TOK_BOOL;
     "char", TOK_CHAR;
     "short", TOK_SHORT;
     "int", TOK_INT;
     "long", TOK_LONG;
     "integer", TOK_INTEGER;
     "unsigned", TOK_UNSIGNED;
     "float", TOK_FLOAT;
     "double", TOK_DOUBLE;
     "real", TOK_REAL;
     "void", TOK_VOID;
     (* control flow *)
     "while", TOK_WHILE;
     "if", TOK_IF;
     "else", TOK_ELSE;
     "return", TOK_RETURN;
     "break", TOK_BREAK;
     (* boolean constants *)
     "true", TOK_TRUE;
     "false", TOK_FALSE;
     (* remaining reserved words *)
     "input", TOK_INPUT;
     "volatile", TOK_VOLATILE;
     "assert", TOK_ASSERT;
     "assume", TOK_ASSUME;
     "print", TOK_PRINT;
   ]
}

(* special character classes *)

(* Horizontal blanks only.  Carriage return is deliberately not part of
   this class: it is a line terminator (see newline below) and has to reach
   a rule that calls new_line.  If it were also a blank, a CR at the start
   of a line or a run of several CRs would be consumed as blank space and
   the line numbers recorded in positions would drift. *)
let space = [' ' '\t']+

(* LF, CR and CR LF line terminators.  Longest match makes a CR LF pair
   count as one line break. *)
let newline = "\n" | "\r" | "\r\n"

(* utilities *)
let digit = ['0'-'9']
(* a digit or an underscore; underscores are allowed inside numbers *)
let digit_ = ['0'-'9' '_']
let sign = "+" | "-"
(* File name inside a line mark: any characters up to the closing double
   quote, where a backslash followed by a double quote stands for an
   embedded quote.  Line terminators are excluded so that a line mark with
   a missing closing quote cannot extend over the following lines. *)
let filename = ([^ '\"' '\n' '\r'] | "\\\"")*

(* integers: a leading digit of the base, then digits or underscores *)
let int_dec = digit digit_*
let int_bin = ("0b" | "0B") ['0'-'1'] ['0'-'1' '_']*
let int_oct = ("0o" | "0O") ['0'-'7'] ['0'-'7' '_']*
let int_hex = ("0x" | "0X") ['0'-'9' 'a'-'f' 'A'-'F'] ['0'-'9' 'a'-'f' 'A'-'F' '_']*
let const_int = int_bin | int_oct | int_dec | int_hex

(* floats *)
(* integer part, a dot, then an optional fractional part *)
let float_frac = digit digit_* "." digit_*
(* exponent marker, optional sign, then at least one digit *)
let float_exp = ("e" | "E") sign? digit digit_*
(* either a dotted number with an optional exponent, or a decimal integer
   with a mandatory exponent *)
let const_float = float_frac float_exp? | int_dec float_exp

(* token at start of line *)
rule start_token = parse

(* Preprocessor line mark: a hash sign, an optional line keyword, a line
   number, an optional quoted file name, then anything up to the end of the
   line.  The whole line is consumed here. *)
| "#" space* "line"? space* (digit+ as line) space*
   ("\"" (filename as file) "\"")? [^ '\n' '\r']* newline
{
  (* the mark itself ends a line *)
  new_line lexbuf;
  if !use_line_directives then begin
    (* override the position of the line that follows the mark; the file
       name is kept as is when the mark does not give one *)
    let pos = lexbuf.lex_curr_p in
    let fname =
      match file with
      | Some f -> f
      | None -> pos.pos_fname
    in
    lexbuf.lex_curr_p <-
      { pos with pos_lnum = int_of_string line; pos_fname = fname }
  end;
  (* still at the start of a line: another mark may follow *)
  start_token lexbuf
}

(* leading blanks do not change the fact that we are at a line start *)
| space
{ start_token lexbuf }

(* nothing special at the line start: hand over to the regular rule *)
| ""
{ token lexbuf }

(* token at any place in line *)
and token = parse

(* a word: reserved keywords are found in kwd_table, anything else is an
   identifier *)
| ['a'-'z' 'A'-'Z' '_'] ['a'-'z' 'A'-'Z' '0'-'9' '_']* as word
{ try Hashtbl.find kwd_table word
  with Not_found -> TOK_id word }

(* grouping *)
| "("    { TOK_LPAREN }
| ")"    { TOK_RPAREN }
| "["    { TOK_LBRACKET }
| "]"    { TOK_RBRACKET }
| "{"    { TOK_LCURLY }
| "}"    { TOK_RCURLY }

(* arithmetic *)
| "+"    { TOK_PLUS }
| "-"    { TOK_MINUS }
| "*"    { TOK_STAR }
| "/"    { TOK_DIVIDE }
| "%"    { TOK_PERCENT }
| "++"   { TOK_PLUS_PLUS }
| "--"   { TOK_MINUS_MINUS }

(* comparison *)
| "<"    { TOK_LESS }
| ">"    { TOK_GREATER }
| "<="   { TOK_LESS_EQUAL }
| ">="   { TOK_GREATER_EQUAL }
| "=="   { TOK_EQUAL_EQUAL }
| "!="   { TOK_NOT_EQUAL }

(* logic *)
| "!"    { TOK_EXCLAIM }
| "&&"   { TOK_AND_AND }
| "||"   { TOK_BAR_BAR }

(* assignment *)
| "="    { TOK_EQUAL }
| "+="   { TOK_PLUS_EQUAL }
| "-="   { TOK_MINUS_EQUAL }
| "*="   { TOK_STAR_EQUAL }
| "/="   { TOK_DIVIDE_EQUAL }
| "%="   { TOK_PERCENT_EQUAL }

(* punctuation *)
| ";"    { TOK_SEMICOLON }
| ":"    { TOK_COLON }
| ","    { TOK_COMMA }

(* numeric literals, passed on as their source text *)
| const_int    as lit { TOK_int_literal lit }
| const_float  as lit { TOK_float_literal lit }

(* block comment: skip its body, then resume *)
| "/*" { comment lexbuf; token lexbuf }
(* line comment: runs up to, but not including, the line terminator *)
| "//" [^ '\n' '\r']* { token lexbuf }
(* a line break brings us back to the start-of-line rule; this case must
   stay ahead of the blank case below *)
| newline { new_line lexbuf; start_token lexbuf }
| space { token lexbuf }

| eof { TOK_EOF }


(* comments *)
and comment = parse
(* Skips the body of a block comment whose opening delimiter has already
   been consumed by the token rule, and returns unit once the closing
   delimiter has been read.  Comments do not nest: an opening delimiter
   inside a comment is skipped like any other character.
   Raises Failure if the input ends before the comment is closed. *)
| "*/" { () }
| [^ '\n' '\r'] { comment lexbuf }
(* keep line numbers accurate across multi-line comments *)
| newline { new_line lexbuf; comment lexbuf }
(* without this case the generated lexer fails with a generic empty-token
   message that hides the actual cause *)
| eof { failwith "unterminated comment" }
