package jp.ac.osaka_u.ist.sel.similarity.hash.parser;

import java.util.regex.Pattern;

import org.apache.log4j.Logger;

%%

/* Generate a public class named CppTokenStream that implements TokenStream. */
%public
%class CppTokenStream
%implements TokenStream
/* The scanning method is named next and returns a String. */
%function next
%type String
/* Close the input reader when end of file is reached. */
%eofclose

/* Match against the Unicode character set. */
%unicode
/* Maintain yyline and yycolumn, which getLine() and getColumn() expose. */
%line
%column

%init{
  /* Nesting depth of conditional blocks while skipping an "#if 0" region. */
  this._ifZeroCount = 0;
  /*
   * Matches a backslash followed by a line terminator (CRLF, CR or LF), i.e.
   * a line continuation inside a string literal. All three terminators are
   * covered so that files with Windows or old Mac line endings produce the
   * same string token text as files with Unix line endings.
   */
  this._literalLN = Pattern.compile("\\\\(?:\r\n|\r|\n)");
%init}

%{
  /** Nesting depth of conditional blocks seen while inside an "#if 0" region. */
  private int _ifZeroCount;

  /** Compiled pattern used to strip line continuations from string literals. */
  private Pattern _literalLN;

  /**
   * Returns the scanner line counter.
   *
   * @return the current value of yyline
   */
  public int getLine() {
    return yyline;
  }

  /**
   * Returns the scanner column counter.
   *
   * @return the current value of yycolumn
   */
  public int getColumn() {
    return yycolumn;
  }

%}

/* decimal digit, identifier character, hexadecimal digit */
D  =		[0-9]
L  =		[A-Za-z_$]
H  =		[0-9a-fA-F]
/* exponent part of a floating-point literal */
E  =		[Ee][+-]?{D}+
/* suffix letters of floating-point and integer literals */
FType =		[fFlL]
IType =		[uUlL]*

/* state entered while skipping the body of an "#if 0" region */
%state IFZERO

%%

<YYINITIAL> {
/*
 * Default state: turns C/C++ source text into a stream of token strings.
 * Whitespace, comments and line continuations are dropped; every other
 * rule returns the matched text.
 */

/* SPACE */
[ \t\v\f]					{ /* Ignore */ }

/* if 0: start skipping everything up to the matching endif */
"#if"[ \t\v\f]+"0"			{ _ifZeroCount++; yybegin(IFZERO); }

/* #define */
"#define"			        { return "#define"; }

/* line continuation outside of literals */
\\(\r\n|\r|\n)				{ /* Ignore */ }

/* macro */
("#"[a-z]+)					{ return yytext(); }

"$"{L}({L}|{D})*			{ return yytext(); } 

"#"							{ return "#"; }
"##"						{ return "##"; }

/* block comment */
"/*"~"*/"					{ /* Ignore */ }

/*
 * line comment: runs up to, but not including, the line terminator. The
 * terminator itself is dropped by the newline rule below, so a comment on
 * the last line of a file without a trailing newline is skipped as well.
 */
"//"[^\r\n]*				{ /* Ignore */ }

/* annotation */
("@"[a-zA-Z]+)				{ return yytext(); }

/* SPACE */
[\r\n]						{ /* Ignore */ }

/* Reserved Words */
"auto"						{ return "auto"; }
"break"						{ return "break"; }
"case"						{ return "case"; }
"char"						{ return "char"; }
"const"						{ return "const"; }
"continue"					{ return "continue"; }
"default"					{ return "default"; }
"do"						{ return "do"; }
"double"					{ return "double"; }
"else"						{ return "else"; }
"enum"						{ return "enum"; }
"extern"					{ return "extern"; }
"float"						{ return "float"; }
"for"						{ return "for"; }
"goto"						{ return "goto"; }
"if"						{ return "if"; }
"int"						{ return "int"; }
"long"						{ return "long"; }
"register"					{ return "register"; }
"return"					{ return "return"; }
"short"						{ return "short"; }
"signed"					{ return "signed"; }
"sizeof"					{ return "sizeof"; }
"static"					{ return "static"; }
"struct"					{ return "struct"; }
"switch"					{ return "switch"; }
"typedef"					{ return "typedef"; }
"union"						{ return "union"; }
"unsigned"					{ return "unsigned"; }
"void"						{ return "void"; }
"volatile"					{ return "volatile"; }
"while"						{ return "while"; }

/* brackets */
"("							{ return "("; }
")"							{ return ")"; }

"["							{ return "["; }
"]"							{ return "]"; }

"{"							{ return "{"; }
"}"							{ return "}"; }

/* operators; the longest matching operator wins */
"!"							{ return "!"; }
"!="						{ return "!="; }

"%"							{ return "%"; }
"%="						{ return "%="; }

"^"							{ return "^"; }
"^="						{ return "^="; }

"&"							{ return "&"; }
"&="						{ return "&="; }
"&&"						{ return "&&"; }

"|"							{ return "|"; }
"|="						{ return "|="; }
"||"						{ return "||"; }

"*"							{ return "*"; }
"*="						{ return "*="; }

"/"							{ return "/"; }
"/="						{ return "/="; }

"+"							{ return "+"; }
"+="						{ return "+="; }
"++"						{ return "++"; }

"-"							{ return "-"; }
"-="						{ return "-="; }
"--"						{ return "--"; }

"<"							{ return "<"; }
"<="						{ return "<="; }
"<<"						{ return "<<"; }
"<<="						{ return "<<="; }

">"							{ return ">"; }
">="						{ return ">="; }
">>"						{ return ">>"; }
">>="						{ return ">>="; }

"->"						{ return "->"; }
"..."						{ return "..."; }
"="							{ return "="; }
"=="						{ return "=="; }

/* punctuation */
","							{ return ","; }
"."							{ return "."; }
";"							{ return ";"; }
":"							{ return ":"; }
"?"							{ return "?"; }
"~"							{ return "~"; }

/* a backslash that is not part of a line continuation */
"\\"						{ return "\\"; }

/* variable name */
{L}({L}|{D})*				{ return yytext(); } 
                               
/* integer literals: hexadecimal, leading zero, decimal */
0[xX]{H}+{IType}?			{ return yytext(); }
0{D}+{IType}?				{ return yytext(); }
{D}+{IType}?				{ return yytext(); }

/* floating-point literals */
{D}+{E}{FType}?				{ return yytext(); }
{D}*"."{D}+({E})?{FType}?	{ return yytext(); }
{D}+"."{D}*({E})?{FType}?	{ return yytext(); }

/*
 * String Literal: a backslash may be followed by digits, by any character
 * matched by the dot, or by a line terminator (CRLF, CR or LF, the same set
 * the continuation rule above accepts). Text matched by _literalLN is
 * removed from the returned token.
 */
\"(\\[0-9]+|\\.|\\(\r\n|\r|\n)|[^\\\"])*\"	{ return _literalLN.matcher(yytext()).replaceAll(""); }

/* Character Literal */
\'(\\x[0-9a-fA-F]+|\\[0-9]+|\\[^\n\r]|\\\'|[^\'])\'	{ return  yytext(); }

}

<IFZERO> {
/*
 * Skips the body of an "#if 0" region. _ifZeroCount tracks the nesting depth
 * of conditional directives so that only the matching endif leaves this
 * state. A directive is recognised when it directly follows a line
 * terminator. The character after the directive is only looked at, not
 * consumed, so the line terminator that ends one directive can also start
 * the directive on the next line.
 */

/* nested openers: if, ifdef, ifndef */
[\r\n]"#if"/[^a-zA-Z]		{ _ifZeroCount++; }
[\r\n]"#ifdef"/[^a-zA-Z]	{ _ifZeroCount++; }
[\r\n]"#ifndef"/[^a-zA-Z]	{ _ifZeroCount++; }

/* endif: may be followed by a line terminator, whitespace or a comment */
[\r\n]"#endif"/[^a-zA-Z]	{ _ifZeroCount--;
							  if (_ifZeroCount <= 0) {
							    /* outermost region closed: resume normal tokenizing */
							    _ifZeroCount = 0;
							    yybegin(YYINITIAL);
							  }
							}

/* everything else inside the region is discarded */
.|[\r\n]					{ /* Ignore */ }
}

/*
 * Fallback: reached for any character that no rule above accepts. Throws a
 * RuntimeException naming the character, its code point and the scanner
 * position (yyline and yycolumn as maintained by the scanner).
 */
.|[\r\n]					{ String illegalWord = yytext();
							  /* Report the code point of the offending character. yychar is a
							     character offset and is not maintained without the %char option. */
							  int yycode = illegalWord.codePointAt(0);
                              String err = "Illegal character '" + illegalWord + "'(" + yycode + ") at line " + yyline + ", column " + yycolumn;
                              throw new RuntimeException(err); }
                                                              
/* End of input: signal the end of the token stream with null. */
<<EOF>>                 	{ return null; }

