package jp.ac.osaka_u.ist.sel.similarity.hash.parser;

import java.util.regex.Pattern;

import org.apache.log4j.Logger;

%%

/* Generate a public scanner class named JavaTokenStream that implements TokenStream. */
%public
%class JavaTokenStream
%implements TokenStream
/* The scanning method is named next and hands back each token as a String. */
%function next
%type String
/* Close the input reader once the end of the input has been reached. */
%eofclose

/* Unicode input; maintain yyline and yycolumn, which getLine() and getColumn() expose. */
%unicode
%line
%column

%init{
  // Backslash directly followed by a line feed; the string literal rule strips these pairs.
  _literalLN = Pattern.compile("\\\\\n");
  // Scanning starts outside of any skipped "#if (false)" region.
  _ifZeroCount = 0;
%init}

%{
  /** Nesting depth of the skipped "#if" region currently being scanned; 0 outside of one. */
  private int _ifZeroCount;

  /** Matches a backslash followed by a line feed; used to clean up string literals. */
  private Pattern _literalLN;

  /**
   * Reports the line position tracked by the scanner.
   *
   * @return the current value of the generated {@code yyline} counter
   */
  public int getLine() {
    return this.yyline;
  }

  /**
   * Reports the column position tracked by the scanner.
   *
   * @return the current value of the generated {@code yycolumn} counter
   */
  public int getColumn() {
    return this.yycolumn;
  }
%}

/* Decimal digit. */
D  =		[0-9]
/* Character allowed in an identifier besides digits: ASCII letter, underscore or dollar sign. */
L  =		[a-zA-Z_$]
/* Hexadecimal digit. */
H  =		[a-fA-F0-9]
/* Decimal exponent of a floating-point literal, e.g. e10 or E-3. */
E  =		[Ee][+-]?{D}+
/* Floating-point suffix. f/F and d/D are the suffixes Java defines; l/L is kept so that
   input accepted before this change is still tokenized the same way. */
FType =		(f|F|d|D|l|L)
/* Integer suffix: any run of u/U/l/L (possibly empty). */
IType =		(u|U|l|L)*

%state IFZERO

%%

<YYINITIAL> {
/* Horizontal whitespace: space, tab, vertical tab and form feed.
   The vertical tab is written as \x0B because JFlex has no \v escape; a backslash
   before an ordinary letter denotes that letter, so \v would make the letter v
   count as whitespace and silently drop an identifier named v. */
[ \t\x0B\f]					{ /* Ignore */ }

/* "#if (false)" opens a region that is skipped entirely; see the IFZERO state */
"#if"[ \t\x0B\f]+"(false)"	{ _ifZeroCount++; yybegin(IFZERO); }

/* macro */
("#"[a-z]+)					{ return yytext(); }

"#"							{ return "#"; }
"##"						{ return "##"; }

/* block comment */
"/*"~"*/"					{ /* Ignore */ }

/* line comment: everything up to, but not including, the line terminator.
   Not requiring the terminator lets a comment on the last line of a file that
   has no trailing newline be skipped too; the terminator itself is ignored by
   the line terminator rule below. */
"//"[^\r\n]*				{ /* Ignore */ }

/* annotation */
("@"[a-zA-Z]+)				{ return yytext(); }

/* line terminators */
[\r\n]						{ /* Ignore */ }

/* Reserved words. Every rule returns its own spelling as the token.
   All rules in this part are fixed, pairwise different strings, so their
   relative order does not affect matching. */

/* primitive types and void */
"byte"            { return "byte"; }
"short"           { return "short"; }
"char"            { return "char"; }
"int"             { return "int"; }
"long"            { return "long"; }
"float"           { return "float"; }
"double"          { return "double"; }
"boolean"         { return "boolean"; }
"void"            { return "void"; }

/* literal keywords */
"true"            { return "true"; }
"false"           { return "false"; }
"null"            { return "null"; }

/* control flow */
"if"              { return "if"; }
"else"            { return "else"; }
"switch"          { return "switch"; }
"case"            { return "case"; }
"default"         { return "default"; }
"for"             { return "for"; }
"while"           { return "while"; }
"do"              { return "do"; }
"break"           { return "break"; }
"continue"        { return "continue"; }
"return"          { return "return"; }

/* exceptions and assertions */
"try"             { return "try"; }
"catch"           { return "catch"; }
"finally"         { return "finally"; }
"throw"           { return "throw"; }
"throws"          { return "throws"; }
"assert"          { return "assert"; }

/* declarations */
"package"         { return "package"; }
"import"          { return "import"; }
"class"           { return "class"; }
"interface"       { return "interface"; }
"enum"            { return "enum"; }
"extends"         { return "extends"; }
"implements"      { return "implements"; }

/* object expressions */
"this"            { return "this"; }
"super"           { return "super"; }
"new"             { return "new"; }
"instanceof"      { return "instanceof"; }

/* modifiers */
"public"          { return "public"; }
"protected"       { return "protected"; }
"private"         { return "private"; }
"static"          { return "static"; }
"final"           { return "final"; }
"abstract"        { return "abstract"; }
"native"          { return "native"; }
"synchronized"    { return "synchronized"; }
"volatile"        { return "volatile"; }
"transient"       { return "transient"; }
"strictfp"        { return "strictfp"; }

/* reserved but unused */
"const"           { return "const"; }
"goto"            { return "goto"; }

/* Operators and punctuation. Every rule returns its own spelling as the token.
   NOTE(review): there is no rule for >>>, >>>= or :: so these are split into
   the shorter operators listed here. */

/* brackets */
"("               { return "("; }
")"               { return ")"; }
"["               { return "["; }
"]"               { return "]"; }
"{"               { return "{"; }
"}"               { return "}"; }

/* separators and other punctuation */
","               { return ","; }
"."               { return "."; }
";"               { return ";"; }
":"               { return ":"; }
"?"               { return "?"; }
"..."             { return "..."; }
"->"              { return "->"; }

/* assignment */
"="               { return "="; }
"+="              { return "+="; }
"-="              { return "-="; }
"*="              { return "*="; }
"/="              { return "/="; }
"%="              { return "%="; }
"&="              { return "&="; }
"|="              { return "|="; }
"^="              { return "^="; }
"<<="             { return "<<="; }
">>="             { return ">>="; }

/* comparison */
"=="              { return "=="; }
"!="              { return "!="; }
"<"               { return "<"; }
"<="              { return "<="; }
">"               { return ">"; }
">="              { return ">="; }

/* arithmetic */
"+"               { return "+"; }
"-"               { return "-"; }
"*"               { return "*"; }
"/"               { return "/"; }
"%"               { return "%"; }
"++"              { return "++"; }
"--"              { return "--"; }

/* bitwise and shift */
"&"               { return "&"; }
"|"               { return "|"; }
"^"               { return "^"; }
"~"               { return "~"; }
"<<"              { return "<<"; }
">>"              { return ">>"; }

/* logical */
"!"               { return "!"; }
"&&"              { return "&&"; }
"||"              { return "||"; }

/* identifier; dots are accepted inside it, so a qualified name such as a.b.c is one token */
{L}({L}|{D}|\.)*			{ return yytext(); }

/* integer literals: hexadecimal, leading zero, plain decimal; each with an optional suffix */
0[xX]{H}+{IType}?			{ return yytext(); }
0{D}+{IType}?				{ return yytext(); }
{D}+{IType}?				{ return yytext(); }

/* floating-point literals */
/* digits followed directly by a suffix, e.g. 1f; without this rule the literal
   was split into a number token and an identifier token */
{D}+{FType}					{ return yytext(); }
/* digits with an exponent, e.g. 1e10 */
{D}+{E}{FType}?				{ return yytext(); }
/* digits around a decimal point, e.g. .5, 1.5, 1. */
{D}*"."{D}+({E})?{FType}?	{ return yytext(); }
{D}+"."{D}*({E})?{FType}?	{ return yytext(); }

/* String literal: returned with its quotes, after removing every
   backslash + line feed pair found inside it */
\"(\\[0-9]+|\\.|\\\n|[^\\\"])*\"	{
									  return _literalLN.matcher(yytext()).replaceAll("");
									}

/* Character literal: returned unchanged, quotes included */
\'(\\u[0-9a-fA-F]+|\\x[0-9a-fA-F]+|\\[0-9]+|\\[^\n\r]|\\\'|[^\'])\'	{
									  return yytext();
									}

}

<IFZERO> {
/* a nested #if or #ifdef at the start of a line adds one level to skip */
[\r\n]"#if"("def")?[^a-zA-Z]	{ _ifZeroCount++; }

/* #end alone on a line closes one level; once no level is left,
   normal scanning resumes */
[\r\n]"#end"[\r\n]				{
								  _ifZeroCount--;
								  if (_ifZeroCount <= 0) {
								    _ifZeroCount = 0;
								    yybegin(YYINITIAL);
								  }
								}

/* anything else inside the skipped region is discarded */
.|[\r\n]						{ /* Ignore */ }
}

/* Fallback: a character that no other rule accepts aborts scanning with a
   RuntimeException naming the character, its code point and its position. */
.|[\r\n]					{ String illegalWord = yytext();
							  /* Report the code point of the offending character. yychar is the
							     character offset of the match and is not maintained here because
							     the char directive is not enabled, so it cannot be used for this. */
							  int yycode = illegalWord.codePointAt(0);
                              String err = "Illegal character '" + illegalWord + "'(" + yycode + ") at line " + yyline + ", column " + yycolumn;
                              throw new RuntimeException(err); }
                                                              
/* End of input: signalled to the caller by returning null instead of a token. */
<<EOF>>                 	{ return null; }

