//----------------------------------------------------------------------
//
//			File:			"RubySyntaxHighliter.cpp"
//			Created:		23-Mar-2012
//			Author:			Nobuhide Tsuda
//			Description:	RubySyntaxHighliter class implementation
//
//----------------------------------------------------------------------

/*

	Copyright (C) 2012 by Nobuhide Tsuda

	RuviEdit ̃CZX MIT{GPL ȃCZXłB 
	ۏ؁ET|[głAŗpłApAvł\[XR[h𗬗p邱Ƃ\łB 
	i\[XR[h𗬗pꍇAp̒쌠ECZXRuviEdit̂̂܂܂łj 
	M҂́AvO}ɂƂĕsRɂ܂Ȃ̂ɎRRƌGPLnȂ̂ŁA 
	RuviEdit ̃\[XGPLnvWFNgŎgp邱Ƃ֎~܂B 
	GPLvWFNgł͈؂̗p֎~܂ALGPLvWFNgł͓INɂ闬p͋܂B

*/

#include "RubySyntaxHighliter.h"
#include <QTextDocument>
#include <QTextEdit>
#include <QTextBlockUserData>
#include "EditView.h"
#include "Settings.h"
#include "ViewTokenizer.h"
#include <QDebug>

//	Shorthand for the read-only C strings stored in the keyword table (see KeywordsItem).
typedef const char cchar;
//	Forward declaration of the line tokenizer defined further down in this file.
int getToken(const QString &text, int &ix, QString &token, bool &inBrace, char quote = '\0');

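/**

	Parsing for syntax highlighting is done one line (text block) at a time in highlightBlock().
	State that has to carry over to the following line is passed with setCurrentBlockState(int).
	The terminating identifier of a here document is stored on the line that contains "<<"
	through setCurrentBlockUserData(RSHUserData *), and is looked up from the lines that follow.

*/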
//	Block states handed from one line to the next through setCurrentBlockState().
//	0 (and -1, which highlightBlock() maps to 0) means "no special state".
enum {
	IN_BLOCK_COMMENT     = 1,	//	inside a =begin ... =end block comment
	IN__END__            = 2,	//	after a line consisting only of __END__
	IN_MULTI_LINE_STRING = 3,	//	a string literal is still open at the end of the line
	IN_HERE_DOCUMENT     = 4,	//	the line contained a "<<" here-document operator
	IN_TAIL_DOT          = 5,	//	the last token of the previous line was "."
	IN_TAIL_COLON_COLON  = 6,	//	the last token of the previous line was "::"
};

//	Per-block user data attached with setCurrentBlockUserData().
//	Holds the here-document identifier found on the line (if any) and the stack of
//	string items that are still open at the end of the line, so that highlightBlock()
//	can resume from them on the following line.
class RSHUserData : public QTextBlockUserData
{
public:
	//	The single-argument constructors are explicit so that a QString or a
	//	QVector<StringItem> is never converted into user data by accident.
	explicit RSHUserData(const QString &ident) : m_ident(ident) {}
	RSHUserData(const QString &ident, const QVector<StringItem> &quoteStack)
		: m_ident(ident), m_quoteStack(quoteStack) {}
	explicit RSHUserData(const QVector<StringItem> &quoteStack) : m_quoteStack(quoteStack) {}
	~RSHUserData() {}

public:
	QString	m_ident;			//	here-document identifier; empty when none was recorded on this line
	QVector<StringItem>	m_quoteStack;	//	string items still open at the end of the line
};
//----------------------------------------------------------------------

//	True when ch is a letter, a number or an underscore.
bool isLetterOrNumberOrUnderbar(const QChar &ch)
{
	if( ch == QChar('_') )
		return true;
	return ch.isLetterOrNumber();
}
//	True when ch is a letter or an underscore.
bool isLetterOrUnderbar(const QChar &ch)
{
	if( ch == QChar('_') )
		return true;
	return ch.isLetter();
}
//	True when ch is one of the octal digits '0' .. '7'.
bool isOctalChar(const QChar &ch)
{
	if( !(ch >= '0') )
		return false;
	return ch <= '7';
}
//	True when ch is a hexadecimal digit: '0'..'9', 'A'..'F' or 'a'..'f'.
bool isHexChar(const QChar &ch)
{
	if( ch >= '0' && ch <= '9' )
		return true;
	if( ch >= 'A' && ch <= 'F' )
		return true;
	return ch >= 'a' && ch <= 'f';
}

//	Builds the highlighter for the given view.
//	@param parent	view to highlight; it is also kept in m_view for settings look-ups
//	NOTE(review): the C-style cast assumes an EditView can be used as a QTextEdit —
//	confirm against the declaration of EditView.
RubySyntaxHighliter::RubySyntaxHighliter( EditView * parent )
	: QSyntaxHighlighter((QTextEdit *)parent), m_view(parent)
{
	init();
	//	Give the view pointers to this highlighter's regexp and quote formats.
	m_view->setRegexpFormat(&m_regexpFormat);
	m_view->setQuoteFormat(&m_quoteFormat);
}
//	One entry of the keyword table read by RubySyntaxHighliter::init().
struct KeywordsItem
{
	bool	m_pseudoVar;	//	true: entry goes to the pseudo-variable list, false: to the keyword list
	cchar	*m_text;		//	keyword text; a null pointer marks the end of the table
	cchar	*m_desc;		//	not read in this file — presumably a description text; confirm at the table definition
};
extern KeywordsItem keywords[];		//	keyword table, defined in another translation unit
void RubySyntaxHighliter::init()
{
    m_cmntStartExp = QRegExp("^=begin\\b", Qt::CaseSensitive);
    m_cmntEndExp = QRegExp("^=end\\b", Qt::CaseSensitive);
    m_backslashOrSharpExp = QRegExp("[\\\\#]");		//	[\\#]
    //m_hereDocumentExp = QRegExp("^\\s*EOT\\b", Qt::CaseSensitive);
    m_keywordFormat.setFontWeight(QFont::Bold);
    for(int i = 0; keywords[i].m_text != 0; ++i) {
    	if( !keywords[i].m_pseudoVar )
			m_keywords << keywords[i].m_text;
    	else
			m_pseudoVars << keywords[i].m_text;
    }
	updateFormats();
}
//	Reloads the foreground colour of every character format from the view's settings.
//	Called from init().
void RubySyntaxHighliter::updateFormats()
{
    //m_textFormat.setForeground(m_view->settings()->color(Settings::TEXT));
    m_cmntFormat.setForeground(m_view->settings()->color(Settings::COMMENT));
    //m_hereDocumentFormat.setForeground(m_view->settings()->color(Settings::HERE_DOCUMENT));
    m_sqStringFormat.setForeground(m_view->settings()->color(Settings::SQ_STRING));
    m_dqStringFormat.setForeground(m_view->settings()->color(Settings::DQ_STRING));
    m_bqStringFormat.setForeground(m_view->settings()->color(Settings::BQ_STRING));
    m_quoteFormat.setForeground(m_view->settings()->color(Settings::QUOTE));
    m_escCharFormat.setForeground(m_view->settings()->color(Settings::ESC_CHAR));
    m_keywordFormat.setForeground(m_view->settings()->color(Settings::KEYWORD));
    m_pseudoVarFormat.setForeground(m_view->settings()->color(Settings::PSEUDO_VAR));
    m_userKeywordFormat.setForeground(m_view->settings()->color(Settings::USER_KEYWORD));
    m_methodFormat.setForeground(m_view->settings()->color(Settings::METHOD));
    m_constantFormat.setForeground(m_view->settings()->color(Settings::CONSTANT));
    m_decimalFormat.setForeground(m_view->settings()->color(Settings::DECIMAL));
    m_instanceVarFormat.setForeground(m_view->settings()->color(Settings::INSTANCE_VAR));
    m_classVarFormat.setForeground(m_view->settings()->color(Settings::CLASS_VAR));
    m_globalVarFormat.setForeground(m_view->settings()->color(Settings::GLOBAL_VAR));
    m_symbolFormat.setForeground(m_view->settings()->color(Settings::SYMBOL));
    m_regexpFormat.setForeground(m_view->settings()->color(Settings::REGEXP));
}

//	The destructor body is intentionally empty.
RubySyntaxHighliter::~RubySyntaxHighliter()
{
}
//	undone 󔒃XLbv̓R[ōsAg[NʂԂ悤ɂ悳
//	@return token ŏ̕ index Ԃ
int getToken(const QString &text, int &ix, QString &token, bool &inBrace, char quote)
{
	token = QString();
	while( quote == '\0' ) {
		if( ix == text.length() )
			return -1;
		if( !text[ix].isSpace() )
			break;
		++ix;
	}
	int k = ix;
	QChar ch = text[ix++];
	if( quote || ch == '\'' || ch == '\"' || (ch == '}' && inBrace)
		|| (ch == '%' && ix + 1 < text.length() &&
					(text[ix] == 'Q' || text[ix] == 'q') && text[ix+1] == '(') )
	{
		inBrace = false;
		if( quote )
			ch = quote;
		else if( ch == '%' ) {
			ch = ')';	//	I[L
			ix += 2;	//	skip [Qq](
		}
		if( ch == '}' ) ch = '\"';
		while( ix < text.length() ) {
			QChar c = text[ix++];
			if( c == ch ) {
				break;
			}
			if( ch == '\"' && c == '#' && ix < text.length() && text[ix] == '{' ) {
				++ix;
				inBrace = true;
				break;
			}
			if( c == '\\' && ix < text.length() )
				++ix;
		}
		token = text.mid(k, ix - k);
	} else if( ch == '$' && ix < text.length() && !text[ix].isLetter() ) {
		++ix;
		token = text.mid(k, ix - k);	//	$ɉpȊȌꍇ
	} else if( isLetterOrNumberOrUnderbar(ch) ) {
		while( ix < text.length() && isLetterOrNumberOrUnderbar(text[ix]) )
			++ix;
		if( ix < text.length() && (text[ix] == '!' || text[ix] == '?') )
			++ix;
		token = text.mid(k, ix - k);
	} else if( (ch == '<'|| ch == '>') && ix < text.length() &&
		(text[ix] == '<' || text[ix] == '=') )		//	>< <> ЂƂɂȂ邯ǁA蔲
	{
		token = ch;
		token += text[ix];
		++ix;
	} else if( ch == '=' && ix < text.length() && text[ix] == '=' ) {
		token = "==";
		++ix;
	} else
		token = ch;
	return k;
}
//	@return true when every character of `text` satisfies QChar::isNumber();
//			an empty string yields true.
bool isDecimalString(const QString &text)
{
	for(int i = 0; i < text.length(); ++i) {
		//	Test the i-th character. The previous code re-tested text[0] on every
		//	pass, so a string such as "1abc" was accepted.
		if( !text[i].isNumber() )
			return false;
	}
	return true;
}
//	Walks backwards from the block preceding `block` and returns the first non-empty
//	here-document identifier found in the blocks' user data.
//	@return the identifier, or a null QString when no earlier block carries one
QString RubySyntaxHighliter::hereDocumentIdent(const QTextBlock &block)
{
	for(QTextBlock b = block.previous(); b.isValid(); b = b.previous()) {
		const RSHUserData *data = static_cast<RSHUserData *>(b.userData());
		if( data == 0 || data->m_ident.isEmpty() )
			continue;		//	no identifier recorded on this line, keep looking
		return data->m_ident;
	}
	return QString();
}
//	Formats one string token: quotes, body, escape sequences and embedded variables.
//	Called for quoted string literals and regexps, and also for the continuation of a
//	multi-line string or here document, including the part that follows a #{exp} expansion.
//
//	@param tn		tokenizer whose current token is the string to format
//	@param state	unused
//	@param strType	ViewTokenizer::SQ_STRING / DQ_STRING / BQ_STRING; any other value is
//					formatted with the regexp format
//	@return true when the string is still open at the end of the line; the block state and
//			the block user data have then been set for the following line
bool RubySyntaxHighliter::highlightString(const ViewTokenizer &tn, int /*state*/, uchar strType)
{
	int i = 0;						//	search position inside `token`
	int k = tn.tokenOffset();		//	position in the line of token[0]
	QString token = tn.tokenText();	//	part of the token not formatted yet
	bool bMultiLine = !tn.inBrace() && tn.mlString();
	if( tn.quoteLength() != 0 ) {
		//	Opening quote.
		setFormat(k, tn.quoteLength(), m_quoteFormat);
		k += tn.quoteLength();
		token = token.mid(tn.quoteLength());
	}
	QTextCharFormat format = strType == ViewTokenizer::SQ_STRING ? m_sqStringFormat
								: strType == ViewTokenizer::DQ_STRING ? m_dqStringFormat
								: strType == ViewTokenizer::BQ_STRING ? m_bqStringFormat
								: m_regexpFormat;
	//	Look for the next backslash or '#' that has at least one character after it.
	//	Each handled occurrence formats the text in front of it, then `token` is cut so
	//	that it starts right after the occurrence and the search restarts at 0.
	while( (i = m_backslashOrSharpExp.indexIn(token, i)) >= 0 && i + 1 < token.length() ) {
		if( token[i] == '#' ) {
			if( strType != ViewTokenizer::SQ_STRING ) {
				if( i + 2 < token.length()
					&& (token[i + 1] == '@' || token[i + 1] == '$')
					&& isLetterOrUnderbar(token[i + 2]) )
				{
					//	#@name or #$name
					int s = i;
					i += 3;
					while( i < token.length() && isLetterOrNumberOrUnderbar(token[i]) )
						++i;
					setFormat(k, s, format);
					setFormat(k + s, i - s, token[s + 1] == '@' ? m_instanceVarFormat : m_globalVarFormat);
					k += i;
					token = token.mid(i);
					i = 0;
				} else if( i + 3 < token.length()
					&& token[i + 1] == '@'
					&& token[i + 2] == '@'
					&& isLetterOrUnderbar(token[i + 3]) )
				{
					//	#@@name
					int s = i;
					i += 4;
					while( i < token.length() && isLetterOrNumberOrUnderbar(token[i]) )
						++i;
					setFormat(k, s, format);
					setFormat(k + s, i - s, m_classVarFormat);
					k += i;
					token = token.mid(i);
					i = 0;
				} else
					++i;
			} else
				++i;		//	'#' has no special meaning in a single-quoted string
		} else {
			//	Backslash; n is the length of the sequence including the backslash.
			int n = 2;
			if( strType != ViewTokenizer::SQ_STRING
				&& i + 6 < token.length()
				&& token.mid(i+1, 5) == "M-\\C-" )
			{
				//	\M-\C-x
				n = 7;
				setFormat(k, i, format);
				setFormat(k + i, n, m_escCharFormat);
			} else if( strType != ViewTokenizer::SQ_STRING
				&& i + 3 < token.length()
				&& ((isOctalChar(token[i+1]) && isOctalChar(token[i+2]) && isOctalChar(token[i+3]))
					|| (token[i+1] == 'x' && isHexChar(token[i+2]) && isHexChar(token[i+3]))
					|| ((token[i+1] == 'C' || token[i+1] == 'M') && token[i+2] == '-')) )
			{
				//	\nnn (octal), \xnn, \C-x, \M-x
				n = 4;
				setFormat(k, i, format);
				setFormat(k + i, n, m_escCharFormat);
			} else if( strType != ViewTokenizer::SQ_STRING
				&& i + 5 < token.length()
				&& token[i+1] == 'u'
				&& isHexChar(token[i+2]) && isHexChar(token[i+3])
				&& isHexChar(token[i+4]) && isHexChar(token[i+5]) )
			{
				//	\uXXXX: the four digits are hexadecimal, so they are tested with
				//	isHexChar(); testing for number characters missed e.g. \u00e9.
				n = 6;
				setFormat(k, i, format);
				setFormat(k + i, n, m_escCharFormat);
			} else if( strType != ViewTokenizer::SQ_STRING && i + 2 < token.length() && token[i + 1] == 'c' ) {
				//	\cx
				n = 3;
				setFormat(k, i, format);
				setFormat(k + i, n, m_escCharFormat);
			} else if( strType != ViewTokenizer::SQ_STRING || token[i + 1] == '\\' || token[i + 1] == '\'' ) {
				//	Any other two-character escape; in a single-quoted string only
				//	\\ and \' are escapes.
				setFormat(k, i, format);
				setFormat(k + i, 2, m_escCharFormat);
			} else {
				//	Single-quoted string and not an escape: the backslash and the next
				//	character are ordinary string text. (strType is SQ_STRING here, so
				//	the former regexp alternative could never be taken.)
				setFormat(k, i + 2, format);
			}
			k += i + n;
			token = token.mid(i + n);
			i = 0;
		}
	}
	if( !token.isEmpty() ) {
		if( tn.mlString() )
			setFormat(k, token.length(), format);	//	no closing quote on this line
		else {
			//	Remaining body, then the closing quote.
			int n = tn.tokenCloseQuoteLength();
			if( token.length() > n )
				setFormat(k, token.length() - n, format);
			if( n )
				setFormat(k + token.length() - n, n, m_quoteFormat);
		}
	}
	if( bMultiLine ) {
		//	Remember the open string items for the following line.
		setCurrentBlockState(IN_MULTI_LINE_STRING);
		setCurrentBlockUserData(new RSHUserData(tn.quoteStack()));
	}
	return bMultiLine;
}
//	Highlights one line of text.
//	When the end of the line is reached while a string or a #{...} block is still open,
//	the open items are kept on the tokenizer's quote stack and saved in the block's user
//	data; the next call restores them from the previous block before doing anything else.
//	@param text		text of the current block
void RubySyntaxHighliter::highlightBlock ( const QString & text )
{
	//	Restore the string items left open by the previous line, if any.
	QVector<StringItem> quoteStack;
	QTextBlock b(currentBlock().previous());
	if( b.isValid() ) {
		RSHUserData *ptr = (RSHUserData *)b.userData();
		if( ptr != 0 )
			quoteStack = ptr->m_quoteStack;
	}

	//	A previous state of -1 is treated like state 0.
	const int state = previousBlockState() == -1 ? 0 : previousBlockState();
#if		0	//def	_DEBUG
	m_view->doOutput2(QString("state = %1 text = '%2'\n")
						.arg(state, 4, 16)	//	base 16
						.arg(text));
#endif
	//	Setting new user data deletes the old data automatically,
	//	so the following two lines are not needed.
	//QTextBlockUserData *ptr = currentBlockUserData();
	//if( ptr != 0 ) delete ptr;
	setCurrentBlockUserData(0);
	if( state == IN__END__ ) {
		//	After __END__: nothing is highlighted and the state is passed on.
		setCurrentBlockState(IN__END__);
		return;
	}
	if( state == IN_BLOCK_COMMENT ) {
		//	Inside =begin ... =end: the whole line is a comment; a line starting with
		//	=end closes the block comment.
		setFormat(0, text.length(), m_cmntFormat);
		setCurrentBlockState(m_cmntEndExp.indexIn(text) == 0 ? 0 : IN_BLOCK_COMMENT);
		if( !quoteStack.isEmpty() )
			setCurrentBlockUserData(new RSHUserData(quoteStack));
		return;
	}
	ViewTokenizer tn(text, m_view);
	tn.setQuoteStack(quoteStack);
	//	__END__ and =begin are only looked for when no string is open, or when the
	//	tokenizer reports being inside a #{...} block.
	if( quoteStack.isEmpty() || tn.inBrace() ) {
		if( text == "__END__" ) {
			setCurrentBlockState(IN__END__);
			return;
		}
		if( m_cmntStartExp.indexIn(text) == 0 ) {	//	start of a block comment
			setFormat(0, text.length(), m_cmntFormat);
			setCurrentBlockState(IN_BLOCK_COMMENT);
			if( !quoteStack.isEmpty() )
				setCurrentBlockUserData(new RSHUserData(quoteStack));
			return;
		}
	}
	QChar nch;
	if( !quoteStack.isEmpty() ) {
		//	The line starts inside a string item left open by the previous line.
		switch( quoteStack.last().m_type ) {
		case ViewTokenizer::HERE_DOCUMENT: {
			QString t = text;
			if( quoteStack.last().m_indent )
				t = t.trimmed();	//	"<<-" form: the terminator may be indented
			if( t == hereDocumentIdent(currentBlock()) ) {
				//	Terminator line: ends the here document.
				setFormat(0, text.length(), m_constantFormat);
				setCurrentBlockState(0);
				return;
			} else {
				tn.nextString(0, 0, quoteStack.last().m_strType);
				if( highlightString(tn, 0, quoteStack.last().m_strType) ) {
					setCurrentBlockState(state);
					return;
				}
				//	fall through to the token loop below when "#{" was found
			}
			break;
		}
		case ViewTokenizer::SQ_STRING:
		case ViewTokenizer::DQ_STRING:
		case ViewTokenizer::BQ_STRING:
			//	continuation of a multi-line string
			tn.nextString();
			if( highlightString(tn, 0, quoteStack.last().m_type) ) return;
			//	fall through to the token loop below when the string ended or "#{" was found
			break;
		}
	}
	//	Seed the tokenizer's token text with the "." or "::" that ended the previous line.
	if( state == IN_TAIL_DOT )
		tn.setTokenText(".");
	else if( state == IN_TAIL_COLON_COLON )
		tn.setTokenText("::");
	int nextState = 0;
	QString hereDocIdent;
	while( tn.nextToken() != ViewTokenizer::END_OF_BUFFER ) {
		const int ix = tn.ix();		//	position of the next character
		const int k = tn.tokenOffset();		//	position of the current token
		QString token = tn.tokenText();	//	current token
		if( token == "#" ) {
			//	Line comment: runs to the end of the line.
			setFormat(k, text.length() - k, m_cmntFormat);
			break;
		}
		if( token == "@" && ix < text.length() ) {
			if( isLetterOrUnderbar(text[ix]) ) {
				//	@name: instance variable
				tn.nextToken();
				setFormat(k, tn.tokenLength() + 1, m_instanceVarFormat);
			} else if( text[ix] == QChar('@') && ix + 1 < text.length()
						&& isLetterOrUnderbar(text[ix+1]) )
			{
				//	@@name: class variable
				tn.nextToken();		//	skip @
				//++ix;	//	skip @
				tn.nextToken();
				setFormat(k, tn.tokenLength() + 2, m_classVarFormat);
			}
			continue;
		}
		if( token == "$" && ix < text.length() ) {
			if( isLetterOrUnderbar(text[ix]) ) {
				//	$name: global variable
				tn.nextToken();
				setFormat(k, tn.tokenLength() + 1, m_globalVarFormat);
			}
			continue;
		}
		nch = tn.nextChar();
		if( token == ":" && ix < text.length() ) {
			//	Symbol: the ':' and the token that follows share the symbol format.
			if( nch == '\"' || nch == '\'' ||
						(nch != '@' && nch != '$' && nch.isSymbol()) )
			{
				tn.nextToken();
				setFormat(k, tn.tokenLength() + 1, m_symbolFormat);
			} else {
				int n = 1;		//	 1 for ':'
				while( (tn.nextChar() == '@' || tn.nextChar() == '$') ) {
					tn.nextToken();	//	somewhat lazy: any run of '@' / '$' is accepted
					++n;
				}
				if( isLetterOrUnderbar(tn.nextChar()) ) {
					tn.nextToken();
					setFormat(k, tn.tokenLength() + n, m_symbolFormat);
				}
			}
			continue;
		}
		if( tn.tokenType() == ViewTokenizer::NUMBER /*isDecimalString(token)*/ ) {
			setFormat(k, tn.tokenLength(), m_decimalFormat);
			continue;
		}
		if( isLetterOrUnderbar(token[0]) ) {
			//	A method call takes priority over a keyword (12/04/26): the word follows
			//	".", is followed by '(', or follows "::" and starts with a lower-case letter.
			if( //(k >=2 && text[k-1] == QChar('.') && text[k-2] != QChar('.'))
				tn.prevTokenText() == "."
				|| (/*tn.prevTokenText() == "def" &&*/ tn.nextChar() == '(')
				|| (tn.prevTokenText() == "::" && token[0].isLower()) )
			{
				setFormat(k, tn.tokenLength(), m_methodFormat);
				continue;
			}
			if( m_keywords.indexOf(token) >= 0 ) {
				setFormat(k, tn.tokenLength(), m_keywordFormat);
				continue;
			}
			if( m_pseudoVars.indexOf(token) >= 0 ) {
				setFormat(k, tn.tokenLength(), m_pseudoVarFormat);
				continue;
			}
			if( m_view->settings()->isUserKeyword(token) ) {
				setFormat(k, tn.tokenLength(), m_userKeywordFormat);
				continue;
			}
		}
		if( tn.prevTokenText() == "def"  	//	e.g. operator overloading: any token right after "def"
			|| (tn.prevTokenText() == "." && tn.nextChar() == '(') )
		{
		
			setFormat(k, tn.tokenLength(), m_methodFormat);
			continue;
		}
		if( token[0].isUpper() ) {
			//	A token starting with an upper-case letter is formatted as a constant.
			setFormat(k, tn.tokenLength(), m_constantFormat);
			continue;
		}
		if( tn.tokenType() == ViewTokenizer::SQ_STRING
			|| tn.tokenType() == ViewTokenizer::DQ_STRING
			|| tn.tokenType() == ViewTokenizer::BQ_STRING )
		{
			//closeQuote = tn.quote();
			if( highlightString(tn, 0, tn.tokenType())
				&& nextState != IN_HERE_DOCUMENT )
			{
				return;		//	multi-line string: highlightString() already set state and user data
			}
			continue;
		}
		if( tn.tokenType() == ViewTokenizer::REGEXP ) {
			highlightString(tn, 0, ViewTokenizer::REGEXP);
			continue;
		}
		if( token == "<<" ) {
			//	Possible start of a here document: <<IDENT, <<-IDENT, or a quoted identifier.
			//	NOTE(review): nextState is set before the identifier has been parsed, so the
			//	"continue" paths below leave IN_HERE_DOCUMENT set even when no identifier
			//	was found (e.g. "<<" followed by a digit) — confirm this is intended.
			nextState = IN_HERE_DOCUMENT;
			bool indent = false;
			uchar strType = ViewTokenizer::DQ_STRING;
			uchar type = ViewTokenizer::HERE_DOCUMENT;
			if( tn.nextChar() == '-' ) {
				tn.skipChar();
				indent = true;
			}
			QString ident;
			int offset;
			QChar nch = tn.nextChar();		//	note: shadows the outer nch
			if( nch == '\'' || nch == '\"' || nch == '`' ) {
				//	Quoted identifier: the quote character selects the string type of the body.
				switch( nch.unicode() ) {
				case '\'':
					strType = ViewTokenizer::SQ_STRING;
					break;
				case '\"':
					strType = ViewTokenizer::DQ_STRING;
					break;
				case '`':
					strType = ViewTokenizer::BQ_STRING;
					break;
				}
				tn.nextString((uchar)nch.unicode(), 1, ViewTokenizer::SQ_STRING);		//	no #{exp} expansion inside the identifier
				ident = tn.tokenText();
				//	NOTE(review): assumes the token text is never empty here — confirm
				//	against ViewTokenizer::nextString().
				if( ident[ident.size() - 1] != nch )
					continue;		//	closing quote missing
				offset = tn.tokenOffset() + 1;
				ident = ident.mid(1, ident.size() - 2);		//	strip both quotes
			} else {
				//	Bare identifier: must start with a letter.
				if( !tn.nextChar().isLetter() ) continue;
				tn.nextToken();
				ident = tn.tokenText();
				offset = tn.tokenOffset();
			}
			setFormat(offset, ident.size(), m_constantFormat);
			hereDocIdent = ident;
			tn.pushBackToQuoteStack(StringItem(type, strType, indent));
#if 0
			quoteStack = tn.quoteStack();
			quoteStack.push_back(StringItem(type, strType, indent));	//	undone: #{exp} handling
			tn.setQuoteStack(quoteStack);
#endif
			//setCurrentBlockUserData(new RSHUserData(ident, quoteStack));
			//setCurrentBlockState(nstate);
			//return;
		}
		if( tn.tokenType() == ViewTokenizer::PERCENT_SYMBOL ) {
			setFormat(k, tn.tokenLength(), m_symbolFormat);
			continue;
		}
	}
	//	Save what is still open (and the here-document identifier, if any) for the next line.
	if( !tn.quoteStack().isEmpty() )
		setCurrentBlockUserData(new RSHUserData(hereDocIdent, tn.quoteStack()));
	if( !nextState ) {
		//	Remember a trailing "." or "::" so that the next line can continue the expression.
		if( tn.prevTokenText() == "." )
			nextState = IN_TAIL_DOT;
		else if( tn.prevTokenText() == "::" )
			nextState = IN_TAIL_COLON_COLON;
	}
	setCurrentBlockState(nextState);
}
