// Lexer.h
// (c) 2004-2005 exeal

#ifndef _LEXER_H_
#define _LEXER_H_
#include "AscensionCommon.h"
#include "../../Manah/SmallObject.h"
#include <list>
#include <set>
#include <map>
#include <algorithm>	// std::lower_bound


namespace Ascension {

	/// Value used to tell apart individual token groups of a kind that can be registered
	/// several times (CLexer::AddKeywords and the CLexer::Add*Annotation methods return it).
	typedef short	TokenCookie;
	/// Special cookie value (0). NOTE(review): presumably means "no cookie" — confirm against callers.
	const TokenCookie	NullCookie = 0;

	/// Kinds of token.
	/// TT_COUNT is the number of kinds and is used to size CLexer's per-kind enable table.
	/// @see	EmphaticTextType
	enum TokenType {
		TT_FIRST,
		TT_WHITESPACE = TT_FIRST,	///< white space characters
		TT_TAB,						///< tab
		TT_KEYWORD,					///< keyword
		TT_ANNOTATION,				///< annotation (comment)
		TT_OPERATOR,				///< operator
		TT_IDENTIFIER,				///< identifier
		TT_NUMERAL,					///< digits
		TT_NUMBER,					///< number
		TT_SINGLEQUOTATION,			///< single quotation
		TT_DOUBLEQUOTATION,			///< double quotation
		TT_OTHERQUOTATION,			///< other Unicode quotation marks
		TT_ASCII_CONTROL,			///< ASCII control characters
		TT_UNICODE_CONTROL,			///< Unicode control characters
		TT_UNSPECIFIED,				///< unspecified
		TT_COUNT
	};

	/// Numeric literal formats selectable through CLexer::SetNumberFormat.
	/// NF_COUNT is the number of formats; SetNumberFormat rejects values >= NF_COUNT.
	enum NumberFormat {
		NF_NUMERAL_FOLLOWED_BY_ALPHANUMERAL,	///< digits followed by alphabets, digits or a decimal point
		NF_CPLUSPLUS,							///< C++ numeric literals
		NF_PERL,								///< Perl 5 numeric literals
		NF_RUBY,								///< Ruby 1.8 numeric literals
		NF_VBSCRIPT,							///< VBScript 5.6 numeric literals
		NF_JAVASCRIPT_15,						///< JavaScript 1.5 numeric literals
		NF_JAVASCRIPT_20,						///< JavaScript 2.0 numeric literals
		NF_COUNT	// no trailing comma: it is ill-formed before C++11
	};

	/// Constraints on where an annotation may begin (passed to the CLexer::Add*Annotation methods).
	typedef uchar	AnnotationConstraint;
	const AnnotationConstraint	AC_NONE				= 0x00;	///< no constraint
	const AnnotationConstraint	AC_ONLYSTARTOFLINE	= 0x01;	///< only at the start of a line
	const AnnotationConstraint	AC_ONLYHEADOFLINE	= 0x02;	///< only as the first token other than white space


	/// A token: a start index plus a 16-bit word packing the token type and its cookie.
	/// @see	Tokens
	class CToken : public Manah::CSelfAssertable, public Manah::CUseMemoryPool<CToken> {
		friend class CLexer;

		// constructors
	public:
		/// Default constructor. Zero-initializes the index and the packed type/cookie word
		/// so that the accessors never read indeterminate values.
		CToken() : m_i(0), m_type(0) {}
		/**
		 *	Constructor.
		 *	@param i		start index of the token
		 *	@param type		token type (stored in the upper 4 bits)
		 *	@param nCookie	cookie (stored in the lower 12 bits)
		 */
		CToken(length_t i, TokenType type, TokenCookie nCookie)
			// mask both fields exactly like _SetType/_SetCookie do, so that a negative or
			// over-wide cookie cannot spill into the type bits
			: m_i(i), m_type(static_cast<ushort>(((type & 0x000F) << 12) | (nCookie & 0x0FFF))) {}

		// methods
	public:
		/// Returns the cookie stored in the lower 12 bits.
		TokenCookie GetCookie() const {
			AssertValid();
			return m_type & 0x0FFF;
		}
		/// Returns the start index of the token.
		length_t GetIndex() const {
			AssertValid();
			return m_i;
		}
		/// Returns the token type stored in the upper 4 bits.
		TokenType GetType() const {
			AssertValid();
			return static_cast<TokenType>(m_type >> 12);
		}
	private:
		/// Replaces the cookie (lower 12 bits) and keeps the type bits.
		void _SetCookie(TokenCookie nCookie) {
			AssertValid();
			m_type &= 0xF000;
			m_type |= 0x0FFF & nCookie;
		}
		/// Replaces the type (upper 4 bits) and keeps the cookie bits.
		void _SetType(TokenType type) {
			AssertValid();
			m_type &= 0x0FFF;
			m_type |= (type & 0x000F) << 12;
		}

		// data members
	private:
		length_t	m_i;	// start index
		ushort		m_type;	// upper 4 bits: TokenType, lower 12 bits: TokenCookie
	};

	/// An array of tokens. The destructor releases @c array with delete[].
	/// NOTE(review): the struct is implicitly copyable, so a copy would delete[] the same
	/// array twice — avoid copying instances.
	struct Tokens {
		std::size_t	count;	///< number of tokens
		CToken*		array;	///< the token array (released with delete[] by the destructor)

		/// Constructor. Creates an empty array (count 0, null pointer).
		Tokens() : count(0), array(0) {}
		/// Destructor. Frees @c array.
		~Tokens() {delete[] array;}
	};


	/**
	 *	@brief	Lexical analyzer
	 *
	 *	NOTE(review): the original note presumably stated that the case folding used for
	 *	case-insensitive keywords does not fully follow Unicode 4.0 — confirm.
	 *
	 *	NOTE(review): the original note presumably described how tokens registered through
	 *	the AddXXXX methods are prioritized by string length — confirm in the implementation.
	 */
	class CLexer : public Manah::CSelfAssertable, public Manah::CNoncopyable {
	public:
		/// Event handler interface of the lexer
		interface IEventListener {
			///	Destructor
			virtual			~IEventListener() {}
			/**
			*	@brief	An identified token (keyword set or annotation) was added
			*
			*	NOTE(review): presumably OnLexerChanged is also invoked after this — confirm.
			*	@param type		the token type
			*	@param nCookie	the cookie of the token
			*/
			virtual	void	OnLexerAddedIdentifiedToken(TokenType type, TokenCookie nCookie) = 0;
			///	The lexer's rules changed (called by _NotifyChange unless the lexer is frozen)
			virtual void	OnLexerChanged() = 0;
			///	NOTE(review): presumably called when all rules were removed — confirm.
			virtual void	OnLexerCleared() = 0;
			/**
			*	@brief	An identified token (keyword set or annotation) was removed
			*
			*	NOTE(review): presumably OnLexerChanged is also invoked after this — confirm.
			*	@param type		the token type
			*	@param nCookie	the cookie of the token
			*/
			virtual	void	OnLexerRemovedIdentifiedToken(TokenType type, TokenCookie nCookie) = 0;
		};

		// constructors
	public:
		CLexer(IEventListener* pEventListener);
		virtual ~CLexer();

		// methods
	public:
		/* configuration and attributes */
		TokenCookie	AddMultilineAnnotation(const string_t& strStartDelimiter,
						const string_t& strEndDelimiter, AnnotationConstraint constraint = AC_NONE);
		TokenCookie	AddSinglelineAnnotation(const string_t& strStartDelimiter, AnnotationConstraint constraint = AC_NONE);
		TokenCookie	AddSinglelineAnnotation(const string_t& strStartDelimiter,
						const string_t& strEndDelimiter, AnnotationConstraint constraint = AC_NONE);
		TokenCookie	AddKeywords(const std::set<string_t>& keywords);
		void		EnableBackSolidusEscape(bool bEnable);
		void		EnableToken(TokenType type, bool bEnable);
		void		EnableUnicodeAlphabets(bool bEnable);
		void		EnableUnicodeWhiteSpaces(bool bEnable);
		void		Freeze();
		bool		GetBracketTraits(char_t chBracket, char_t& chPair, bool& bOpener) const;
		void		GetKeywords(string_t* pKeywords) const;
		NumberFormat	GetNumberFormat() const;
		void		IgnoreCase(bool bIgnore);
		bool		IsBackSolidusEscapeEnabled() const;
		bool		IsCaseSensitive() const;
		bool		IsFreezed() const;
		bool		IsTokenEnabled(TokenType type) const;
		bool		IsUnicodeAlphabetsEnabled() const;
		bool		IsUnicodeWhiteSpacesEnabled() const;
		void		RemoveAll();
		void		RemoveIdentifiedToken(TokenCookie nCookie) throw(std::invalid_argument);
		void		Reset();
		void		SetAdditionalAlphabets(const char_t* pwszAlphabets, length_t cch);
		void		SetAdditionalAlphabets(const std::set<CodePoint>& alphabets);
		void		SetBrackets(const char_t* pwszBrackets)  throw(std::invalid_argument);
		void		SetNumberFormat(NumberFormat format) throw(std::invalid_argument);
		void		SetOperators(const std::set<string_t>& operators);
		void		Unfreeze();

		/* parsing */
		void		Parse(const string_t& str, TokenCookie& nCookie, std::list<CToken>& tokens) const;
		TokenCookie	ParseMultilineAnnotation(const string_t& str, TokenCookie nCookie) const;

		/* character classification */
		static CodePoint	GetQuotationCloser(CodePoint opener);
		static bool			IsAsciiControl(CodePoint cp);
		static bool			IsDigit(CodePoint cp);
		bool				IsIdentifierContinueCodePoint(CodePoint cp) const;
		bool				IsIdentifierStartCodePoint(CodePoint cp) const;
		static bool			IsUnicodeControl(CodePoint cp);
		bool				IsWhiteSpace(CodePoint cp, bool bIncludeTab) const;

		/* token extraction */
		static length_t	EatAsciiControls(const char_t* pwsz, length_t cch);
		length_t		EatIdentifier(const char_t* pwsz, length_t cch) const;
		bool			EatKeyword(const char_t* pwsz, length_t cch, TokenCookie& nCookie) const;
		length_t		EatOperators(const char_t* pwsz, length_t cch) const;
		static length_t	EatQuotation(const char_t* pwsz, length_t cch, bool bEscapeByBackSolidus);
		length_t		EatMultilineAnnotation(const char_t* pwsz, length_t cch,
							AnnotationConstraint constraint, TokenCookie& nCookie, bool& bContinued) const;
		static length_t	EatNumerals(const char_t* pwsz, length_t cch);
		length_t		EatNumbers(const char_t* pwsz, length_t cch) const;
		length_t		EatSinglelineAnnotation(const char_t* pwsz, length_t cch,
							AnnotationConstraint constraint, TokenCookie& nCookie) const;
		static bool		EatUnicodeControls(const char_t* pwsz, length_t cch);
		length_t		EatWhiteSpaces(const char_t* pwsz, length_t cch, bool bIncludeTab) const;

		/* utilities */
		static CodePoint	CaseFoldCharacter(CodePoint cp);
		static void			CaseFoldString(char_t* pwsz, length_t cch);
		static void			CaseFoldString(const char_t* pwsz, length_t cch, char_t* pwszDest);
		static char_t*		CaseFoldString(const char_t* pwsz, length_t cch);
		static length_t		EatMailAddress(const char_t* pwsz, length_t cch);
		static length_t		EatUrlString(const char_t* pwsz, length_t cch);
		static void			GetAsciiControlSubstitutionGlyph(uchar ch, char_t* pwszGlyph);

	private:
		void		_ClearKeywords();
		template<NumberFormat format>
		length_t	_EatNumbers(const char_t* pwsz, length_t cch) const;
		void		_NotifyChange();

		// public member constants
	public:
		static const char_t	m_wszDefaultOpeners[];
		static const char_t	m_wszUnicodeOpeners[];

		// private data types
	private:
		/// Chained hash table used to look up keywords
		class _CHashTable {
		public:
			_CHashTable(const std::set<string_t>& data, bool bCaseSensitive);
			~_CHashTable();
			bool Find(const char_t* pwsz, length_t cch) const;
			static ulong _GetHashCode(const char_t* pwsz, length_t cch);
		private:
			/// One node of a bucket chain. The destructor deletes the rest of the chain.
			struct _Entry {
				string_t	str;
				_Entry*		pNext;
				// pNext is not initialized here; the _CHashTable constructor assigns it
				// right after creating each entry
				_Entry(const string_t& str_) : str(str_) {}
				~_Entry() {delete pNext;}
			};
			_Entry**			m_ppEntries;	// bucket heads
			const std::size_t	m_cEntries;		// number of buckets (the size of the source set)
			std::size_t			m_cchMax;	// length of the longest keyword
			const bool			m_bCaseSensitive;
		};
		/// Single-line annotation terminated by a line break
		struct _SinglelineAnnotationEndedByBreak {
			string_t	strStartDelimiter;		///< start delimiter
			AnnotationConstraint	constraint;	///< constraint
		};

		/// Single-line annotation terminated by a specified delimiter
		struct _SinglelineAnnotationEndedByDelimiter {
			string_t	strStartDelimiter;		///< start delimiter
			string_t	strEndDelimiter;		///< end delimiter
			AnnotationConstraint	constraint;	///< constraint (affects only the start delimiter)
		};

		/// Multi-line annotation
		struct _MultilineAnnotation {
			string_t	strStartDelimiter;		///< start delimiter
			string_t	strEndDelimiter;		///< end delimiter
			AnnotationConstraint	constraint;	///< constraint (affects only the start delimiter)
		};

		typedef std::map<TokenCookie, _CHashTable*>								_KeywordsMap;
		typedef std::map<TokenCookie, _SinglelineAnnotationEndedByBreak>		_SAnnotationBMap;
		typedef std::map<TokenCookie, _SinglelineAnnotationEndedByDelimiter>	_SAnnotationDMap;
		typedef std::map<TokenCookie, _MultilineAnnotation>						_MAnnotationMap;
		typedef std::set<string_t, std::greater<string_t> >						_OperatorSet;
		typedef std::map<char_t, _OperatorSet>									_OperatorMap;

		// data members
	private:
		bool				m_bFreezed;
		bool				m_bCaseSensitive;
		bool				m_bEscapeByBackSolidus;
		bool				m_bEnableUnicodeAlphabets;
		bool				m_bEnableUnicodeWhiteSpaces;
		bool				m_enabledTokenTypes[TT_COUNT];
		NumberFormat		m_numberFormat;
		char_t*				m_pwszBrackets;
		static TokenCookie	m_nCookie;
		IEventListener*		m_pEventListener;
		std::set<CodePoint>	m_additionalAlphabets;	// code points regarded as alphabets

		_KeywordsMap		m_keywords;					// keyword groups
		_SAnnotationBMap	m_singlelineAnnotationBs;	// single-line annotations ended by line break
		_SAnnotationDMap	m_singlelineAnnotationDs;	// single-line annotations ended by delimiter
		_MAnnotationMap		m_multilineAnnotations;		// multi-line annotations
		_OperatorMap		m_operators;				// operators

		static const char_t			m_casedCodesUcs2[];
		static const char_t			m_caseFoldedUcs2[];
		static const std::size_t	m_cCasedCodesUcs2;
//		static const CodePoint		m_casedCodesUcs4[];
//		static const CodePoint		m_caseFoldedUcs4[];
//		static const std::size_t	m_cCasedCodesUcs4;
	};


	namespace Private {
		// Code point tables used for character classification. Each table is listed in
		// ascending order because the lexer searches it with std::binary_search.
#if ASCENSION_UNICODE_VERSION != 0x0410
#error These arrays are based on old version of Unicode.
#endif
		// Code points whose general category is Zs (Separator, Space).
		// NOTE(review): the original note presumably said that PropList.txt also defines a
		// White_Space property and that this table is only a subset of it (control
		// characters, for instance, are excluded) — confirm.
		static const CodePoint	zs[] = {
			0x0020, 0x00A0, 0x1680, 0x180E, 0x2000, 0x2001, 0x2002, 0x2003,
			0x2004, 0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x202F,
			0x205F, 0x3000,
		};
		// Code points whose general category is Cc (Other, Control)
		static const CodePoint	cc[] = {
			0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
			0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F,
			0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
			0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F,
			0x007F, 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086,
			0x0087, 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E,
			0x008F, 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096,
			0x0097, 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E,
			0x009F,
		};
		// Code points whose general category is Cf (Other, Format)
		static const CodePoint	cf[] = {
			0x00AD, 0x0600, 0x0601, 0x0602, 0x0603, 0x06DD, 0x070F, 0x17B4,
			0x17B5, 0x200B, 0x200C, 0x200D, 0x200E, 0x200F, 0x202A, 0x202B,
			0x202C, 0x202D, 0x202E, 0x2060, 0x2061, 0x2062, 0x2063, 0x206A,
			0x206B, 0x206C, 0x206D, 0x206E, 0x206F, 0xFEFF, 0xFFF9, 0xFFFA,
			0xFFFB, 0x1D173, 0x1D174, 0x1D175, 0x1D176, 0x1D177, 0x1D178, 0x1D179,
			0x1D17A, 0xE0001, 0xE0020, 0xE0021, 0xE0022, 0xE0023, 0xE0024, 0xE0025,
			0xE0026, 0xE0027, 0xE0028, 0xE0029, 0xE002A, 0xE002B, 0xE002C, 0xE002D,
			0xE002E, 0xE002F, 0xE0030, 0xE0031, 0xE0032, 0xE0033, 0xE0034, 0xE0035,
			0xE0036, 0xE0037, 0xE0038, 0xE0039, 0xE003A, 0xE003B, 0xE003C, 0xE003D,
			0xE003E, 0xE003F, 0xE0040, 0xE0041, 0xE0042, 0xE0043, 0xE0044, 0xE0045,
			0xE0046, 0xE0047, 0xE0048, 0xE0049, 0xE004A, 0xE004B, 0xE004C, 0xE004D,
			0xE004E, 0xE004F, 0xE0050, 0xE0051, 0xE0052, 0xE0053, 0xE0054, 0xE0055,
			0xE0056, 0xE0057, 0xE0058, 0xE0059, 0xE005A, 0xE005B, 0xE005C, 0xE005D,
			0xE005E, 0xE005F, 0xE0060, 0xE0061, 0xE0062, 0xE0063, 0xE0064, 0xE0065,
			0xE0066, 0xE0067, 0xE0068, 0xE0069, 0xE006A, 0xE006B, 0xE006C, 0xE006D,
			0xE006E, 0xE006F, 0xE0070, 0xE0071, 0xE0072, 0xE0073, 0xE0074, 0xE0075,
			0xE0076, 0xE0077, 0xE0078, 0xE0079, 0xE007A, 0xE007B, 0xE007C, 0xE007D,
			0xE007E, 0xE007F,
		};
} // namespace Private



// Assigns b to a and calls _NotifyChange() only when the two differ.
// Both arguments are parenthesized so that expression arguments (e.g. `x < y`) expand
// with the intended precedence. a must be an lvalue; b is evaluated up to twice.
#define ENABLE_SWITCH(a, b)		\
	do {						\
		if((a) != (b)) {		\
			(a) = (b);			\
			_NotifyChange();	\
		}						\
	} while(false)

	/**
	 *	Case-folds a single code point.
	 *	@param cp	the code point
	 *	@return		the folded code point, or @a cp itself when this implementation has no
	 *				folding for it
	 */
	inline CodePoint CLexer::CaseFoldCharacter(CodePoint cp) {
		// Deseret capital letters are U+10400..U+10427; U+10428 is already a small letter
		if(cp >= 0x10400 && cp < 0x10428)
			return cp + 0x00028;
		else if(cp < 0x10000) {	// UCS-2: look the code unit up in the cased-code table
			const char_t* const	pchEnd = m_casedCodesUcs2 + m_cCasedCodesUcs2;
			const char_t* const	pchCased = std::lower_bound(m_casedCodesUcs2, pchEnd, static_cast<char_t>(cp));
			// lower_bound returns pchEnd when cp is greater than every table entry;
			// that position must not be dereferenced
			return (pchCased != pchEnd && *pchCased == cp) ? m_caseFoldedUcs2[pchCased - m_casedCodesUcs2] : cp;
		} else
			return cp;
	}

	/**
	 *	Case-folds a string in place.
	 *	@param pwsz	[in, out] the string to fold; must not be null
	 *	@param cch	the length of the string
	 */
	inline void CLexer::CaseFoldString(char_t* pwsz, length_t cch) {
		assert(pwsz != 0);
		// the source and the destination are the same buffer
		const char_t* const	pwszSource = pwsz;
		CaseFoldString(pwszSource, cch, pwsz);
	}


	/**
	 *	P[XtH[h
	 *	@param pwsz		
	 *	@param cch		
	 *	@param pwszDest	ʁB<var>cch</var> ̗̈mۂĂKv
	 */
	inline void CLexer::CaseFoldString(const char_t* pwsz, length_t cch, char_t* pwszDest) {
		assert(pwsz != 0 && pwszDest != 0);
		CodePoint	cp;
		for(length_t i = 0; i < cch; ++i) {
			cp = DecodeUtf16SurrogatesToCodePoint(pwsz + i, cch - i);
			cp = CaseFoldCharacter(cp);
			if(cp < 0x10000)
				pwszDest[i] = static_cast<char_t>(cp);
			else {
				EncodeCodePointToUtf16Surrogates(cp, pwszDest + i);
				++i;
			}
		}
	}

	/**
	 *	Case-folds a string into a newly allocated buffer.
	 *	@param pwsz	the source string; must not be null
	 *	@param cch	the length of the source string
	 *	@return		the folded string (<var>cch</var> units, allocated with new[]).
	 *				The caller must release it with delete[]
	 */
	inline char_t* CLexer::CaseFoldString(const char_t* pwsz, length_t cch) {
		assert(pwsz != 0);
		char_t* const	pwszResult = new char_t[cch];
		CaseFoldString(pwsz, cch, pwszResult);
		return pwszResult;
	}

	/**
	 *	Returns the length of the leading run of ASCII control characters.
	 *	A tab counts as a control character here (see IsAsciiControl).
	 *	@param pwsz	the string to examine; must not be null
	 *	@param cch	the length of the string
	 *	@return		the number of consecutive ASCII control characters at the beginning
	 */
	inline length_t CLexer::EatAsciiControls(const char_t* pwsz, length_t cch) {
		assert(pwsz != 0);
		length_t	cchEaten = 0;
		while(cchEaten < cch && IsAsciiControl(pwsz[cchEaten]))
			++cchEaten;
		return cchEaten;
	}

	/**
	 *	Returns the length of the identifier at the beginning of the string.
	 *	The first code point must satisfy IsIdentifierStartCodePoint and every following
	 *	one IsIdentifierContinueCodePoint.
	 *	@param pwsz	the string to examine; must not be null
	 *	@param cch	the length of the string
	 *	@return		the length of the identifier in code units, or 0 if the string does not
	 *				begin with an identifier
	 *	@see		CLexer::IsIdentifierContinueCodePoint, CLexer::IsIdentifierStartCodePoint
	 */
	inline length_t CLexer::EatIdentifier(const char_t* pwsz, length_t cch) const {
		AssertValid();
		assert(pwsz != 0);

		CodePoint	cp;
		for(length_t i = 0; i < cch; ++i) {
			if(IsUtf16HighSurrogate(pwsz[i])
					&& i < cch - 1
					&& IsUtf16LowSurrogate(pwsz[i + 1]))
				cp = DecodeUtf16SurrogatesToCodePoint(pwsz + i, cch - i);
			else
				cp = pwsz[i];
			// Only the start test applies to the first code point. The previous form
			// "(i == 0 && start) || continue" also accepted a continue-only character
			// (such as a digit) in the first position.
			const bool	bAccepted = (i == 0) ? IsIdentifierStartCodePoint(cp) : IsIdentifierContinueCodePoint(cp);
			if(!bAccepted)
				return i;
			if(cp >= 0x010000)	// a surrogate pair occupies two code units
				++i;
		}
		return cch;
	}

	/**
	 *	Returns the length of the leading run of digits.
	 *	@param pwsz	the string to examine; must not be null
	 *	@param cch	the length of the string
	 *	@return		the length of the run in code units, or 0 if the string does not begin
	 *				with a digit
	 *	@see		CLexer::IsDigit
	 */
	inline length_t CLexer::EatNumerals(const char_t* pwsz, length_t cch) {
		assert(pwsz != 0);

		length_t	iUnit = 0;
		while(iUnit < cch) {
			// decode a surrogate pair only when both halves are present
			const bool	bPair = IsUtf16HighSurrogate(pwsz[iUnit])
				&& iUnit < cch - 1
				&& IsUtf16LowSurrogate(pwsz[iUnit + 1]);
			CodePoint	cp;
			if(bPair)
				cp = DecodeUtf16SurrogatesToCodePoint(pwsz + iUnit, cch - iUnit);
			else
				cp = pwsz[iUnit];
			if(!CLexer::IsDigit(cp))
				break;
			iUnit += (cp >= 0x010000) ? 2 : 1;
		}
		return iUnit;
	}

	/**
	 *	Returns the length of a quoted string, including both quotation marks.
	 *	The closing mark is chosen from the first character through GetQuotationCloser.
	 *	@param pwsz					the string to examine; must not be null
	 *	@param cch					the length of the string
	 *	@param bEscapeByBackSolidus	true if a '\' makes the following character literal
	 *	@return			the length of the quotation. 0 if the string is empty or does not
	 *					begin with a known opening mark. <var>cch</var> if the quotation is
	 *					not closed
	 */
	inline length_t CLexer::EatQuotation(const char_t* pwsz, length_t cch, bool bEscapeByBackSolidus) {
		assert(pwsz != 0);
		if(cch == 0)	// nothing to examine; pwsz[0] must not be read
			return 0;

		const char_t	chCloser = static_cast<char_t>(GetQuotationCloser(pwsz[0]));	// BMP only

		if(chCloser == 0xFFFF)	// the first character is not an opening mark
			return 0;
		for(length_t i = 1; i < cch; ++i) {
			if(pwsz[i] == L'\\' && bEscapeByBackSolidus)	// skip the escaped character
				++i;
			else if(pwsz[i] == chCloser)
				return i + 1;
		}
		return cch;
	}

	/**
	 *	Unicode 䕶ǂԂ
	 *	@param pwsz	ׂ镶ւ̃|C^
	 *	@param cch	
	 *	@return		Unicode 䕶̏ꍇ true Ԃ
	 */
	inline bool CLexer::EatUnicodeControls(const char_t* pwsz, length_t cch) {
		assert(pwsz != 0);
		return IsUnicodeControl(DecodeUtf16SurrogatesToCodePoint(pwsz, cch));
	}

	/**
	 *	Returns the length of the leading run of white space characters.
	 *	Each UTF-16 code unit is tested on its own, so only BMP characters are recognized.
	 *	@param pwsz			the string to examine; must not be null
	 *	@param cch			the length of the string
	 *	@param bIncludeTab	true to treat a tab as white space
	 *	@return		the length of the run, or 0 if the string does not begin with white space
	 */
	inline length_t CLexer::EatWhiteSpaces(const char_t* pwsz, length_t cch, bool bIncludeTab) const {
		AssertValid();
		assert(pwsz != 0);
		length_t	cchEaten = 0;
		while(cchEaten < cch && IsWhiteSpace(pwsz[cchEaten], bIncludeTab))	// BMP only
			++cchEaten;
		return cchEaten;
	}

	/// IpobNXbVŃGXP[v\ݒ肷
	inline void CLexer::EnableBackSolidusEscape(bool bEnable) {
		AssertValid();
		ENABLE_SWITCH(m_bEscapeByBackSolidus, bEnable);
	}

	/**
	 *	Enables or disables a token type.
	 *	Does nothing if @a type is TT_COUNT or greater. The listener is notified (through
	 *	_NotifyChange) only when the setting changes.
	 *	@param type		the token type
	 *	@param bEnable	true to enable
	 */
	inline void CLexer::EnableToken(TokenType type, bool bEnable) {
		AssertValid();
		if(type < TT_COUNT && m_enabledTokenTypes[type] != bEnable) {
			m_enabledTokenTypes[type] = bEnable;
			_NotifyChange();
		}
	}

	/// Unicode At@xbgAt@xbgƂĎgp邩ݒ肷
	inline void CLexer::EnableUnicodeAlphabets(bool bEnable) {
		AssertValid();
		ENABLE_SWITCH(m_bEnableUnicodeAlphabets, bEnable);
	}

	/// Unicode 󔒗ޕ󔒗ޕƂĎgp邩ݒ肷
	inline void CLexer::EnableUnicodeWhiteSpaces(bool bEnable) {
		AssertValid();
		ENABLE_SWITCH(m_bEnableUnicodeWhiteSpaces, bEnable);
	}

	/// ݒ肪ύXĂCxgnhɒʒmȂ悤ɂ
	inline void CLexer::Freeze() {
		AssertValid();
		m_bFreezed = true;
	}

	/**
	 *	Writes the caret-notation substitute for an ASCII control character.
	 *	The result is '^' followed by <var>ch</var> + 0x40, except that 0x7F gives "^?".
	 *	@param ch			the control character
	 *	@param pwszGlyph	[out] receives exactly two characters; no terminating null is written
	 */
	inline void CLexer::GetAsciiControlSubstitutionGlyph(uchar ch, char_t* pwszGlyph) {
		const int	nOffset = (ch == 0x7F) ? -0x40 : 0x40;
		pwszGlyph[0] = L'^';
		pwszGlyph[1] = static_cast<char_t>(ch + nOffset);
	}

	/// Meant to return the registered keywords through @a pKeywords.
	/// NOTE(review): not implemented — the body only runs AssertValid() and never
	/// touches @a pKeywords.
	inline void CLexer::GetKeywords(string_t* pKeywords) const {
		AssertValid();
//		pKeywords = new string_t[m_keywords.];
	}

	/// Returns the number format currently set (see SetNumberFormat).
	inline NumberFormat CLexer::GetNumberFormat() const {
		AssertValid();
		return this->m_numberFormat;
	}

	/// Returns the closing quotation mark that matches the opening mark <var>opener</var>,
	/// or U+FFFF if <var>opener</var> is not a recognized opening mark.
	inline CodePoint CLexer::GetQuotationCloser(CodePoint opener) {
#if ASCENSION_UNICODE_VERSION != 0x0410
#error This code is based on old version of Unicode.
#endif
		// Not handled (no closer chosen): U+201A, U+201B, U+201E, U+201F, U+301D
		switch(opener) {
		// marks that close with themselves
		case 0x0022:	// Quotation Mark
		case 0x0027:	// Apostrophe
		case 0xFF02:	// Fullwidth Quotation Mark
		case 0xFF07:	// Fullwidth Apostrophe
			return opener;
		// marks whose closer is the next code point
		case 0x2018:	// Left Single Quotation Mark
		case 0x201C:	// Left Double Quotation Mark
		case 0x2039:	// Single Left-Pointing Angle Quotation Mark
		case 0x300C:	// Left Corner Bracket
		case 0x300E:	// Left White Corner Bracket
		case 0xFE41:	// Presentation Form For Vertical Left Corner Bracket
		case 0xFE43:	// Presentation Form For Vertical Left White Corner Bracket
		case 0xFF62:	// Halfwidth Left Corner Bracket
			return opener + 1;
		case 0x00AB:	// Left-Pointing Double Angle Quotation Mark
			return 0x00BB;
		default:		// not an opening mark
			return 0xFFFF;
		}
	}

	/// Returns whether <var>cp</var> is an ASCII control character (below U+0020, or U+007F).
	/// A tab (U+0009) is therefore a control character.
	inline bool CLexer::IsAsciiControl(CodePoint cp) {
		if(cp == 0x7F)
			return true;
		return cp < 0x20;
	}

	/// IpobNXbVŃGXP[v\Ԃ
	inline bool CLexer::IsBackSolidusEscapeEnabled() const {
		AssertValid();
		return m_bEscapeByBackSolidus;
	}

	/// Returns whether <var>cp</var> is a digit: either an ASCII digit, or a code point
	/// that FoldDigit maps to a different value.
	inline bool CLexer::IsDigit(CodePoint cp) {
		if(cp >= L'0' && cp <= L'9')
			return true;
		return FoldDigit(cp) != cp;
	}

	/// L[[hő啶ʂ邩Ԃ
	inline bool CLexer::IsCaseSensitive() const {
		AssertValid();
		return m_bCaseSensitive;
	}

	/// ԂԂ
	inline bool CLexer::IsFreezed() const {
		AssertValid();
		return m_bFreezed;
	}

	/**
	 *	Returns whether a token type is enabled.
	 *	@param type	the token type
	 *	@return		true if enabled. false if @a type is outside [TT_FIRST, TT_COUNT);
	 *				EnableToken ignores such values, so they can never be enabled
	 */
	inline bool CLexer::IsTokenEnabled(TokenType type) const {
		AssertValid();
		// CToken::GetType() can produce any 4-bit value, so guard the table access
		if(type < TT_FIRST || type >= TT_COUNT)
			return false;
		return m_enabledTokenTypes[type];
	}

	/// Unicode At@xbgAt@xbgƂĎgp邩Ԃ
	inline bool CLexer::IsUnicodeAlphabetsEnabled() const {
		AssertValid();
		return m_bEnableUnicodeAlphabets;
	}

	/// Returns whether <var>cp</var> is listed in either the Private::cc or the
	/// Private::cf table.
	inline bool CLexer::IsUnicodeControl(CodePoint cp) {
		if(std::binary_search(Private::cc, _endof(Private::cc), cp))
			return true;
		return std::binary_search(Private::cf, _endof(Private::cf), cp);
	}

	/// Unicode 󔒗ޕ󔒗ޕƂĎgp邩Ԃ
	inline bool CLexer::IsUnicodeWhiteSpacesEnabled() const {
		AssertValid();
		return m_bEnableUnicodeWhiteSpaces;
	}

	/**
	 *	Returns whether a code point is white space.
	 *	With Unicode white spaces enabled the Private::zs table is consulted; otherwise
	 *	only U+0020 qualifies. A tab qualifies in both modes when @a bIncludeTab is true.
	 *	@param cp			the code point
	 *	@param bIncludeTab	true to treat a tab as white space
	 */
	inline bool CLexer::IsWhiteSpace(CodePoint cp, bool bIncludeTab) const {
		const bool	bAcceptedTab = bIncludeTab && cp == L'\t';
		if(m_bEnableUnicodeWhiteSpaces)
			return bAcceptedTab || std::binary_search(Private::zs, _endof(Private::zs), cp);
		return bAcceptedTab || cp == L' ';
	}

	/// Reports a change to the event listener by calling OnLexerChanged.
	/// Does nothing while the lexer is frozen or when no listener is set.
	inline void CLexer::_NotifyChange() {
		AssertValid();
		if(m_bFreezed || m_pEventListener == 0)
			return;
		m_pEventListener->OnLexerChanged();
	}

	/**
	 *	Sets the number format. The listener is notified (through _NotifyChange) only when
	 *	the format changes.
	 *	@param format	the new format
	 *	@throw std::invalid_argument	@a format is NF_COUNT or greater
	 */
	inline void CLexer::SetNumberFormat(NumberFormat format) throw(std::invalid_argument) {
		AssertValid();
		if(format >= NF_COUNT)
			throw std::invalid_argument("");
		if(m_numberFormat == format)
			return;
		m_numberFormat = format;
		_NotifyChange();
	}

	/// CLexer::Freeze ɂ铀ACxgnhɒʒm
	inline void CLexer::Unfreeze() {
		AssertValid();
		ENABLE_SWITCH(m_bFreezed, false);
	}

	/**
	 *	Constructor. Builds a chained hash table with one bucket per element of @a data.
	 *	@param data				the keywords to store
	 *	@param bCaseSensitive	false to store (and later look up) case-folded strings
	 */
	inline CLexer::_CHashTable::_CHashTable(const std::set<string_t>& data, bool bCaseSensitive)
			: m_cEntries(data.size()), m_cchMax(0), m_bCaseSensitive(bCaseSensitive) {
		// start with every bucket empty
		m_ppEntries = new _Entry*[m_cEntries];
		for(std::size_t iBucket = 0; iBucket < m_cEntries; ++iBucket)
			m_ppEntries[iBucket] = 0;

		typedef std::set<string_t>::const_iterator	KeywordIterator;
		for(KeywordIterator itKeyword = data.begin(), itEnd = data.end(); itKeyword != itEnd; ++itKeyword) {
			const std::size_t	cchKeyword = itKeyword->length();
			_Entry*				pEntry;

			if(!m_bCaseSensitive) {
				// store the folded form; the temporary buffer is released right away
				char_t* const	pwszFolded = CaseFoldString(itKeyword->data(), cchKeyword);
				pEntry = new _Entry(string_t(pwszFolded, cchKeyword));
				delete[] pwszFolded;
			} else
				pEntry = new _Entry(*itKeyword);

			const std::size_t	h = _GetHashCode(pEntry->str.data(), pEntry->str.length());
			if(cchKeyword > m_cchMax)
				m_cchMax = cchKeyword;

			// push the new entry at the head of its bucket chain
			_Entry*&	pBucketHead = m_ppEntries[h % m_cEntries];
			pEntry->pNext = pBucketHead;
			pBucketHead = pEntry;
		}
	}

	/// Destructor. Deletes the head of every bucket (each _Entry deletes the rest of its
	/// chain) and then the bucket array itself.
	inline CLexer::_CHashTable::~_CHashTable() {
		_Entry** const	ppEnd = m_ppEntries + m_cEntries;
		for(_Entry** ppBucket = m_ppEntries; ppBucket != ppEnd; ++ppBucket)
			delete *ppBucket;
		delete[] m_ppEntries;
	}

	/**
	 *	Looks a string up in the table.
	 *	@param pwsz	the string to search for
	 *	@param cch	the length of the string
	 *	@return		true if the table contains the string (compared after case folding when
	 *				the table is case-insensitive)
	 */
	inline bool CLexer::_CHashTable::Find(const char_t* pwsz, length_t cch) const {
		// An empty table has no buckets: return before computing "h % m_cEntries", which
		// would divide by zero (reachable with cch == 0, because m_cchMax is then 0 too).
		if(m_cEntries == 0 || cch > m_cchMax)
			return false;

		// when case-insensitive, compare against a folded temporary copy
		const char_t* const	pwszFolded = m_bCaseSensitive ? pwsz : CaseFoldString(pwsz, cch);
		const std::size_t	h = _GetHashCode(pwszFolded, cch);
		_Entry*				pEntry = m_ppEntries[h % m_cEntries];
		bool				bFound = false;

		while(pEntry != 0) {
			if(pEntry->str.length() == cch && std::wcsncmp(pEntry->str.data(), pwszFolded, cch) == 0) {
				bFound = true;
				break;
			}
			pEntry = pEntry->pNext;
		}
		if(!m_bCaseSensitive)	// the folded copy was allocated by CaseFoldString
			delete[] pwszFolded;
		return bFound;
	}

	/**
	 *	Computes the hash value of a string: for each code unit in order, the running
	 *	value is shifted left by one bit and the unit is added.
	 *	@param pwsz	the string
	 *	@param cch	the length of the string
	 *	@return		the hash value (0 for an empty string)
	 */
	inline ulong CLexer::_CHashTable::_GetHashCode(const char_t* pwsz, length_t cch) {
		ulong				nHash = 0;
		const char_t* const	pchEnd = pwsz + cch;
		for(const char_t* pch = pwsz; pch != pchEnd; ++pch)
			nHash = (nHash << 1) + *pch;
		return nHash;
	}

#undef ENABLE_SWITCH

} // namespace Ascension

#endif /* _LEXER_H_ */

/* [EOF] */