#ifndef __MERCURY_REGEX_PARSER__
#define __MERCURY_REGEX_PARSER__

#include "lexer.h"
#include "parse_node.h"


namespace mercury
{
	// Error codes thrown (by value) by the regex parser when a pattern is rejected.
	typedef enum tagREGEX_ERROR
	{
		// The parser met a token other than the one the grammar requires at that point.
		REGERR_SYNTAX        = 0,

		// A character range inside a set has a first character greater than its last.
		REGERR_SET_RANGE     = 1,

		// A counted repeat has a first number greater than its second number.
		REGERR_REPEAT_MINMAX = 2
	} REGEX_ERROR;

	namespace _regex
	{

		template<typename _Input, typename _InputIterator, typename _Traits = regex_traits<_Input> >
		class parser
		{
		public:
			typedef parse_tree            <_Input> *parse_tree_ptr;
			typedef parse_node_element_set<_Input> *parse_node_element_set_ptr;

		public:
			parser(const _InputIterator &pattern_begin, const _InputIterator &pattern_end) : m_lexer(pattern_begin, pattern_end)
			{
				_init();
			}

			parse_tree_ptr parse(void)
			{
				return rule_expression();
			}

			bool is_head(void) const { return m_lexer.is_head(); }
			bool is_tail(void) const { return m_lexer.is_tail(); }

		private:
			lexer<_Input, _InputIterator, _Traits> m_lexer;
			token<_Input>                          m_lookahead;

		private:

			parse_tree_ptr rule_expression(void)
			{
				parse_tree_ptr node = NULL;
				try
				{
					node = rule_subexpression();
					move(TOKEN_EOS);
					return node;
				}
				catch(...)
				{
					delete node; node = NULL;
					throw;
				}
			}

			parse_tree_ptr rule_subexpression(void)
			{
				parse_tree_ptr node  = NULL;
				parse_tree_ptr node2 = NULL;
				try
				{
					node = rule_sequence();
					if(m_lookahead.type == TOKEN_DISJUNCTION)
					{
						move();
						node2 = rule_subexpression();
						node  = new parse_node_disjunction<_Input, _Traits>(node, node2, NULL);
						node2 = NULL;
					}
					return node;
				}
				catch(...)
				{
					delete node ; node  = NULL;
					delete node2; node2 = NULL;
					throw;
				}
			}

			parse_tree_ptr rule_sequence(void)
			{
				if(_is_subsequence(m_lookahead.type))
				{
					return rule_subsequence();
				}
				else
				{
					return new parse_node_none<_Input, _Traits>();
				}
			}

			parse_tree_ptr rule_subsequence(void)
			{
				parse_tree_ptr node1 = NULL;
				parse_tree_ptr node2 = NULL;
				try
				{
					node1 = rule_repeat();

					if(_is_subsequence(m_lookahead.type))
					{
						node2 = rule_subsequence();
						return new parse_node_conjunction<_Input, _Traits>(node1, node2, NULL);
					}
					else
					{
						return node1;
					}
				}
				catch(...)
				{
					delete node1; node1 = NULL;
					delete node2; node2 = NULL;
					throw;
				}
			}

			parse_tree_ptr rule_repeat(void)
			{
				parse_tree_ptr node = NULL;
				try
				{
					node = rule_factor();
					switch(m_lookahead.type)
					{
					case TOKEN_REPEAT0:
						move();
						node = new parse_node_repeat0<_Input, _Traits>(node);
						break;

					case TOKEN_REPEAT1:
						move();
						node = new parse_node_repeat1<_Input, _Traits>(node);
						break;

					case TOKEN_REPEAT01:
						move();
						node = new parse_node_repeat01<_Input, _Traits>(node);
						break;

					case TOKEN_REPEAT_BEGIN:
						node = _subrule_repeat_number(node);
						break;

					default:
						break;
					}
					return node;
				}
				catch(...)
				{
					delete node; node = NULL;
					throw;
				}
			}

			parse_tree_ptr rule_factor(void)
			{
				parse_tree_ptr node = NULL;
				try
				{
					switch(m_lookahead.type)
					{
					case TOKEN_GROUP_BEGIN:
						move(TOKEN_GROUP_BEGIN);
						node = rule_subexpression();
						move(TOKEN_GROUP_END);
						break;

					case TOKEN_SET_BEGIN:
						move(TOKEN_SET_BEGIN);
						node = rule_set();
						move(TOKEN_SET_END);
						break;

					case TOKEN_ANY:
						node = new parse_node_any<_Input, _Traits>();
						move();
						break;

					case TOKEN_CLASS:
						node = new parse_node_class<_Input,_Traits>(m_lookahead.cls, m_lookahead.invert);
						move(TOKEN_CLASS);
						break;

					case TOKEN_CHARACTER:
						node = new parse_node_character<_Input, _Traits>(m_lookahead.ch);
						move(TOKEN_CHARACTER);
						break;

					default:
						break;
					}
					return node;
				}
				catch(...)
				{
					delete node; node = NULL;
					throw;
				}
			}

			parse_tree_ptr rule_set(void)
			{
				parse_node_element_set<_Input> *node = NULL;
				try
				{
					if(m_lookahead.type == TOKEN_SET_NOT)
					{
						move(TOKEN_SET_NOT);
						node = rule_subset();
						node  = new parse_node_not<_Input, _Traits>(node);
					}
					else
					{
						node = rule_subset();
					}
					return node;
				}
				catch(...)
				{
					delete node; node = NULL;
					throw;
				}
			}

			parse_node_element_set_ptr rule_subset(void)
			{
				switch(m_lookahead.type)
				{
				case TOKEN_CLASS:
					return _subrule_subset_class();

				case TOKEN_CHARACTER:
					return _subrule_subset_character();

				default:
					throw REGERR_SYNTAX;
				}
			}

		private:
			void _init(void)
			{
				move();
			}


			bool _is_subsequence(const TOKEN_TYPE type)
			{
				return (type == TOKEN_GROUP_BEGIN || type == TOKEN_SET_BEGIN || type == TOKEN_ANY || type == TOKEN_CLASS || type == TOKEN_CHARACTER);
			}


			void move(void)
			{
				m_lookahead = m_lexer.get_token();
			}

			void move(const TOKEN_TYPE type)
			{
				if(m_lookahead.type != type)
				{
					throw REGERR_SYNTAX;
				}
				move();
			}

		private:

			parse_tree_ptr _subrule_repeat_number(parse_tree_ptr node)
			{
				move(TOKEN_REPEAT_BEGIN);

				const int number1 = m_lookahead.num;
				move(TOKEN_NUMBER);

				if(m_lookahead.type != TOKEN_REPEAT_SEPARATOR)
				{
					node = new parse_node_repeatn<_Input, _Traits>(node, number1);
					goto repeat_exit;
				}

				move(TOKEN_REPEAT_SEPARATOR);

				if(m_lookahead.type != TOKEN_NUMBER)
				{
					node = new parse_node_repeatn_<_Input, _Traits>(node, number1);
					goto repeat_exit;
				}
				else
				{
					const int number2 = m_lookahead.num;
					move(TOKEN_NUMBER);

					if(number1 > number2) { throw REGERR_REPEAT_MINMAX; }
					node = new parse_node_repeatmn<_Input, _Traits>(node, number1, number2);

					goto repeat_exit;
				}
repeat_exit:
				move(TOKEN_REPEAT_END);
				return node;
			}

			parse_node_element_set_ptr _subrule_subset_class(void)
			{
				parse_node_element_set_ptr node  = NULL;
				parse_node_element_set_ptr node2 = NULL;
				try
				{
					node = new parse_node_class<_Input, _Traits>(m_lookahead.cls, m_lookahead.invert);
					move(TOKEN_CLASS);

					switch(m_lookahead.type)
					{
					case TOKEN_CHARACTER:
					case TOKEN_CLASS:
						node2 = rule_subset();
						node  = new parse_node_set<_Input, _Traits>(node, node2, NULL);
						node2 = NULL;
						break;

					default:
						break;
					}
					return node;
				}
				catch(...)
				{
					delete node ; node  = NULL;
					delete node2; node2 = NULL;
					throw;
				}
			}

			parse_node_element_set_ptr _subrule_subset_character(void)
			{
				parse_node_element_set_ptr node  = NULL;
				parse_node_element_set_ptr node2 = NULL;
				try
				{
					const _Input char1 = m_lookahead.ch;
					move(TOKEN_CHARACTER);

					switch(m_lookahead.type)
					{
					case TOKEN_CHARACTER:
					case TOKEN_CLASS:
						node  = new parse_node_character<_Input, _Traits>(char1);
						node2 = rule_subset();
						node  = new parse_node_set<_Input, _Traits>(node, node2, NULL);
						node2 = NULL;
						break;

					case TOKEN_SET_RANGE:
						{
							move(TOKEN_SET_RANGE);
							const _Input char2 = m_lookahead.ch;
							move(TOKEN_CHARACTER);

							if(char1 > char2) { throw REGERR_SET_RANGE; }
							node = new parse_node_range<_Input, _Traits>(char1, char2);

							switch(m_lookahead.type)
							{
							case TOKEN_CHARACTER:
							case TOKEN_CLASS:
								node2 = rule_subset();
								node  = new parse_node_set<_Input, _Traits>(node, node2, NULL);
								node2 = NULL;
								break;

							default:
								break;
							}
						}
						break;

					default:
						node = new parse_node_character<_Input, _Traits>(char1);
					}
					return node;
				}
				catch(...)
				{
					delete node ; node  = NULL;
					delete node2; node2 = NULL;
					throw;
				}
			}
		};
	}
}

#endif
