/*-
 * Copyright (c) 2005 osakabe
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

#include <iostream>
#include <sstream>
#include <boost/algorithm/string/trim.hpp>
#include <boost/algorithm/string/predicate.hpp>
using namespace std;

#include "jsp_parser.h"
using namespace sj;

#define PREFIX_BEGIN        0
#define PREFIX_ENDED        1
#define PREFIX_NOTFOUND     2
#define TARGET_FOUND        0
#define TARGET_NOTFOUND     1
#define PARSE_ERROR        -1

/*
 * Writes a human-readable dump of the element tree rooted at `e` to cerr.
 *
 * e : element to print; its children are printed recursively.
 * d : indentation (number of leading spaces) for this element; each
 *     nesting level adds two.
 *
 * Every element produces one line whose layout depends on its type.
 */
void jsp_parser::debug_dump(const jsp_element_ptr& e, int d)
{
    for (int i = 0; i < d; i++)
        cerr << " ";

    string_map_t::iterator i;

    switch (e->type()) {
    case normal :
        cerr << "[" << boost::trim_copy_if(e->line(), boost::is_any_of("\r\n"))
             << "]" << endl;
        break;

    case directive :
        cerr << "directive: action:[" << e->action() << "]";
        for (i = e->attribute.begin(); i != e->attribute.end(); ++i)
            cerr << " [" << i->first << "]=[" << i->second << "]";
        cerr << endl;
        break;

    case declaration :
        cerr << "decl ["
             << boost::trim_copy_if(e->line(), boost::is_any_of("\r\n"))
             << "]" << endl;
        break;

    case expression :
        cerr << "expr ["
             << boost::trim_copy_if(e->line(), boost::is_any_of("\r\n"))
             << "]" << endl;
        break;

    case scriptlet :
        cerr << "scri ["
             << boost::trim_copy_if(e->line(), boost::is_any_of("\r\n"))
             << "]" << endl;
        break;

    case action :
        cerr << "tag " << "prefix:[" << e->prefix() << "] "
             << "action:[" << e->action() << "] ";

        for (i = e->attribute.begin(); i != e->attribute.end(); ++i)
            cerr << " [" << i->first << "]=[" << i->second << "]";
        cerr << endl;
        // This break was missing: a custom action used to fall through
        // and be printed a second time as a "JSP-tag".
        break;

    case jsp_std_action :
        cerr << "JSP-tag " << "prefix:[" << e->prefix() << "] "
             << "action:[" << e->action() << "] ";

        for (i = e->attribute.begin(); i != e->attribute.end(); ++i)
            cerr << " [" << i->first << "]=[" << i->second << "]";
        cerr << endl;
        break;

    default :
        // Any other type value: nothing to print for this node itself.
        break;
    }

    // Recurse into the children, indented two columns further.
    vector<jsp_element_ptr> c = e->jsp_elements();
    vector<jsp_element_ptr>::iterator j = c.begin();
    for (; j != c.end(); ++j)
        debug_dump(*j, d + 2);
}

bool jsp_parser::start(const string& jsp_file)
{
#ifdef DEBUG
    cerr << "jsp_parser::start Enter :" << jsp_file << endl;
#endif
    /*
     * ƬȤ롼ȤȤʤ jsp_element Ѱդ.
     * 줫Ϥȼ jsp_element Ϥ٤
     *  _jsp_element ΥͥȤΡɤȤ¸.
     */
    _jsp_element = jsp_element_ptr(new jsp_element());

    /*
     * `jsp:' Ȥ륿ΥץեåϥॿƱ̤ͤ
     * ɬפ뤿˥ץեåȤϿƤ.
     */
    _prefix.push_back("jsp");

    /*
     * jsp_parser Ǥ JSP ե file_t(deque<string>) 
     * ΤǤ file_t  JSP եƤͤؤ
     */
    file_t file;
    istringstream is(jsp_file);
    string s;

    while (getline(is, s))
        file.push_back(s + "\n");

    bool ret = parse(file, _jsp_element);

#ifdef DEBUG
    debug_dump(_jsp_element, 0);
#endif
    return ret;
}

/*
 * Returns the root of the parsed element tree.
 *
 * Throws std::runtime_error("illegal state.") when no tree is available,
 * i.e. _jsp_element is still null.
 *
 * NOTE(review): the dynamic exception specification is kept as written;
 * dropping it here would presumably also require changing the declaration
 * in jsp_parser.h.
 */
jsp_element_ptr jsp_parser::result() throw(std::runtime_error)
{
    if (_jsp_element)
        return _jsp_element;

    throw std::runtime_error("illegal state.");
}

/*
 * Collects a prefix -> uri map from every element whose action is
 * "taglib" in the subtree rooted at `e`.
 *
 * Children are visited before `e` itself, and map::insert never replaces
 * an existing key, so the first entry inserted for a prefix wins.
 * An element contributes only when it has both a "uri" and a "prefix"
 * attribute.
 */
map<string, string> jsp_parser::taguri_map(jsp_element_ptr e)
{
    string_map_t collected;

    // Descendants first.
    vector<jsp_element_ptr> children = e->jsp_elements();
    for (vector<jsp_element_ptr>::iterator child = children.begin();
         child != children.end(); ++child) {
        string_map_t nested = taguri_map(*child);
        collected.insert(nested.begin(), nested.end());
    }

    if (e->action() != "taglib")
        return collected;

    string_map_t::iterator uri_pos = e->attribute.find("uri");
    if (uri_pos == e->attribute.end())
        return collected;

    string_map_t::iterator prefix_pos = e->attribute.find("prefix");
    if (prefix_pos != e->attribute.end()) {
        string uri = uri_pos->second;
        collected.insert(make_pair(prefix_pos->second, uri));
    }
    return collected;
}

/*
 * Returns the "uri" attribute of every element whose action is "taglib"
 * in the subtree rooted at `e`.
 *
 * The uris of the descendants come first (in child order); the uri of
 * `e` itself, if any, is appended last.  Duplicates are kept.
 */
vector<string> jsp_parser::taguri_list(jsp_element_ptr e)
{
    vector<string> uris;

    vector<jsp_element_ptr> children = e->jsp_elements();
    for (vector<jsp_element_ptr>::iterator child = children.begin();
         child != children.end(); ++child) {
        vector<string> nested = taguri_list(*child);
        uris.insert(uris.end(), nested.begin(), nested.end());
    }

    if (e->action() != "taglib")
        return uris;

    string_map_t::iterator uri_pos = e->attribute.find("uri");
    if (uri_pos != e->attribute.end())
        uris.push_back(uri_pos->second);

    return uris;
}

/*
 * Returns the taglib prefix registered for `uri` in the parsed document,
 * or an empty string when no matching taglib element exists.
 *
 * When no tree is available yet (_jsp_element is null) the answer is the
 * empty string as well; previously the null root was dereferenced.
 */
string jsp_parser::prefix(const string& uri) const
{
    if (!_jsp_element)
        return "";

    return find_prefix(_jsp_element, uri);
}

/*
 * Depth-first search for the taglib prefix bound to `uri`.
 *
 * e   : root of the subtree to search.
 * uri : uri to look for; compared for exact equality with the "uri"
 *       attribute of elements whose action is "taglib".
 *
 * The element itself is examined before its children.  Returns the value
 * of the matching element's "prefix" attribute, or an empty string when
 * nothing in the subtree matches.
 */
string jsp_parser::find_prefix(const jsp_element_ptr e, const string& uri) const
{
    if (e->action() == "taglib") {
        string_map_t::const_iterator hit = e->attribute.find("uri");
        if (hit != e->attribute.end() && hit->second == uri) {
            hit = e->attribute.find("prefix");
            if (hit != e->attribute.end())
                return hit->second;
        }
    }

    // Not found on this node: try each child in order and stop at the
    // first non-empty answer.
    vector<jsp_element_ptr> children = e->jsp_elements();
    for (vector<jsp_element_ptr>::const_iterator child = children.begin();
         child != children.end(); ++child) {
        string nested = find_prefix(*child, uri);
        if (!nested.empty())
            return nested;
    }
    return string();
}

/*
 * Consumes lines from the front of `file` and appends the elements they
 * describe to `e`.
 *
 * Each line is offered to the recognisers in a fixed order:
 * parse_prefix, parse_expression, parse_declaration, parse_scriptlet,
 * parse_directive.  A line that none of them claims is stored as a
 * `normal` element.
 *
 * Returns true when the input is exhausted or when parse_prefix reports
 * a closing tag (PREFIX_ENDED); returns false as soon as a recogniser
 * returns something other than TARGET_FOUND / TARGET_NOTFOUND.
 */
bool jsp_parser::parse(file_t& file, jsp_element_ptr e)
{
    while (!file.empty()) {
        const string line = file.front();
        file.pop_front();

        // Tags of a registered prefix.
        int status = parse_prefix(e, file, line);
        if (status == PREFIX_BEGIN)
            continue;
        if (status == PREFIX_ENDED)
            return true;

        // <%= ... %>
        status = parse_expression(e, file, line);
        if (status == TARGET_FOUND)
            continue;
        if (status != TARGET_NOTFOUND)
            return false;

        // <%! ... %>
        status = parse_declaration(e, file, line);
        if (status == TARGET_FOUND)
            continue;
        if (status != TARGET_NOTFOUND)
            return false;

        // <% ... %>
        status = parse_scriptlet(e, file, line);
        if (status == TARGET_FOUND)
            continue;
        if (status != TARGET_NOTFOUND)
            return false;

        // <%@ ... %>
        status = parse_directive(e, file, line);
        if (status == TARGET_FOUND)
            continue;
        if (status != TARGET_NOTFOUND)
            return false;

        // Nothing matched: keep the line as plain text.
        jsp_element_ptr text(new jsp_element());
        text->type(normal);
        text->line(line);
        e->push_back(text);
    }
    return true;
}

/*
 * Looks for a tag of any registered prefix ("<prefix:" or "</prefix:")
 * in the line `s`.
 *
 * e    : element that receives what is parsed.
 * file : remaining input; parse_action()/erase_action() may consume
 *        further lines from it or push a remainder back.
 * s    : the current line.
 *
 * Returns
 *   PREFIX_BEGIN    - an opening tag was found and passed to parse_action(),
 *   PREFIX_ENDED    - a closing tag was found and removed by erase_action(),
 *   PREFIX_NOTFOUND - the line contains no such tag.
 *
 * The tag that occurs FIRST in the line is the one that is handled, no
 * matter in which order the prefixes were registered.  Text in front of
 * that tag is stored in `e` as a `normal` element.  The original code
 * did this only for opening tags: text in front of a closing tag was
 * dropped, and erase_action() could cut at a '>' inside that text.
 */
int jsp_parser::parse_prefix(jsp_element_ptr e, file_t& file, const string& s)
{
#ifdef DEBUG
    cerr << __FUNCTION__ << " [" << s << "]" << endl;
#endif

    // Position of the earliest opening or closing marker.  npos is the
    // largest size_type value, so "candidate < hit" is true exactly when
    // the candidate was found and lies before the best match so far.
    string::size_type hit = string::npos;
    bool closing = false;

    vector<string>::const_iterator i = _prefix.begin();
    for (; i != _prefix.end(); ++i) {
        const string::size_type open_pos = s.find("<" + *i + ":");
        if (open_pos < hit) {
            hit = open_pos;
            closing = false;
        }

        const string::size_type close_pos = s.find("</" + *i + ":");
        if (close_pos < hit) {
            hit = close_pos;
            closing = true;
        }
    }

    if (hit == string::npos)
        return PREFIX_NOTFOUND;

    string line = s;

    // Keep whatever precedes the tag as ordinary text.
    if (hit != 0) {
        jsp_element_ptr c(new jsp_element());
        c->type(normal);
        c->line(line.substr(0, hit));
        e->push_back(c);

        line.erase(0, hit);
    }

    if (closing) {
        erase_action(file, line);
        return PREFIX_ENDED;
    }

    parse_action(e, file, line);
    return PREFIX_BEGIN;
}

/*
 * Parses one opening action tag ("<prefix:name attr="..." ...>") and,
 * unless the tag is self-closing ("/>"), its body.
 *
 * e    : parent element; the new element is appended to it.
 * file : remaining input.  Further lines are consumed while the tag is
 *        not yet terminated; text following the terminating '>' is
 *        pushed back to the front.
 * s    : text starting at the '<' of the tag.
 *
 * The element's type is jsp_std_action when `s` starts with "<jsp:" and
 * action otherwise.  Attribute values are stored either as the bare
 * expression (for "<%= ... %>" values) or wrapped in double quotes.
 *
 * Changes against the original:
 *  - an unterminated tag at end of input no longer calls front() on an
 *    empty container; the fragment is reported and stored as it is;
 *  - the self-closing test looks at the character before the terminating
 *    '>' instead of indexing `length() - 2` of whatever was left after
 *    attribute parsing (which underflowed when only ">" remained);
 *  - the action name ends at the first whitespace, '/' or '>', so tab or
 *    newline separated attributes no longer leak into the name;
 *  - the malformed-tag exit uses a non-zero status.
 *
 * NOTE(review): the result of the nested parse() is ignored, as before;
 * this function returns void so a failure inside the body cannot be
 * reported to the caller.
 */
void jsp_parser::parse_action(jsp_element_ptr e, file_t& file, const string& s)
{
#ifdef DEBUG
    cerr << __FUNCTION__ << " [" << s << "]" << endl;
#endif

    jsp_element_ptr c(new jsp_element());

    c->type(s.find("<jsp:") == 0 ? jsp_std_action : action);
    c->line(s);

    string line = s;

    // Find the '>' that terminates the tag, ignoring any '>' inside a
    // double-quoted attribute value.  Lines are appended from `file`
    // until the terminator shows up.
    bool invalue = false;
    string::size_type i = 0;
    for (;; ++i) {
        while (i >= line.length()) {
            if (file.empty()) {
                cerr << "parse error :unterminated action tag [" << line
                     << "]" << endl;
                c->line(line);
                e->push_back(c);
                return;
            }
            line += file.front();
            file.pop_front();
        }

        const char ch = line[i];
        if (ch == '"')
            invalue = !invalue;
        else if (!invalue && ch == '>')
            break;
    }

    // Hand everything after the tag back to the input.
    if (i + 1 < line.length())
        file.push_front(line.substr(i + 1));
    line.erase(i + 1);

    // "<x:y ... />" has no body.
    const bool self_closing = (i > 0 && line[i - 1] == '/');

    // Drop the leading '<'.
    line.erase(0, 1);

    string::size_type pos = line.find(":");
    if (pos == string::npos) {
        cerr << "parse error :invalid format in action directive\":\""
                  << endl;
        exit(1);
    }
    c->prefix(line.substr(0, pos));
    line.erase(0, pos + 1);

    // The action name runs up to the first separator.
    pos = line.find_first_of(" \t\r\n/>");
    c->action(line.substr(0, pos));

    // Attributes follow only when the name ended at whitespace.
    if (pos != string::npos && line[pos] != '/' && line[pos] != '>') {
        line.erase(0, pos + 1);

        for (;;) {
            pair<string, string> attr;
            if (!split_attribute(line, attr))
                break;

            // A value of the form <%= expr %> is reduced to " expr ";
            // any other value is stored as a double-quoted literal.
            string value = attr.second;
            if (!value.compare(0, 3, "<%=")) {
                value.erase(0, 3);
                if (value.length() >= 2)
                    value.erase(value.length() - 2);
            } else {
                value = string("\"") + value + "\"";
            }
            attr.second = value;
            c->attribute.insert(attr);
        }
    }

    // Everything up to the matching closing tag becomes a child of c.
    if (!self_closing)
        parse(file, c);

    e->push_back(c);
}

/*
 * Discards a closing tag.
 *
 * file : remaining input.  Lines are consumed until the '>' that ends
 *        the tag is found; what follows the '>' is pushed back to the
 *        front unless it is at most one character long (the same
 *        threshold as the original `pos + 2 < length()` test).
 * s    : text starting at the closing tag.
 *
 * If the input ends before a '>' is seen, the problem is reported on
 * cerr and the function returns; the original called front() on the
 * empty container in that situation.
 */
void jsp_parser::erase_action(file_t& file, const string& s)
{
#ifdef DEBUG
    cerr << __FUNCTION__ << " [" << s << "]" << endl;
#endif

    string line = s;
    string::size_type pos;

    while ((pos = line.find(">")) == string::npos) {
        if (file.empty()) {
            cerr << "parse error :unterminated closing tag [" << line
                 << "]" << endl;
            return;
        }
        line += file.front();
        file.pop_front();
    }

    const string remain = line.substr(pos + 1);
    if (remain.length() > 1)
        file.push_front(remain);
}

/*
 * Recognises a scriptlet ("<% ... %>").
 *
 * e    : element that receives the scriptlet.
 * file : remaining input; may be read from and pushed back to.
 * s    : the current line.
 *
 * Returns
 *   TARGET_NOTFOUND - the line has no "<%", or its first "<%" starts a
 *                     directive ("<%@"), declaration ("<%!") or
 *                     expression ("<%=");
 *   TARGET_FOUND    - the scriptlet was stored, or the line was split
 *                     in front of "<%" and both halves were pushed back
 *                     so that the next rounds see "<%" at column 0;
 *   PARSE_ERROR     - the input ended before "%>".  The original
 *                     returned `false` here, which is 0 == TARGET_FOUND
 *                     and so hid the error from parse().
 *
 * The stored text is the scriptlet body without "<%" and "%>".
 */
int jsp_parser::parse_scriptlet(jsp_element_ptr e, file_t& file, const string& s)
{
#ifdef DEBUG
    cerr << __FUNCTION__ << " [" << s << "]" << endl;
#endif

    string line = s;
    string::size_type pos = line.find("<%");
    if (pos == string::npos)
        return TARGET_NOTFOUND;

    // The character after "<%" tells the constructs apart.
    const char kind = s[pos + 2];
    if (kind == '@' || kind == '!' || kind == '=')
        return TARGET_NOTFOUND;

    // Leading text: re-queue "text" and "<% ..." as two separate lines.
    if (pos != 0) {
        file.push_front(line.substr(pos));
        file.push_front(line.substr(0, pos));
        return TARGET_FOUND;
    }

    // Join following lines until the terminator is present.
    while ((pos = line.find("%>")) == string::npos) {
        if (file.empty()) {
            cerr << "invalid directive " << line << endl;
            return PARSE_ERROR;
        }
        line += file.front();
        file.pop_front();
    }

    // Whatever follows "%>" goes back to the input.
    if (pos + 2 < line.length())
        file.push_front(line.substr(pos + 2));
    line.erase(pos);

    // Strip the opening "<%".
    line.erase(0, 2);

    jsp_element_ptr c(new jsp_element());
    c->type(scriptlet);
    c->line(line);
    e->push_back(c);

    return TARGET_FOUND;
}

/*
 * Recognises an expression ("<%= ... %>").
 *
 * e    : element that receives the expression.
 * file : remaining input; may be read from and pushed back to.
 * s    : the current line.
 *
 * Returns
 *   TARGET_NOTFOUND - the line has no "<%", or its first "<%" is not
 *                     followed by '=';
 *   TARGET_FOUND    - the expression was stored, or the line was split
 *                     in front of "<%=" and both halves were pushed back;
 *   PARSE_ERROR     - the input ended before "%>".  The original
 *                     returned `false` here, which is 0 == TARGET_FOUND
 *                     and so hid the error from parse().
 *
 * The stored text is the expression without "<%=" and "%>".
 */
int jsp_parser::parse_expression(jsp_element_ptr e, file_t& file, const string& s)
{
#ifdef DEBUG
    cerr << __FUNCTION__ << " [" << s << "]" << endl;
#endif

    string line = s;
    string::size_type pos = line.find("<%");
    if (pos == string::npos)
        return TARGET_NOTFOUND;

    // Only "<%=" is an expression.
    if (s[pos + 2] != '=')
        return TARGET_NOTFOUND;

    // Leading text: re-queue "text" and "<%= ..." as two separate lines.
    if (pos != 0) {
        file.push_front(line.substr(pos));
        file.push_front(line.substr(0, pos));
        return TARGET_FOUND;
    }

    // Join following lines until the terminator is present.
    while ((pos = line.find("%>")) == string::npos) {
        if (file.empty()) {
            cerr << "invalid directive " << line << endl;
            return PARSE_ERROR;
        }
        line += file.front();
        file.pop_front();
    }

    // Whatever follows "%>" goes back to the input.
    if (pos + 2 < line.length())
        file.push_front(line.substr(pos + 2));
    line.erase(pos);

    // Strip the opening "<%=".
    line.erase(0, 3);

    jsp_element_ptr c(new jsp_element());
    c->type(expression);
    c->line(line);
    e->push_back(c);

    return TARGET_FOUND;
}

/*
 * Recognises a declaration ("<%! ... %>").
 *
 * e    : element that receives the declaration.
 * file : remaining input; may be read from and pushed back to.
 * s    : the current line.
 *
 * Returns
 *   TARGET_NOTFOUND - the line has no "<%", or its first "<%" is not
 *                     followed by '!';
 *   TARGET_FOUND    - the declaration was stored, or the line was split
 *                     in front of "<%!" and both halves were pushed back;
 *   PARSE_ERROR     - the input ended before "%>".  The original
 *                     returned `false` here, which is 0 == TARGET_FOUND
 *                     and so hid the error from parse().
 *
 * The stored text is the declaration without "<%!" and "%>".
 */
int jsp_parser::parse_declaration(jsp_element_ptr e, file_t& file, const string& s)
{
#ifdef DEBUG
    cerr << __FUNCTION__ << " [" << s << "]" << endl;
#endif
    string line = s;
    string::size_type pos = line.find("<%");
    if (pos == string::npos)
        return TARGET_NOTFOUND;

    // Only "<%!" is a declaration.
    if (s[pos + 2] != '!')
        return TARGET_NOTFOUND;

    // Leading text: re-queue "text" and "<%! ..." as two separate lines.
    if (pos != 0) {
        file.push_front(line.substr(pos));
        file.push_front(line.substr(0, pos));
        return TARGET_FOUND;
    }

    // Join following lines until the terminator is present.
    while ((pos = line.find("%>")) == string::npos) {
        if (file.empty()) {
            cerr << "invalid directive " << line << endl;
            return PARSE_ERROR;
        }
        line += file.front();
        file.pop_front();
    }

    // Whatever follows "%>" goes back to the input.
    if (pos + 2 < line.length())
        file.push_front(line.substr(pos + 2));
    line.erase(pos);

    // Strip the opening "<%!".
    line.erase(0, 3);

#ifdef DEBUG
    cerr << "declaration:[" << line << "]" << endl;
#endif

    jsp_element_ptr c(new jsp_element());
    c->type(declaration);
    c->line(line);
    e->push_back(c);

    return TARGET_FOUND;
}

/*
 * Recognises a directive ("<%@ page|include|taglib attr="..." %>").
 *
 * e    : element that receives the directive.
 * file : remaining input; may be read from and pushed back to.
 * s    : the current line.
 *
 * Returns
 *   TARGET_NOTFOUND - the line contains no "<%@";
 *   TARGET_FOUND    - the directive was stored, or the line was split
 *                     in front of "<%@" and both halves were pushed back;
 *   PARSE_ERROR     - the input ended before "%>".  The original
 *                     returned `false` here, which is 0 == TARGET_FOUND
 *                     and so hid the error from parse().
 *
 * The element keeps the complete "<%@ ... %>" text as its line, the
 * directive name as its action and the parsed attributes.  For a taglib
 * directive the value of its "prefix" attribute is added to _prefix so
 * that parse_prefix() recognises tags of that library from now on.  A
 * taglib directive without a "prefix" attribute is reported and
 * registers nothing; the original dereferenced the end iterator.
 */
int jsp_parser::parse_directive(jsp_element_ptr e, file_t& file, const string& s)
{
#ifdef DEBUG
    cerr << __FUNCTION__ << " [" << s << "]" << endl;
#endif

    static const char* const directive_names[] = { "page", "include", "taglib" };
    static const size_t directive_count =
        sizeof(directive_names) / sizeof(directive_names[0]);

    string line = s;
    string::size_type pos = line.find("<%@");
    if (pos == string::npos)
        return TARGET_NOTFOUND;

    // Leading text: re-queue "text" and "<%@ ..." as two separate lines.
    if (pos != 0) {
        file.push_front(line.substr(pos));
        file.push_front(line.substr(0, pos));
        return TARGET_FOUND;
    }

    // Join following lines until the terminator is present.
    while ((pos = line.find("%>")) == string::npos) {
        if (file.empty()) {
            cerr << "invalid directive " << line << endl;
            return PARSE_ERROR;
        }
        line += file.front();
        file.pop_front();
    }

    // Whatever follows "%>" goes back to the input.
    if (pos + 2 < line.length()) {
        file.push_front(line.substr(pos + 2));
        line.erase(pos + 2);
    }

    // Complete directive text, kept for the element.
    const string tmp = line;

    // Strip "<%@" and the blanks after it.
    line.erase(0, 3);
    line.erase(0, line.find_first_not_of(" \t"));

    string          dire;
    string_map_t    attribute;

    for (size_t k = 0; k < directive_count; ++k) {
        const string name = directive_names[k];

        // The directive name must be at the very start of the text.
        if (line.compare(0, name.length(), name) != 0)
            continue;

        dire = name;
        line.erase(0, name.length());

        // Split off the attributes one by one.
        for (;;) {
            pair<string, string> attr;
            if (!split_attribute(line, attr))
                break;
            attribute.insert(attr);
        }

        if (dire == "taglib") {
            string_map_t::const_iterator p = attribute.find("prefix");
            if (p != attribute.end())
                _prefix.push_back(p->second);
            else
                cerr << "invalid directive :taglib without prefix " << tmp
                     << endl;
        }

        // At most one directive name can match.
        break;
    }

#ifdef DEBUG
    cerr << "directive :[" << dire << "]" << endl;
    string_map_t::iterator j = attribute.begin();
    for (; j != attribute.end(); ++j) {
        cerr << "attribute:[" << j->first << "][" << j->second << "]"
                  << endl;
    }
#endif
    jsp_element_ptr c(new jsp_element());
    c->type(directive);
    c->line(tmp);
    c->action(dire);
    c->attribute = attribute;
    e->push_back(c);

    return TARGET_FOUND;
}

/*
 * Splits the first `name="value"` (or `name='value'`) pair off the
 * front of `s`.
 *
 * s : in/out.  On success everything up to and including the closing
 *     quote is removed.  On failure `s` is left partially consumed.
 * p : out.  p.first receives the attribute name, p.second the value
 *     without the surrounding quotes.  A value that starts with "<%"
 *     is taken up to and including the first "%>", so quotes inside an
 *     embedded expression such as
 *     "<%=request.getParameter("format")%>" do not end the value.
 *
 * Returns false when `s` contains no '=', when the value is not quoted,
 * when the value is empty or when the closing quote is missing; a
 * warning is written to cerr for all but the first case.
 *
 * Changes against the original:
 *  - the value ends at the quote character that OPENED it; the original
 *    searched for '"' and then for '\'' and let the latter override, so
 *    `uri="x" prefix='c'` produced a value spanning both attributes;
 *  - the same quote is used to skip the closing delimiter after a
 *    "<% ... %>" value (the original searched for an empty string);
 *  - blanks include CR/LF, and blanks around '=' are ignored, so
 *    attributes spread over several lines are split correctly.
 */
bool jsp_parser::split_attribute(string& s, pair<string, string>& p)
{
#ifdef DEBUG
    std::cerr << __FUNCTION__ << std::endl;
#endif

    static const char* const blank = " \t\r\n";

    // Skip leading blanks.
    s.erase(0, s.find_first_not_of(blank));

    // Without '=' there is no further attribute.
    string::size_type pos = s.find("=");
    if (pos == string::npos)
        return false;

    string name = s.substr(0, pos);
    name.erase(name.find_last_not_of(blank) + 1);
    p.first = name;

    s.erase(0, pos + 1);
    s.erase(0, s.find_first_not_of(blank));

    const char quote = s.empty() ? '\0' : s[0];
    if (quote != '"' && quote != '\'') {
        cerr << "ٹ :°ͤ \" ޤ \' ޤ :[" << s << "]" << endl;
        return false;
    }

    // Drop the opening quote.
    s.erase(0, 1);

    string value;
    if (!s.compare(0, 2, "<%")) {
        // Embedded scripting element: take it whole, including "%>".
        if ((pos = s.find("%>")) != string::npos)
            value = s.substr(0, pos + 2);
    } else {
        if ((pos = s.find(quote)) != string::npos)
            value = s.substr(0, pos);
    }

    if (value.empty()) {
        cerr << "ٹ :°ͤǤ :[" << s << "]" << endl;
        return false;
    }

    p.second = value;
    s.erase(0, value.length());

    // Remove everything up to and including the closing quote.
    if ((pos = s.find(quote)) != string::npos) {
        s.erase(0, pos + 1);
    } else {
        cerr << "ٹ :°ͤ \" ޤ \' ޤ :[" << s << "]" << endl;
        return false;
    }

#ifdef DEBUG
    std::cerr << "first->[" << p.first << "] second->[" << p.second << "]" << std::endl;
#endif

    return true;
}
