#!/usr/bin/python
# -*- coding: utf-8 -*-

import os
import sys
import glob
import getopt
import re
import unicodedata
import logging
import gzip

# Program version string; print_version() includes it in its output.
VERSION = "0.2.0"


def convert(lines, filename_noext, template_dir=None):
    """Render the posts of one thread as a single HTML or plain-text document.

    Parameters:
        lines: list of post records, one per element, each of the form
            ``name<>email<>date<>message<>...``.  The thread title is taken
            from the first record by get_title().
        filename_noext: base name of the thread without extension; used for
            the "all posts" link and the ``filename`` template parameter.
        template_dir: None for the built-in HTML template, the sentinel
            ``"*text*"`` for plain-text output, otherwise a skin directory.

    Returns:
        The rendered document as a string, or None when *lines* is empty or
        a record has fewer than four ``<>``-separated fields.
    """
    if not lines:
        return None

    title = get_title(lines[0])
    count = len(lines)
    filesize = len("".join(lines))
    text_mode = template_dir == "*text*"

    params = {
        "title": title, "filename": filename_noext, "count": count,
        "link_all": get_link_all(filename_noext),
        "link_last50": get_link_last50(count, template_dir),
        "link_pager": get_link_pager(count, template_dir),
        "skin_path": get_skin_path(template_dir),
        # Floor division keeps the size in KB an integer on every interpreter.
        "filesize": filesize, "filesize2": filesize // 1024}

    # Collect the pieces and join once instead of growing a string with +=.
    chunks = [get_template_header(template_dir) % params]
    template_body = get_template_body(template_dir)

    for number, line in enumerate(lines, 1):
        fields = line.split("<>")
        if len(fields) < 4:
            return None
        name, email, date, message = fields[:4]

        # Compare by value: "is not" on a string literal only worked because
        # of an interpreter implementation detail.
        has_email = email != ""

        if text_mode:
            message = html2text(message)
            if has_email:
                name2 = ("%s E-mail:%s" %
                         (name.replace("<b>", "").replace("</b>", ""), email))
            else:
                name2 = "%s" % name
        else:
            message = auto_link(message, template_dir)
            if has_email:
                name2 = "<a href=\"mailto:%s\"><b>%s</b></a>" % (email, name)
            else:
                name2 = "<font color=green><b>%s</b></font>" % name

        chunks.append(template_body %
                      {"number": number, "name": name, "email": email,
                       "name2": name2, "date": date, "message": message})

    chunks.append(get_template_footer(template_dir) % params)
    return "".join(chunks)


def convert_file(input_file, output_dir, template_dir=None):
    """Convert one ``.dat`` / ``.dat.gz`` file and write the result.

    Parameters:
        input_file: path of the thread file to read (opened via open_file()).
        output_dir: directory that receives ``<name>.html`` (or ``<name>.txt``
            in text mode); the special value ``"-"`` writes to stdout.
        template_dir: passed through to convert(); ``"*text*"`` selects
            plain-text output.

    Returns:
        True on success, False when the file cannot be read, cannot be
        parsed, or the output cannot be written (an error is logged).
    """
    filename_noext = re.sub(r"\.dat\.gz$|\.dat$", "",
                            os.path.basename(input_file))

    try:
        source = open_file(input_file)
        try:
            lines = source.readlines()
        finally:
            # Always release the handle, even when reading fails.
            source.close()
    except (IOError, OSError) as e:
        logging.error("Could not read file: %s: %s" % (input_file, e))
        return False

    output = convert(lines, filename_noext, template_dir)
    if output is None:
        logging.error("Could not parse file: %s" % input_file)
        return False

    to_stdout = output_dir == "-"
    if to_stdout:
        output_file = "stdout"
    else:
        extension = ".txt" if template_dir == "*text*" else ".html"
        output_file = os.path.join(output_dir, filename_noext + extension)
        if os.path.exists(output_file):
            logging.warning("%s already exists. Overwriting ..." %
                            output_file)

    logging.info("Generating %s" % output_file)
    try:
        if to_stdout:
            sys.stdout.write(output)
        else:
            with open(output_file, "w") as f:
                f.write(output)
    except (IOError, OSError):
        # Only I/O failures are expected here; anything else (including
        # KeyboardInterrupt) must propagate instead of being swallowed.
        logging.error("Failed to write file: %s" % output_file)
        return False

    return True


def convert_files(input_files, output_dir, template_dir=None,
                  index=False, subject=False):
    """Convert every thread file named by *input_files*.

    Parameters:
        input_files: list of file and/or directory paths; expanded by
            get_filenames().
        output_dir: destination directory (created when missing) or ``"-"``
            for stdout.
        template_dir: passed through to convert_file().
        index: when true, also generate the index via make_index().
        subject: when true, also generate subject.txt via make_subject().

    Exits the process with status 2 when there are no usable input files or
    the output directory cannot be created or written to.
    """
    filenames = get_filenames(input_files)

    if not filenames:
        logging.error("No input files")
        sys.exit(2)

    to_stdout = output_dir == "-"

    if not to_stdout and not os.path.exists(output_dir):
        logging.info("Creating directory ...")
        try:
            os.makedirs(output_dir)
        except OSError as e:
            # Read the attribute rather than tuple-unpacking the exception,
            # which fails when the args are not exactly (errno, strerror).
            logging.error("Could not create %s: %s" %
                          (output_dir, e.strerror))
            sys.exit(2)
    if not to_stdout and not os.access(output_dir, os.W_OK):
        logging.error("Could not open %s: permission denied" % output_dir)
        sys.exit(2)

    for filename in filenames:
        convert_file(filename, output_dir, template_dir)

    if index:
        make_index(filenames, output_dir)
    if subject:
        make_subject(filenames, output_dir)


def get_filenames(input_files):
    """Expand *input_files* into a sorted list of readable file paths.

    Directories are replaced by the ``*.dat`` and ``*.dat.gz`` entries they
    contain (as absolute paths, processed recursively through this same
    function).  Paths that are not regular files or are not readable are
    skipped with a warning.
    """
    collected = []
    for path in input_files:
        logging.debug("get_filenames(): pathname=%s" % path)
        if os.path.isdir(path):
            base = os.path.abspath(path)
            candidates = (glob.glob(base + "/*.dat") +
                          glob.glob(base + "/*.dat.gz"))
            collected.extend(get_filenames(candidates))
        elif not os.path.isfile(path):
            logging.warning("Skipping %s: not found" % path)
        elif not os.access(path, os.R_OK):
            logging.warning("Skipping %s: permission denied" % path)
        else:
            collected.append(path)

    return sorted(collected)


def open_file(filename):
    """Open *filename* for reading, using gzip when its extension is ``.gz``.

    The caller is responsible for closing the returned file object.
    """
    _, extension = os.path.splitext(filename)
    opener = gzip.open if extension == ".gz" else open
    return opener(filename)


def get_skin_path(template_dir):
    """Return a ``file://`` URL prefix for *template_dir*, or "" when unset.

    The prefix is built from the absolute path of the directory and always
    ends with a slash.
    """
    if not template_dir:
        return ""
    return "file://" + os.path.abspath(template_dir) + "/"


def get_link_all(filename_noext):
    """Return the Shift_JIS encoded "all posts" link to ``<name>.html``."""
    anchor = u'<a href="%s.html">全部</a>'.encode("shift-jis")
    return anchor % filename_noext


def get_link_last50(count, use_template=False):
    """Return the Shift_JIS encoded "latest 50" link for a thread.

    The link targets the anchor of post ``count - 49`` (never below 1).
    Anchor names get an ``R`` prefix unless *use_template* is truthy.
    """
    first_of_last50 = max(count - 49, 1)
    prefix = "" if use_template else "R"
    anchor = u'<a href="#%s%s">最新50</a>'.encode("shift-jis")
    return anchor % (prefix, first_of_last50)


def get_link_pager(count, use_template=False):
    """Return space-separated links to posts 1, 101, 201, ... of a thread.

    One link is produced per started block of 100 posts
    (``count // 100 + 1`` links in total).  Anchor names get an ``R`` prefix
    unless *use_template* is truthy.
    """
    prefix = "" if use_template else "R"
    # Floor division: "/" yields a float under true division, which range()
    # rejects.
    starts = [page * 100 + 1 for page in range(count // 100 + 1)]
    return " ".join('<a href="#%s%s">%s-</a>' % (prefix, start, start)
                    for start in starts)


def make_index(input_files, output_dir):
    """Write ``index.html`` listing every thread in *input_files*.

    Each entry shows a running number, the thread title (via get_title())
    and the number of lines in the file, and links to the HTML file that
    convert_file() generates for that thread.

    Parameters:
        input_files: list of ``.dat`` / ``.dat.gz`` paths.
        output_dir: directory that receives ``index.html``; ``"-"`` writes
            the index to stdout instead.

    Returns:
        True on success, False when writing fails (an error is logged).
    """
    to_stdout = output_dir == "-"
    if to_stdout:
        index_file = "stdout"
    else:
        index_file = os.path.join(output_dir, "index.html")
        if os.path.exists(index_file):
            logging.warning("%s already exists. Overwriting ..." % index_file)

    header = ("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 "
              "Transitional//EN\">\r\n"
              "<html>\r\n"
              "<head>\r\n"
              "<meta http-equiv=\"Content-Type\" content=\"text/html; "
              "charset=Shift_JIS\">\r\n"
              "<meta http-equiv=\"Content-Style-Type\" "
              "content=\"text/css\">\r\n"
              "<title>BBS_log</title>\r\n"
              "</head>\r\n"
              "<body bgcolor=\"#efefef\" text=\"black\" link=\"blue\" "
              "alink=\"red\" vlink=\"#660099\">\r\n"
              "<div style=\"margin-bottom:0.5em;\"></div>\r\n"
              "<div style=\"margin-bottom:1em;\">\r\n")
    row = u"　<a href=\"%s\">%s：%s(%s)</a><br>\r\n".encode("shift-jis")

    parts = [header]
    for number, filename in enumerate(input_files, 1):
        # Read each file once and close it; the title is in the first line.
        source = open_file(filename)
        try:
            lines = source.readlines()
        finally:
            source.close()
        title = get_title(lines[0] if lines else "")

        # Derive the link target the same way convert_file() names its
        # output, so "x.dat.gz" links to "x.html" rather than "x.html.gz".
        html_name = re.sub(r"\.dat\.gz$|\.dat$", "",
                           os.path.basename(filename)) + ".html"
        parts.append(row % (html_name, number, title, len(lines)))

    parts.append("</div>\r\n</body>\r\n</html>\r\n")
    output = "".join(parts)

    logging.info("Generating %s" % index_file)
    try:
        if to_stdout:
            sys.stdout.write(output)
        else:
            with open(index_file, "w") as f:
                f.write(output)
    except IOError as e:
        logging.error("Failed to write %s: %s" % (index_file, e.strerror))
        return False

    return True


def make_subject(input_files, output_dir):
    """Write ``subject.txt`` with one ``file<>title (count)`` line per thread.

    Parameters:
        input_files: list of ``.dat`` / ``.dat.gz`` paths.
        output_dir: directory that receives ``subject.txt``; ``"-"`` writes
            the listing to stdout instead.

    Returns:
        True on success, False when writing fails (an error is logged).
    """
    to_stdout = output_dir == "-"
    if to_stdout:
        subject_file = "stdout"
    else:
        subject_file = os.path.join(output_dir, "subject.txt")
        if os.path.exists(subject_file):
            logging.warning("%s already exists. Overwriting ..." %
                            subject_file)

    logging.info("Generating %s" % subject_file)
    entries = []
    for filename in input_files:
        # Read each file once and close it; the title is in the first line.
        source = open_file(filename)
        try:
            lines = source.readlines()
        finally:
            source.close()
        title = get_title(lines[0] if lines else "")
        entries.append("%s<>%s (%s)\r\n" %
                       (os.path.basename(filename), title, len(lines)))
    output = "".join(entries)

    try:
        if to_stdout:
            sys.stdout.write(output)
        else:
            with open(subject_file, "w") as f:
                f.write(output)
    except IOError as e:
        logging.error("Failed to write %s: %s" % (subject_file, e.strerror))
        return False

    return True


def get_title(line):
    """Return the thread title stored in the fifth ``<>``-separated field.

    Trailing CR/LF characters are stripped.  When the line has fewer than
    five fields a warning is logged and ``"(Untitled)"`` is returned.
    """
    fields = line.split("<>")
    if len(fields) < 5:
        logging.warning("Could not get title")
        return "(Untitled)"
    return fields[4].rstrip("\r\n")


def auto_link(message, use_template=False):
    """Turn post references and URLs inside *message* into HTML links.

    The substitutions run in a fixed order on the (byte string) message:
    existing read.cgi anchors are unwrapped, ``>>N`` style references are
    linked to in-page anchors, and http/https/ftp/ttp(s) URLs are linked.

    Parameters:
        message: message body of one post, as HTML.
        use_template: when falsy, in-page anchor names get an ``R`` prefix
            (the built-in template names its anchors ``R<number>``).

    Returns:
        The message with the links inserted.
    """
    prefix = ""
    if not use_template:
        prefix = "R"

    # Unwrap anchors that point at ../test/read.cgi/..., keeping only the
    # link text so the reference patterns below can re-link it.
    p = re.compile(
        r'<a href="../test/read.cgi/\w+/\d+/\d+[-,]?[^\"]*" '
        r'target="_blank">([^<]+)</a>')
    message = p.sub(r'\1', message)

    # One or two "&gt;" / "＞" followed by ASCII digits (optionally followed
    # by a range/list tail) becomes a link to the first number.  The prefix
    # is inserted afterwards through a "%(prefix)s" placeholder; note that
    # the replace() also rewrites that literal text anywhere in the message.
    p = re.compile(
        u'((?:&gt;|＞){1,2})(\\d+)((?:[^&][-,\\d]+)?)'.encode("shift-jis"))
    message = p.sub(r'<a href="#%(prefix)s\2">\1\2\3</a>', message)
    message = message.replace("%(prefix)s", prefix)

    # Same reference form, but with digits given as byte pairs 0x82
    # 0x4F-0x58 (presumably Shift_JIS full-width digits - TODO confirm).
    # The href uses the NFKC-normalised digits; the link text keeps the
    # original bytes.
    p = re.compile(
        u'((?:&gt;|＞){1,2})((?:\\x82[\\x4F-\\x58])+)'.encode("shift-jis"))
    message = p.sub(
        lambda x: '<a href="#%s%s">%s%s</a>' %
        (prefix, unicodedata.normalize("NFKC", x.group(2).decode("shift-jis"))
         .encode("shift-jis"), x.group(1), x.group(2)), message)

    # Link http/https/ftp URLs that are not directly preceded by a double
    # quote (i.e. not already inside an attribute value).
    # NOTE(review): "+-~" inside the character class is a range from "+" to
    # "~", not three literal characters, so it also admits e.g. "<", ">" and
    # ","; confirm whether that is intended before changing it.
    p = re.compile(r'([^\"]|^)(https?|ftp)(://[\w:;/.?%\#&=+-~!]+)')
    message = p.sub(r'\1<a href="\2\3" target="_blank">\2\3</a>', message)

    # Link "ttp://" / "ttps://" URLs written without the leading "h"; the
    # href gets the "h" restored while the link text stays as written.
    p = re.compile(r'([^h]|^)(ttps?)(://[\w:;/.?%\#&=+-~!]+)')
    message = p.sub(r'\1<a href="h\2\3" target="_blank">\2\3</a>', message)

    return message


def html2text(message):
    """Convert the HTML body of one post into indented plain text.

    read.cgi anchors are unwrapped, ``<br>`` becomes CRLF, every line is
    indented by six spaces, and the entities ``&lt;``, ``&gt;``, ``&nbsp;``,
    ``&quot;`` and ``&amp;`` are decoded (``&amp;`` last).
    """
    # Applied strictly in this order; later patterns rely on earlier ones.
    substitutions = (
        (r'<a href="../test/read.cgi/\w+/\d+/\d+[-,]?[^\"]*" '
         r'target="_blank">([^<]+)</a>', r'\1'),
        (r'<br>', "\r\n"),
        (r' ?\r\n ?', "\r\n"),
        (r'^([^ ])', r' \1'),
        (r'^ ', "      "),
        (r' $', ""),
        ('\r\n', "\r\n      "),
    )
    for pattern, replacement in substitutions:
        message = re.sub(pattern, replacement, message)

    entities = (
        ("&lt;", "<"),
        ("&gt;", ">"),
        ("&nbsp;", " "),
        ("&quot;", "\""),
        ("&amp;", "&"),
    )
    for entity, character in entities:
        message = message.replace(entity, character)

    return message


def template_exists(template_dir):
    """Tell whether *template_dir* holds a complete set of template files.

    Returns True only when ``header.html``, ``footer.html`` and ``res.html``
    are all present, each either under that name or with a capitalized
    first letter.  Returns False when *template_dir* is None.
    """
    if template_dir is None:
        return False

    for name in ("header.html", "footer.html", "res.html"):
        exact = os.path.join(template_dir, name)
        capitalized = os.path.join(template_dir, name.capitalize())
        if not (os.path.exists(exact) or os.path.exists(capitalized)):
            return False
    return True


def read_template(template_dir, filename):
    """Return the contents of template file *filename* in *template_dir*.

    When a variant of *filename* with a capitalized first letter exists it
    is read instead.  The file is closed before returning.
    """
    capitalized = os.path.join(template_dir, filename.capitalize())
    if os.path.exists(capitalized):
        path = capitalized
    else:
        path = os.path.join(template_dir, filename)

    logging.info("%s exists. Loading ..." % path)
    # Context manager so the handle is released deterministically.
    with open(path) as f:
        return f.read()


def get_template_header(template_dir=None):
    """Return the %-format string used for the top of a generated document.

    Parameters:
        template_dir: ``"*text*"`` selects the plain-text header; a
            directory accepted by template_exists() selects its
            ``header.html`` with the skin tags translated to ``%(...)s``
            placeholders; anything else selects the built-in HTML header.

    Returns:
        A string meant to be formatted with the parameter dict built in
        convert().
    """
    if template_dir == "*text*":
        return "%(title)s\r\n\r\n"

    if template_exists(template_dir):
        s = read_template(template_dir, "header.html")
        # The result is later used as a %-format string, so a literal "%"
        # in the skin (e.g. CSS "width:100%") must be escaped before the
        # placeholders are inserted, or formatting raises an error.
        s = s.replace("%", "%%")
        replacements = (
            ("<THREADNAME/>", "%(title)s"),
            ("<THREADURL/>", ""),
            ("<SKINPATH/>", "%(skin_path)s"),
            ("<GETRESCOUNT/>", "%(count)s"),
            ("<CHARSET/>", "Shift_JIS"),
            ("<LINK_BACKTOINDEX/>", ""),
            ("<LINK_BACKTOBOARD/>", ""),
            ("<LINK_SOURCETHREAD/>", ""),
            ("<LINK_ALL/>", "%(link_all)s"),
            ("<LINK_RESNUMBER/>", "%(link_pager)s"),
            ("<LINK_LASTFIFTY/>", "%(link_last50)s"),
        )
        for tag, value in replacements:
            s = s.replace(tag, value)
        return s

    s = "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 " \
        "Transitional//EN\">\r\n" \
        "<html>\r\n" \
        "<head>\r\n" \
        "<meta http-equiv=\"Content-Type\" content=\"text/html; " \
        "charset=Shift_JIS\">\r\n" \
        "<meta name=\"Author\" content=\"%(filename)s\">\r\n" \
        "<title>%(title)s</title>\r\n" \
        "</head>\r\n" \
        "<body bgcolor=#efefef text=black link=blue alink=red " \
        "vlink=#660099>\r\n" \
        "<div style=\"margin-top:1em;\"><span style='float:left;'>\r\n" \
        "%(link_all)s %(link_pager)s %(link_last50)s\r\n" \
        "</span>&nbsp;</div>\r\n" \
        "<hr style=\"background-color:#888;color:#888;border-width:0;" \
        "height:1px;position:relative;top:-.4em;\">\r\n" \
        "<h1 style=\"color:red;font-size:larger;font-weight:normal;" \
        "margin:-.5em 0 0;\">%(title)s</h1>\r\n" \
        "<dl class=\"thread\">\r\n"
    # The source file is UTF-8; the generated document is Shift_JIS.
    return s.decode("utf-8").encode("shift-jis")


def get_template_footer(template_dir=None):
    """Return the %-format string used for the end of a generated document.

    Parameters:
        template_dir: ``"*text*"`` yields an empty footer; a directory
            accepted by template_exists() selects its ``footer.html`` with
            the skin tags translated to ``%(...)s`` placeholders; anything
            else selects the built-in HTML footer.

    Returns:
        A string meant to be formatted with the parameter dict built in
        convert().
    """
    if template_dir == "*text*":
        return ""

    if template_exists(template_dir):
        s = read_template(template_dir, "footer.html")
        # The result is later used as a %-format string, so a literal "%"
        # in the skin must be escaped before the placeholders are inserted.
        s = s.replace("%", "%%")
        replacements = (
            ("<LINK_BACKTOINDEX/>", ""),
            ("<LINK_ALL/>", "%(link_all)s"),
            ("<LINK_BACK/>", ""),
            ("<LINK_NEXT/>", ""),
            ("<LINK_LASTFIFTY/>", "%(link_last50)s"),
            ("<LINK_CREDIT/>", ""),
            ("<INDEXCODE_FORRECOMPOSE/>", ""),
            ("<SIZEKB/>", "%(filesize2)s"),
            ("<SIZE/>", "%(filesize)s"),
            ("<BBSNAME/>", ""),
            ("<BOARDNAME/>", ""),
            ("<BOARDURL/>", ""),
        )
        for tag, value in replacements:
            s = s.replace(tag, value)
        return s

    s = "</dl>\r\n" \
        "<hr>\r\n" \
        "%(link_all)s\r\n" \
        " %(link_last50)s\r\n" \
        "</body>\r\n" \
        "</html>\r\n"
    # The source file is UTF-8; the generated document is Shift_JIS.
    return s.decode("utf-8").encode("shift-jis")


def get_template_body(template_dir=None):
    """Return the %-format string used to render one post.

    Parameters:
        template_dir: ``"*text*"`` selects the plain-text layout; a
            directory accepted by template_exists() selects its ``res.html``
            with the skin tags translated to ``%(...)s`` placeholders;
            anything else selects the built-in HTML layout.

    Returns:
        A string meant to be formatted with the per-post dict built in
        convert() (keys number, name, email, name2, date, message).
    """
    if template_dir == "*text*":
        return u"%(number)s 名前：%(name2)s ：%(date)s\r\n" \
            "%(message)s\r\n\r\n".encode("shift-jis")

    if template_exists(template_dir):
        s = read_template(template_dir, "res.html")
        # The result is later used as a %-format string, so a literal "%"
        # in the skin must be escaped before the placeholders are inserted.
        s = s.replace("%", "%%")
        replacements = (
            # NOTE(review): the href contains a literal backslash
            # ("menu\:"), exactly as before - confirm this is intended.
            ("<NUMBER/>", "<a href=\"menu\\:%(number)s\" "
                          "name=\"%(number)s\">%(number)s</a>"),
            ("<PLAINNUMBER/>", "%(number)s"),
            ("<NAME/>", "<b>%(name)s</b>"),
            ("<MAIL/>", "%(email)s"),
            ("<MAILNAME/>", "%(name2)s"),
            ("<DATE/>", "%(date)s"),
            ("<MESSAGE/>", "%(message)s"),
        )
        for tag, value in replacements:
            s = s.replace(tag, value)
        return s

    s = "<dt><a name=\"R%(number)s\">%(number)s</a> " \
        "名前：%(name2)s：" \
        "%(date)s<dd>%(message)s<br><br>\r\n"
    # The source file is UTF-8; the generated document is Shift_JIS.
    return s.decode("utf-8").encode("shift-jis")


def print_help():
    """Print the command-line usage summary to standard output."""
    usage_lines = (
        "Usage: dat2html [OPTIONS...] [PATH...]\n",
        "Options:",
        "  --template        specify the template directory",
        "  -o, --output      specify the output directory",
        "  --text            convert to text format instead of HTML",
        "  --index           generate an index file",
        "  --subject         generate a subject.txt file",
        "  -q, --quiet       suppress warning and info messages",
        "  -v, --verbose     print debugging messages",
        "  -h, --help        display this help and exit",
        "  -V, --version     display version information and exit",
    )
    # A single parenthesised argument prints identically as a Python 2
    # print statement and as a function call.
    print("\n".join(usage_lines))


def print_version():
    """Print the program name and version to standard output."""
    banner = "dat2html (dat2html-gtk) %s" % VERSION
    print(banner)


def main():
    """Parse the command line and run the conversion.

    With the single argument ``-`` the thread is read from stdin and the
    result is written to stdout; otherwise the arguments are file or
    directory paths handed to convert_files().

    Exits with status 2 on usage errors and with status 1 when the data
    read from stdin cannot be converted.
    """
    template_dir = None
    output_dir = os.getcwd()
    index = False
    subject = False
    log_level = logging.INFO

    try:
        opts, args = getopt.getopt(
            sys.argv[1:],
            "o:qhvV",
            ["template=", "output=", "text", "index", "subject",
             "quiet", "verbose", "help", "version"])
    except getopt.GetoptError:
        print_help()
        sys.exit(2)

    for opt, value in opts:
        if opt == "--template":
            template_dir = value
        elif opt in ("-o", "--output"):
            output_dir = value
        elif opt == "--text":
            template_dir = "*text*"
        elif opt == "--index":
            index = True
        elif opt == "--subject":
            subject = True
        elif opt in ("-q", "--quiet"):
            log_level = logging.ERROR
        elif opt in ("-v", "--verbose"):
            log_level = logging.DEBUG
        elif opt in ("-h", "--help"):
            print_help()
            sys.exit()
        elif opt in ("-V", "--version"):
            print_version()
            sys.exit()

    if not args:
        print_help()
        sys.exit(2)

    logging.basicConfig(level=log_level, format="%(levelname)s: %(message)s")

    if "-" in args:
        if len(args) > 1:
            logging.error("Too many arguments")
            sys.exit(2)
        output = convert(sys.stdin.readlines(), "%(filename)s", template_dir)
        if output is None:
            # convert() signals unparsable input with None; report it
            # instead of printing the literal text "None".
            logging.error("Could not parse input from stdin")
            sys.exit(1)
        sys.stdout.write(output)
    else:
        convert_files(args, output_dir, template_dir, index, subject)


# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()
