package zephyr.obunsha.royalfrjp;

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import zephyr.util.AppendJPKeys;
import zephyr.util.AppendLatinKeys;
import zephyr.util.ZephyrUtil;

/**
 * Line-oriented filter for dictionary body HTML: reads UTF-8 lines from standard input,
 * rewrites each one and prints the result to standard output.
 *
 * <p>For every line the filter
 * <ol>
 * <li>applies the {@link #REPLACES} / {@link #REG_REPLACES} tables through
 * {@code ZephyrUtil.sed},</li>
 * <li>converts {@code <jump onClick="...">} elements into anchors, file notes or images
 * (see {@link #fixJump(String, boolean)}), and</li>
 * <li>derives extra keys from {@code <yakugo>} elements and strips those tags
 * (see {@link #appendJPKeys(String, String)}).</li>
 * </ol>
 *
 * <p>Diagnostics are written to standard error.
 */
public class SedBodyHtml {

    /**
     * Literal substitution table passed to {@code ZephyrUtil.sed}; presumably each entry is a
     * { search, replacement } pair (the exact semantics are defined by that helper).
     */
    private final static String[][] REPLACES = { { "&lt;", "<" }, { "&gt;", ">" },
            { "&amp;amp;", "&" }, { "&amp;", "&" }, { "<b>［用例］</b>", "［用例］" }, { "((", "《" },
            { "))", "》" }, { "&hellip;", "..." }, { "&rarr;", "→" }, { "&larr;", "←" },
            { "&hArr;", "⇔" }, { "&rArr;", "⇒" }, { "&dagger;", "†" }, { "&minus;", "-" },
            { "&mdash;", "―" }, { "℃&deg;C", "℃" }, { "&radic;", "√" } };

    /**
     * Regular-expression substitution table passed to {@code ZephyrUtil.sed}; presumably each
     * entry is a { regex, replacement } pair (the exact semantics are defined by that helper).
     */
    private final static String[][] REG_REPLACES = {
            { "(<dt[^>]+>)(.+?)</dt><dd><p> *\\2 *", "$1$2</dt><dd><p>" }, { "<p> +", "<p>" },
            { " +</p>", "</p>" } };

    /** Matches a whole jump element: group 1 is the onClick script, group 2 the link text. */
    private static final String JUMP_REGEXP = "<jump onClick=\"(.+?)\">(.+?)</jump>";
    private static final Pattern JUMP = Pattern.compile(JUMP_REGEXP);
    /** Matches a {@code jump('ID','...')} script: group 1 is the first argument. */
    private static final String WORDJUMP_REGEXP = "jump\\('([^']+)','[^']*'\\)";
    private static final Pattern WORDJUMP = Pattern.compile(WORDJUMP_REGEXP);
    /**
     * Matches an {@code openNewWin('<dots and slashes>/dir/name.pdf','TYPE')} script: group 1 is
     * {@code dir/name.pdf}, group 2 is {@code name}, group 3 is the second argument.
     */
    private static final String PDFJUMP_REGEXP =
            "openNewWin\\('[\\./]+/([^/]+/(.+?)\\.pdf)','(.+?)'\\)";
    private static final Pattern PDFJUMP = Pattern.compile(PDFJUMP_REGEXP);

    /**
     * Replaces every {@code <jump>} element of a line.
     *
     * <ul>
     * <li>{@code jump('ID','')} becomes {@code <a href="#ID">text</a>}.</li>
     * <li>{@code openNewWin('.../dir/name.pdf','TYPE')} becomes {@code text 【file:dir/name.pdf】},
     * or, when {@code bZuhanJpg} is set and TYPE is {@code ZUHAN}, {@code 【text】} plus an
     * {@code <img src="name.jpg"/>} paragraph that is inserted before the last {@code </dd>}
     * of the line.</li>
     * <li>A jump whose script matches neither form is reported on standard error and the
     * element is dropped from the output.</li>
     * </ul>
     *
     * @param line      one line of HTML
     * @param bZuhanJpg whether ZUHAN links are turned into inline JPEG images
     * @return the rewritten line; the unchanged input line when images were collected but no
     *         {@code </dd>} follows the last jump element
     */
    private static String fixJump(String line, boolean bZuhanJpg) {
        Matcher m = JUMP.matcher(line);
        int k = 0; // end offset of the previous match, i.e. start of the text not yet copied
        StringBuilder sb = new StringBuilder();
        StringBuilder imgSb = new StringBuilder();
        while (m.find()) {
            // copy the text between the previous jump and this one verbatim
            sb.append(line, k, m.start());
            // <jump onClick="jump('A-05321','')">avoir</jump>
            // <jump
            // onClick="openNewWin('../../../zuhan/017-heraldique.pdf','ZUHAN')">h&eacute;raldique
            // 図</jump>
            String onClick = m.group(1);
            String text = m.group(2);
            if (onClick.startsWith("jump")) {
                Matcher m2 = WORDJUMP.matcher(onClick);
                if (m2.find()) {
                    sb.append("<a href=\"#");
                    sb.append(m2.group(1));
                    sb.append("\">");
                    sb.append(text);
                    sb.append("</a>");
                } else {
                    System.err.println("INVALID JUMP1: " + onClick);
                }
            } else {
                Matcher m2 = PDFJUMP.matcher(onClick);
                if (m2.find()) {
                    String path = m2.group(1);
                    String file = m2.group(2);
                    String type = m2.group(3);
                    if (bZuhanJpg && type.equals("ZUHAN")) {
                        sb.append("【");
                        sb.append(text);
                        sb.append("】");
                        // images are collected and emitted once, just before the closing </dd>
                        imgSb.append("<p><img src=\"");
                        imgSb.append(file);
                        imgSb.append(".jpg\"/></p>");
                    } else {
                        sb.append(text);
                        sb.append(" 【file:");
                        sb.append(path);
                        sb.append("】");
                    }
                } else {
                    // report unparseable scripts here too, mirroring the word-jump branch
                    System.err.println("INVALID JUMP2: " + onClick);
                }
            }
            k = m.end();
        }
        if (imgSb.length() == 0) {
            sb.append(line, k, line.length());
            return sb.toString();
        }
        // The images must go in front of a </dd> that lies at or after the last jump; anything
        // else (no </dd> at all, or only one before the last jump) cannot be patched safely.
        int n = line.lastIndexOf("</dd>");
        if (n < k) {
            System.err.println("INVALID LINE without </dd>");
            return line;
        }
        sb.append(line, k, n);
        sb.append(imgSb);
        // keep </dd> and whatever follows it
        sb.append(line, n, line.length());
        return sb.toString();
    }

    /** Matches one {@code <yakugo>} element; group 1 is its content. */
    private static final String YAKUGO_REGEXP = "<yakugo>(.+?)</yakugo>";
    private static final Pattern YAKUGO = Pattern.compile(YAKUGO_REGEXP);

    private static final AppendLatinKeys LATKEY = new AppendLatinKeys();
    /** Running total of the key-set sizes produced by {@link #makeJPKeys}. */
    private static int nJPKey = 0;

    /**
     * Builds the key text for one entry from the {@code <yakugo>} elements of its body.
     *
     * <p>Group 2 of every {@code AppendJPKeys.JPWORD} match inside a {@code <yakugo>} element is
     * collected into a sorted set. When the set is not empty its size is added to
     * {@link #nJPKey} and the result of {@code LATKEY.makeKeys} is returned (its output format
     * is defined by {@code AppendLatinKeys}).
     *
     * @param dt   content of the entry's {@code <dt>} element
     * @param dd   the entry body starting at {@code <dd>}
     * @param type key type forwarded to {@code LATKEY.makeKeys}
     * @return the generated key text, or the empty string when no key was found
     */
    private static String makeJPKeys(String dt, String dd, String type) {
        Matcher m = YAKUGO.matcher(dd);
        TreeSet<String> keySet = new TreeSet<String>();
        while (m.find()) {
            Matcher m2 = AppendJPKeys.JPWORD.matcher(m.group(1));
            while (m2.find()) {
                keySet.add(m2.group(2));
            }
        }
        if (keySet.isEmpty()) {
            return "";
        }
        nJPKey += keySet.size();
        // dtKey stays null when the headword itself contains no JPWORD match
        Matcher m3 = AppendJPKeys.JPWORD.matcher(dt);
        String dtKey = (m3.find() ? ZephyrUtil.makeKey(m3.group(2)) : null);
        return LATKEY.makeKeys(dt, dtKey, type, keySet);
    }

    /** Splits an entry line: group 1 is the {@code <dt>} content, group 2 the rest from {@code <dd>}. */
    private static final String DT_DD_REGEXP = "<dt[^>]+>(.+?)</dt>(<dd>.*)";
    private static final Pattern DT_DD = Pattern.compile(DT_DD_REGEXP);

    /**
     * Inserts the keys generated by {@link #makeJPKeys} between the {@code </dt>} and the
     * {@code <dd>} of an entry line and removes the {@code <yakugo>} tags from the body.
     *
     * @param line one line of HTML
     * @param type key type forwarded to {@link #makeJPKeys}
     * @return the rewritten line, or {@code line} itself when it is not a dt/dd entry
     */
    private static String appendJPKeys(String line, String type) {
        Matcher m = DT_DD.matcher(line);
        if (!m.find()) {
            return line;
        }
        String dt = m.group(1);
        String dd = m.group(2);
        // keys are computed first because they are derived from the <yakugo> markup
        String jpKey = makeJPKeys(dt, dd, type);
        dd = dd.replace("<yakugo>", "").replace("</yakugo>", "");
        return line.substring(0, m.start(2)) + jpKey + dd;
    }

    /**
     * Applies the replacement tables and then the jump conversion to one line.
     *
     * @param line      one line of HTML
     * @param bZuhanJpg forwarded to {@link #fixJump(String, boolean)}
     * @return the rewritten line
     */
    private static String sed(String line, boolean bZuhanJpg) {
        line = ZephyrUtil.sed(line, REPLACES, REG_REPLACES);
        line = fixJump(line, bZuhanJpg);
        return line;
    }

    /**
     * Runs the filter over standard input.
     *
     * <p>The argument {@code WithZuhan} enables image output for ZUHAN links; every other
     * argument is passed to {@code AppendLatinKeys.getKeyType} and replaces the default key
     * type {@code 表記}. After the last line a summary is printed to standard error if any keys
     * were counted.
     *
     * @param args command-line arguments
     * @throws Exception on any read failure or error raised by the helpers
     */
    private static void sed(String[] args) throws Exception {
        InputStream in = System.in;
        BufferedReader br = new BufferedReader(new InputStreamReader(in, "UTF-8"));
        String line;
        boolean bZuhanJpg = false;
        String keyType = "表記";
        for (String a : args) {
            if (a.equals("WithZuhan")) {
                bZuhanJpg = true;
            } else {
                keyType = AppendLatinKeys.getKeyType(a);
            }
        }

        while ((line = br.readLine()) != null) {
            line = sed(line, bZuhanJpg);
            line = appendJPKeys(line, keyType);
            System.out.println(line);
        }
        if (nJPKey > 0) {
            System.err.println("append " + nJPKey + " extra Japanese " + keyType + " keys");
        }
    }

    /**
     * Program entry point. Any exception is printed to standard error and the process ends
     * with exit status 1 so that calling scripts can detect the failure.
     *
     * @param args see {@link #sed(String[])}
     */
    public static void main(String[] args) {
        try {
            ZephyrUtil.setUTF8Ouput();
            sed(args);
        } catch (Exception e) {
            e.printStackTrace();
            // a filter that failed half-way must not look successful to the pipeline
            System.exit(1);
        }
    }

}
