package zephyr.sanshusha.accessdejp;

import java.util.HashMap;
import java.util.LinkedList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import zephyr.util.ZephyrUtil;

/**
 * Command-line tool that reads a dictionary file, decodes every record in its
 * body section and prints the entries to standard output as HTML-like markup
 * (one {@code <dt>}/{@code <dd>} pair per entry, with cross-reference links).
 *
 * <p>Decoding is done in three stages per record: a byte-level pass
 * ({@link #decode}), tag expansion by {@link TagDecoder}, and text formatting
 * ({@link #htmlFormat}). Links between entries are added after all records
 * have been read, because a link target may appear later in the file.</p>
 */
public class DecodeRoboword4 {

    /** Number of bytes skipped at the start of the file before the body address. */
    private static final int HEADER_LEAD_SIZE = 896;
    /** Number of bytes skipped between the body address and the index address. */
    private static final int HEADER_GAP_SIZE = 20;
    /** Size in bytes of one value read by {@code BinaryFile.readInt()} as counted by the original offsets. */
    private static final int INT_SIZE = 4;
    /** Bytes skipped before the length field of each record. */
    private static final int RECORD_LEAD_SIZE = 1;
    /** Bytes skipped after the length field of each record. */
    private static final int RECORD_TAIL_SIZE = 12;

    /** Headword at the start of a decoded record: {@code <b>[<sub>..</sub>] word</b>}. */
    private static final Pattern SINGLE = Pattern.compile("^<b>((<sub>.+?</sub> *)?(.+?))</b>　?");
    /** One non-empty line of the entry body. */
    private static final Pattern LINE = Pattern.compile("[^\n]+");
    /** Part of a headword that follows an ellipsis and/or a bracketed section. */
    private static final Pattern MIDASHI = Pattern.compile("(…|…?［.+?］)([^［〈]+)");
    /** One {@code <p>} cell inside the conjugation table. */
    private static final Pattern MORPH = Pattern.compile("<p>(.+?)</p>");
    /** Cross-reference candidates; group 1 is the word that is looked up. */
    private static final Pattern REF = Pattern.compile("＜([^「]+?)「");
    private static final Pattern PART = Pattern.compile("（([^の）]+?)の.+?）");
    private static final Pattern REL = Pattern.compile("←([a-zA-ZäöüßÄÖÜ]+)");
    private static final Pattern DEWORD = Pattern.compile("([a-zA-ZäöüßÄÖÜ\\|]+)");

    private final TagDecoder tagDecoder = new TagDecoder();

    /** Maps a (normalized) headword to the id of the entry that defines it. */
    private final HashMap<String, Integer> dtIdMap = new HashMap<String, Integer>();

    /**
     * Expands a back-reference: copies bytes that were already written to
     * {@code outBuf} to its end. The two bytes following {@code index} encode
     * the copy length and the distance back from the current end.
     *
     * @param inData  encoded record bytes
     * @param index   position of the opcode byte
     * @param datalen number of valid bytes in {@code inData}
     * @param lenoff  value added to the decoded length
     * @param stoff   value added to the decoded distance
     * @param outBuf  output buffer, read and appended to
     * @return number of extra input bytes consumed (always 2)
     */
    private int decodeByLZ(byte[] inData, int index, int datalen, int lenoff, int stoff,
            Buffer outBuf) {
        // Both operand bytes must belong to this record; bytes past datalen are
        // leftovers from an earlier read and must not be interpreted.
        if (index + 2 >= datalen) {
            System.err.println("ignore truncated back-reference at index=" + index);
            return 2;
        }
        int length = Table2.decode(Util.sbyte2int(inData[index + 1])) + lenoff;
        int start = Table2.decode(Util.sbyte2int(inData[index + 2])) + stoff;
        int outbase = outBuf.length() - start;
        if (outbase < 0) {
            System.err.println("ignore outbase=" + outbase);
            return 2;
        }
        // NOTE(review): a distance <= 0 would read bytes that have not been
        // written yet; left as-is because the valid range of Table2 is not
        // visible here.
        // The source range may overlap the bytes being appended, so the copy
        // has to proceed one byte at a time.
        for (int i = 0; i < length; i++) {
            int v = Util.sbyte2int(outBuf.data[outbase + i]);
            outBuf.add((byte) v);
        }
        return 2;
    }

    /**
     * Appends {@code count} copies of {@code outBuf.getLeftByte(1, 0)}; the
     * byte is fetched once, before anything is appended.
     */
    private static void repeatLeftByte(Buffer outBuf, int count) {
        byte d = outBuf.getLeftByte(1, 0);
        for (int n = 0; n < count; n++) {
            outBuf.add(d);
        }
    }

    /**
     * Decodes the input byte at {@code index} and appends the result to
     * {@code outBuf}. Bytes that {@link Table1} maps to a positive code are
     * emitted directly; the remaining values are opcodes handled below.
     *
     * @param inData  encoded record bytes
     * @param index   position of the byte to decode
     * @param datalen number of valid bytes in {@code inData}
     * @param outBuf  output buffer
     * @return number of additional input bytes consumed after {@code index}
     */
    private int decodeByte(byte[] inData, int index, int datalen, Buffer outBuf) {
        int unsignedByte = Util.sbyte2int(inData[index]);
        int code = Table1.decode(unsignedByte);
        if (code > 0) {
            outBuf.add((byte) code);
            return 0;
        }

        int skip = 0;
        switch (unsignedByte) {
        // Opcodes that derive one byte from earlier output.
        case 0x23:
            outBuf.add(outBuf.getLeftByte(2, 0));
            break;
        case 0x3E:
            outBuf.add(outBuf.getLeftByte(1, 3));
            break;
        case 0x42:
            outBuf.add(outBuf.getLeftByte(1, -1));
            break;
        case 0x48:
            outBuf.add(outBuf.getLeftByte(1, -3));
            break;
        case 0xCA:
            outBuf.add(outBuf.getLeftByte(1, 2));
            break;
        case 0xDE:
            outBuf.add(outBuf.getLeftByte(1, 1));
            break;
        case 0xE9:
            outBuf.add(outBuf.getLeftByte(1, -2));
            break;
        // Opcodes that repeat getLeftByte(1, 0) once, twice or three times.
        case 0xA3:
        case 0xE2:
            repeatLeftByte(outBuf, 1);
            break;
        case 0x4A:
            repeatLeftByte(outBuf, 2);
            break;
        case 0x3D:
            repeatLeftByte(outBuf, 3);
            break;
        case 0xED:
            // 0xED 0xCA is a line feed; a lone 0xED removes the last output
            // byte. Only look at the next byte if it is part of this record.
            if (index + 1 < datalen && Util.sbyte2int(inData[index + 1]) == 0xCA) {
                outBuf.add((byte) 0x0A);
                skip = 1;
            } else {
                outBuf.deleteLast();
            }
            break;
        // Back-references; they differ only in the length/distance offsets.
        case 0x29:
            skip = decodeByLZ(inData, index, datalen, 0, -1, outBuf);
            break;
        case 0x6F:
            skip = decodeByLZ(inData, index, datalen, 0, -2, outBuf);
            break;
        case 0x78:
            skip = decodeByLZ(inData, index, datalen, 0, 0, outBuf);
            break;
        case 0xEB:
            skip = decodeByLZ(inData, index, datalen, 1, 0, outBuf);
            break;
        case 0xE0:
        case 0xE1:
            // Tags: passed through unchanged, they are post-processed later.
            outBuf.add(inData[index]);
            break;
        case 0x25:
        case 0x26:
        case 0x32:
        case 0x39:
        case 0xA6:
        case 0xB1:
        case 0xCB:
        case 0xF1:
            // Purpose unknown: passed through unchanged.
            outBuf.add(inData[index]);
            break;
        default:
            System.err.println("unknown byte: "
                    + String.format("0x%X=%d", unsignedByte, unsignedByte));
            break;
        }
        return skip;
    }

    /**
     * Decodes the first {@code datalen} bytes of {@code inData} into
     * {@code outBuf}.
     */
    private void decode(byte[] inData, int datalen, Buffer outBuf) {
        int pos = 0;
        while (pos < datalen) {
            // One byte for the opcode itself plus whatever operands it used.
            pos += 1 + decodeByte(inData, pos, datalen, outBuf);
        }
    }

    /**
     * Reads the file header and positions {@code bin} at the start of the body
     * section.
     *
     * @param bin file positioned at offset 0
     * @return length of the body section (index address minus body address)
     * @throws Exception if reading from {@code bin} fails
     */
    private int parseHeader(BinaryFile bin) throws Exception {
        bin.skip(HEADER_LEAD_SIZE);
        int bodyAddress = bin.readInt();
        bin.skip(HEADER_GAP_SIZE);
        int indexAddress = bin.readInt();

        int consumed = HEADER_LEAD_SIZE + INT_SIZE + HEADER_GAP_SIZE + INT_SIZE;
        int gap = bodyAddress - consumed;
        if (gap > 0) {
            bin.skip(gap);
        }
        // Body data length.
        return indexAddress - bodyAddress;
    }

    /** Wraps {@code key} in a {@code <key>} element of type 表記. */
    private String makeHyokiKey(String key) {
        return "<key type=\"表記\">" + key + "</key>";
    }

    /** Builds the search-key markup for {@code key}. */
    private String makeKey(String key) {
        return makeHyokiKey(key);
    }

    /**
     * Reflows the first conjugation table (活用表) found in {@code txt}: the
     * individual {@code <p>} cells are joined into indented rows. A cell
     * starting with "（" begins a new row at indent 2, a cell starting with
     * "·" begins a new row at indent 3, and the remaining cells are appended
     * to the current row separated by " / ".
     *
     * @param txt formatted entry text
     * @return {@code txt} with the table rewritten, or {@code txt} unchanged
     *         if no complete table is present
     */
    private String fixMorphTable(String txt) {
        final String TABLE_START = "<p>◆活用表<SUBSECTION/></p>";
        final String TABLE_END = TagDecoder.SECTION_TAG + "</p>";

        int tableFrom = txt.indexOf(TABLE_START);
        if (tableFrom < 0) {
            return txt;
        }
        tableFrom += TABLE_START.length();
        int tableTo = txt.indexOf(TABLE_END, tableFrom);
        if (tableTo < 0) {
            return txt;
        }
        tableTo += TABLE_END.length();

        String tabletxt = txt.substring(tableFrom, tableTo);
        StringBuilder sb = new StringBuilder(txt.length() + 64);
        sb.append(txt, 0, tableFrom);
        sb.append("<indent val=\"2\"><p>");

        Matcher m = MORPH.matcher(tabletxt);
        boolean first = true;
        int consumed = 0; // end of the last matched cell within tabletxt
        int nent = 0;     // number of plain cells seen in the current row
        int indent = 2;
        while (m.find()) {
            String column = m.group(1);
            if (first) {
                // The first cell continues the paragraph opened above.
                first = false;
            } else if (column.startsWith("（")) {
                if (indent == 2) {
                    sb.append("</p><p>");
                } else {
                    sb.append("</p><indent val=\"2\"><p>");
                    indent = 2;
                }
                nent = 0;
            } else if (column.startsWith("·")) {
                sb.append("</p><indent val=\"3\"><p>");
                indent = 3;
                nent = 0;
                column = "・" + column.substring(1);
            } else {
                nent++;
            }
            if (nent >= 2) {
                sb.append(" / ");
            }
            sb.append(column);
            if (nent == 0) {
                sb.append(" ");
            }
            consumed = m.end();
        }
        if (consumed < tabletxt.length()) {
            sb.append(tabletxt, consumed, tabletxt.length());
        }
        sb.append("</p>");
        sb.append("<indent val=\"1\">");
        sb.append(txt, tableTo, txt.length());
        return sb.toString();
    }

    /**
     * Normalizes punctuation marks in {@code line} and inserts a space after
     * {@code . , ; : !} when the next character is neither a space nor a
     * period.
     */
    private String fixMarks(String line) {
        final String[][] replaces =
                new String[][] { { "’", "'" }, { "（（", "《" }, { "））", "》" }, { "《英）", "（英 " },
                        { "…", "･･･" }, { "�", "･" }, { "/（", " / （" } };
        final String[][] regReplaces = new String[][] { { "([\\.,;:!])([^ \\.])", "$1 $2" } };
        return ZephyrUtil.sed(line, replaces, regReplaces);
    }

    /**
     * Registers headword {@code dt} as the target of entry {@code idx}. The
     * headword is stored without a trailing "*" / "（*）" marker or a leading
     * {@code <sub>} element, and additionally without "·" and without "|"
     * when that yields a different string.
     */
    private void addDtId(String dt, int idx) {
        if (dt.endsWith("*")) {
            dt = dt.substring(0, dt.length() - 1);
        } else if (dt.endsWith("（*）")) {
            dt = dt.substring(0, dt.length() - 3);
        } else if (dt.startsWith("<sub>")) {
            // Drop the whole <sub>..</sub> element, or just the opening tag
            // when the closing tag is missing.
            int close = dt.indexOf("</sub>");
            int from = (close >= 0) ? close + "</sub>".length() : "<sub>".length();
            dt = dt.substring(from).trim();
        }
        dtIdMap.put(dt, idx);

        String withoutDots = dt.replace("·", "");
        if (!withoutDots.equals(dt)) {
            dtIdMap.put(withoutDots, idx);
        }
        String withoutBars = withoutDots.replace("|", "");
        if (!withoutBars.equals(withoutDots)) {
            dtIdMap.put(withoutBars, idx);
        }
    }

    /**
     * Looks up the entry id registered for {@code word}, retrying with all
     * "|" characters removed.
     *
     * @return the entry id, or -1 if the word is not registered
     */
    private int getDtId(String word) {
        Integer id = dtIdMap.get(word);
        if (id == null) {
            String stripped = word.replace("|", "");
            if (!stripped.equals(word)) {
                id = dtIdMap.get(stripped);
            }
        }
        return (id != null) ? id : -1;
    }

    /**
     * Converts one decoded record into {@code <dt>}/{@code <dd>} markup and
     * registers its headword for cross-referencing.
     *
     * @param line   decoded record text
     * @param idx    id assigned to this entry
     * @param isdejp when true, body lines starting with a Latin letter,
     *               {@code <i>} or "▷" are placed at indent 2 and all other
     *               lines at indent 1
     * @return the formatted entry, or {@code line} unchanged if it does not
     *         start with a {@code <b>} headword
     */
    private String htmlFormat(String line, int idx, boolean isdejp) {
        Matcher head = SINGLE.matcher(line);
        if (!head.find()) {
            return line;
        }
        String dt = head.group(1);
        String dd = line.substring(head.end());

        StringBuilder out = new StringBuilder(line.length() + 64);
        out.append("<dt id=\"").append(idx).append("\">").append(dt).append("</dt>");
        if (head.group(2) != null) {
            // Headword with a <sub> prefix: the bare word is also a key.
            out.append(makeKey(head.group(3)));
        }
        Matcher midashi = MIDASHI.matcher(dt);
        if (midashi.find()) {
            out.append(makeKey(midashi.group(2)));
        }
        out.append("<dd>");
        addDtId(dt, idx);

        Matcher lines = LINE.matcher(fixMarks(dd));
        int indent = 1;
        while (lines.find()) {
            String txt = lines.group();
            if (isdejp) {
                // LINE never matches an empty string, so charAt(0) is safe.
                char c = Character.toLowerCase(txt.charAt(0));
                boolean wantsIndent2 = ('a' <= c && c <= 'z') || txt.startsWith("<i>")
                        || txt.startsWith("▷");
                int wanted = wantsIndent2 ? 2 : 1;
                if (indent != wanted) {
                    out.append("<indent val=\"").append(wanted).append("\">");
                    indent = wanted;
                }
            }
            out.append("<p>").append(txt).append("</p>");
        }
        return fixMorphTable(out.toString()) + "</dd>";
    }

    /**
     * Wraps every group-1 match of {@code p} that names a registered headword
     * in an {@code <a href="#id">} link.
     *
     * @return the linked text, or {@code line} itself if nothing was linked
     */
    private String addHref(String line, Pattern p) {
        StringBuilder sb = new StringBuilder();
        int copied = 0; // end of the part of line already written to sb
        Matcher m = p.matcher(line);
        while (m.find()) {
            String key = m.group(1);
            int id = getDtId(key);
            if (id >= 0) {
                sb.append(line, copied, m.start(1));
                sb.append(String.format("<a href=\"#%d\">%s</a>", id, key));
                copied = m.end(1);
            }
        }
        if (copied == 0) {
            return line;
        }
        if (copied < line.length()) {
            sb.append(line, copied, line.length());
        }
        return sb.toString();
    }

    /**
     * Adds cross-reference links to a formatted entry: first for the REF,
     * PART and REL patterns over the whole text, then for every word in the
     * first paragraph of the related-words (関連語) section.
     */
    private String addRef(String line) {
        line = addHref(line, REF);
        line = addHref(line, PART);
        line = addHref(line, REL);

        final String KANREN_START = "◆関連語<SUBSECTION/></p><indent val=\"2\"><p>";
        int from = line.indexOf(KANREN_START);
        // indexOf reports "not found" as -1; position 0 is a valid match.
        if (from >= 0) {
            from += KANREN_START.length();
            int to = line.indexOf("</p>", from);
            // Without a closing </p> there is no paragraph to link.
            if (to >= 0) {
                String linked = addHref(line.substring(from, to), DEWORD);
                line = line.substring(0, from) + linked + line.substring(to);
            }
        }
        return line;
    }

    /** Removes the structural marker tags and empty {@code <p>}/{@code <b>} elements. */
    private String dropTags(String line) {
        final String[][] replaces =
                new String[][] { { TagDecoder.SECTION_TAG, "" }, { TagDecoder.SUBSECTION_TAG, "" },
                        { TagDecoder.WAKU_START_TAG, "" }, { TagDecoder.WAKU_END_TAG, "" },
                        { "<p></p>", "" }, { "<b></b>", "" } };
        return ZephyrUtil.sed(line, replaces, null);
    }

    /**
     * Reads all records of {@code inFile}, then prints the formatted entries
     * wrapped in {@code <html><body>} to standard output. Progress and
     * diagnostics go to standard error.
     *
     * @param inFile path of the dictionary file
     * @throws Exception if the file cannot be read
     */
    private void loadDic(String inFile) throws Exception {
        // NOTE(review): bin is never closed; BinaryFile's API is not visible
        // here, so confirm whether it needs an explicit close().
        BinaryFile bin = new BinaryFile(inFile);
        byte[] inData = new byte[Buffer.BUFSIZE];
        Buffer outBuf = new Buffer();
        boolean isdejp = inFile.endsWith("sansygj.dic");

        int remain = parseHeader(bin);
        int entryCount = 0;
        LinkedList<String> bodyTxt = new LinkedList<String>();

        while (remain > 0) {
            bin.skip(RECORD_LEAD_SIZE);
            int indexVol = bin.readInt();
            bin.skip(RECORD_TAIL_SIZE);
            if (indexVol <= 0 || indexVol > inData.length) {
                // Implausible record length: stop rather than read garbage.
                System.err.println("indexVol=" + indexVol);
                break;
            }
            bin.read(inData, indexVol);
            remain -= (RECORD_LEAD_SIZE + INT_SIZE + RECORD_TAIL_SIZE + indexVol);

            decode(inData, indexVol, outBuf);
            String line = tagDecoder.decode(outBuf.data, outBuf.length());
            outBuf.clear();
            bodyTxt.add(htmlFormat(line, entryCount, isdejp));
            entryCount++;
            if ((entryCount % 1000) == 0) {
                System.err.print(".");
            }
        }

        // Links are resolved only now, once every headword has been registered.
        System.out.println("<html><body>");
        for (String line : bodyTxt) {
            System.out.println(dropTags(addRef(line)));
        }
        System.out.println("</body></html>");
        System.err.println();
        System.err.println("done. " + entryCount + " entries");
        TagDecoder.dumpUnknown();
    }

    /**
     * Entry point.
     *
     * @param args {@code args[0]} is the path of the dictionary file to decode
     */
    public static void main(String[] args) {
        if (args.length == 0) {
            System.err.println("Usage: java DecodeRoboword4 SOME_DIR/robo4_file.dic");
            return;
        }
        DecodeRoboword4 app = new DecodeRoboword4();
        try {
            ZephyrUtil.setUTF8Ouput();
            app.loadDic(args[0]);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

}
