package zephyr.obunsha.royalfrjp;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import zephyr.util.ZephyrUtil;

/**
 * Converts the per-entry HTML pages of the dictionary ("item pages") into one
 * XML document written to standard output.
 *
 * <p>For every item page the tool:
 * <ol>
 * <li>reads the text from the {@code <body ...>} line up to {@code </body>},
 * joined into a single line;</li>
 * <li>repairs known markup defects ({@link #fixItem(String)});</li>
 * <li>converts the ASCII-coded pronunciation spans to phonetic characters
 * ({@link #fixHatuon(String)});</li>
 * <li>replaces private "gaiji" placeholder characters
 * ({@link #fixGaiji(String)});</li>
 * <li>prepends a {@code <dt>} headword element, plus a {@code <conju>} element
 * taken from the matching {@code *_head.htm} file
 * ({@link #appendMidasi(String, String)}).</li>
 * </ol>
 *
 * <p>Progress and diagnostics go to standard error.
 */
public class MakeBodyHtml {

    /**
     * Literal (non-regex) repairs for known broken markup in the source pages,
     * as {@code { broken, fixed }} pairs. They are applied in array order by
     * {@link #fixItem(String)}; the order is kept exactly as originally written.
     */
    private static final String[][] ITEM_REPLACEMENTS =
            {
                    { "<hr>", "<hr/>" },
                    {
                            "<a name=\"info1\">&nbsp;</a><span id=\"arrow_info1\" class=\"arrow\"  onClick=\"Display_block('info1')\">▼</span></a>",
                            "<a name=\"info1\">&nbsp;<span id=\"arrow_info1\" class=\"arrow\"  onClick=\"Display_block('info1')\">▼</span></a>" },
                    {
                            "<a name=\"yougo1\">&nbsp;</a><span id=\"arrow_yougo1\" class=\"arrow\"  onClick=\"Display_block('yougo1')\">▼</span></a>",
                            "<a name=\"yougo1\">&nbsp;<span id=\"arrow_yougo1\" class=\"arrow\"  onClick=\"Display_block('yougo1')\">▼</span></a>" },
                    {
                            "<a name=\"inyou1\">&nbsp;</a><span id=\"arrow_inyou1\" class=\"arrow\"  onClick=\"Display_block('inyou1')\">▼</span></a>",
                            "<a name=\"inyou1\">&nbsp;<span id=\"arrow_inyou1\" class=\"arrow\"  onClick=\"Display_block('inyou1')\">▼</span></a>" },
                    { "<span class=\"gogen\">(<ギリシアの地方Attique)</span>",
                            "<span class=\"gogen\">(＜ギリシアの地方Attique)</span>" },
                    { "＜a name=\"yourei_1\">", "<a name=\"yourei_1\">" },
                    {
                            "<a name=\"kanren1\">&nbsp;</a><span id=\"arrow_kanren1\" class=\"arrow\"  onClick=\"Display_block('kanren1')\">▼</span></a>",
                            "<a name=\"kanren1\">&nbsp;<span id=\"arrow_kanren1\" class=\"arrow\"  onClick=\"Display_block('kanren1')\">▼</span></a>" },
                    { "<span class=\"gogen\">(<it.)</span>",
                            "<span class=\"gogen\">(＜it.)</span>" },
                    { "<span class=\"gogen\">(<angl.)</span>",
                            "<span class=\"gogen\">(＜angl.)</span>" },
                    { "<span class=\"gogen\">(<数学者Hermite)</span>",
                            "<span class=\"gogen\">(＜数学者Hermite)</span>" },
                    { "<span class=\"gogen\">(<イタリア北部の州Lombardie)</span>",
                            "<span class=\"gogen\">(＜イタリア北部の州Lombardie)</span>" },
                    { "＜span ", "<span " },
                    {
                            "<tr id=\"sub_kanren1\" class=\"block\"><span class=\"tyuki\">(<span class=\"aster\"><td><table>＊</span>",
                            "<tr id=\"sub_kanren1\" class=\"block\"><td><table><span class=\"tyuki\">(<span class=\"aster\">＊</span>" },
                    { "<span class=\"gogen\">(<タイ王国の旧称Siam)</span>",
                            "<span class=\"gogen\">(＜タイ王国の旧称Siam)</span>" } };

    /** Matches an {@code <input ...>} tag that is not self-closed. */
    private static final Pattern UNCLOSED_INPUT = Pattern.compile("(<input[^/<]+)>");

    /**
     * Repairs known markup defects so that the item becomes well-formed.
     *
     * @param item the raw item markup (body of one page joined into one line)
     * @return the item with every literal repair applied in table order and
     *         every unclosed {@code <input>} tag self-closed
     */
    private String fixItem(String item) {
        for (String[] pair : ITEM_REPLACEMENTS) {
            item = item.replace(pair[0], pair[1]);
        }
        return UNCLOSED_INPUT.matcher(item).replaceAll("$1/>");
    }

    /** Maps the ASCII codes used inside pronunciation spans to phonetic characters. */
    private static final HashMap<Character, String> HATUON_MAP = new HashMap<Character, String>();
    static {
        HATUON_MAP.put('+', "ɑ");
        HATUON_MAP.put('#', "ɛ");
        HATUON_MAP.put('%', "œ");
        // NOTE(review): this is U+0278 (phi); French transcriptions normally use
        // U+00F8 (o with stroke) here. Left unchanged - confirm against the source font.
        HATUON_MAP.put('?', "ɸ");
        HATUON_MAP.put('$', "ɔ");
        HATUON_MAP.put('\'', "ə");
        HATUON_MAP.put('M', "ɲ");
        HATUON_MAP.put('G', "ɛ̀");
        HATUON_MAP.put('_', "ɥ");
        //
        HATUON_MAP.put('0', "ʒ");
        HATUON_MAP.put('1', "ã");
        HATUON_MAP.put('2', "ɑ̃");
        HATUON_MAP.put('3', "ɛ̃");
        HATUON_MAP.put('4', "ɔ̃");
        HATUON_MAP.put('5', "œ̃");
        HATUON_MAP.put('6', "ʃ");
        HATUON_MAP.put('7', "ŋ");
        HATUON_MAP.put('8', "θ");
        HATUON_MAP.put('9', "ð");
    }

    /** Punctuation that is copied through unchanged inside a pronunciation span. */
    private static final String HATUON_PASSTHROUGH = "/,; ()-~.";

    /**
     * Converts the content of one pronunciation span to phonetic characters.
     *
     * <p>Mapped codes are replaced via {@link #HATUON_MAP}; lowercase ASCII
     * letters and the punctuation in {@link #HATUON_PASSTHROUGH} are copied;
     * embedded tags ({@code <...>}) are copied verbatim. A {@code '<'} without a
     * closing {@code '>'} is reported on standard error and dropped.
     *
     * <p>Any other character is treated as a fatal data error: it is reported
     * on standard error and the JVM exits with status 1.
     *
     * @param hatuon the text between the opening and closing tag of the span
     * @return the converted text
     */
    private String fixHatuonChars(String hatuon) {
        StringBuilder sb = new StringBuilder(hatuon.length());
        for (int i = 0; i < hatuon.length(); i++) {
            char c = hatuon.charAt(i);
            String phon = HATUON_MAP.get(c);
            if (phon != null) {
                sb.append(phon);
            } else if ('a' <= c && c <= 'z') {
                sb.append(c);
            } else if (c == '<') {
                int close = hatuon.indexOf('>', i);
                if (close >= 0) {
                    // Copy the whole tag and continue after it.
                    sb.append(hatuon.substring(i, close + 1));
                    i = close;
                } else {
                    System.err.println("invalid hatuon: " + hatuon);
                }
            } else if (HATUON_PASSTHROUGH.indexOf(c) >= 0) {
                sb.append(c);
            } else {
                System.err.println("unknown " + c + " in " + hatuon);
                System.exit(1);
            }
        }
        return sb.toString();
    }

    private static final String HATUON_START = "<span class=\"hatuon\">";
    private static final String SPAN_START = "<span ";
    private static final String SPAN_END = "</span>";
    private static final String HATUON_END = SPAN_END;
    private static final String PHONETIC_START = "<span class=\"phonetic_inLine\">";
    private static final String PHONETIC_END = SPAN_END;

    /**
     * Locates the next pronunciation span ({@code hatuon} or
     * {@code phonetic_inLine}) at or after position {@code i}.
     *
     * <p>Whichever of the two span kinds starts first is returned, so a
     * {@code phonetic_inLine} span located before a later {@code hatuon} span is
     * no longer skipped. For a {@code hatuon} span, one nested {@code <span>}
     * is tolerated: its closing tag is skipped when looking for the end.
     *
     * @param item the item markup
     * @param i    the position to start searching from
     * @return {@code { contentStart, contentEnd }} (end exclusive), or
     *         {@code null} when there is no further span or the span found has
     *         no closing tag (reported on standard error)
     */
    private int[] findHatuon(String item, int i) {
        int hatuonAt = item.indexOf(HATUON_START, i);
        int phoneticAt = item.indexOf(PHONETIC_START, i);
        boolean hatuonFirst = hatuonAt >= 0 && (phoneticAt < 0 || hatuonAt < phoneticAt);

        int start;
        int end;
        if (hatuonFirst) {
            start = hatuonAt + HATUON_START.length();
            end = item.indexOf(HATUON_END, start);
            int nested = item.indexOf(SPAN_START, start);
            if (nested >= 0 && nested < end) {
                // The first closing tag belongs to the nested span; the one
                // after it closes the hatuon span.
                int nestedEnd = item.indexOf(SPAN_END, nested);
                end = item.indexOf(HATUON_END, nestedEnd + SPAN_END.length());
            }
        } else if (phoneticAt >= 0) {
            start = phoneticAt + PHONETIC_START.length();
            end = item.indexOf(PHONETIC_END, start);
        } else {
            return null;
        }

        if (end < 0) {
            // Previously this produced an end index of -1 and a
            // StringIndexOutOfBoundsException in the caller.
            System.err.println("unterminated pronunciation span: " + item.substring(start));
            return null;
        }
        return new int[] { start, end };
    }

    /**
     * Converts the content of every pronunciation span in the item.
     *
     * @param item the item markup
     * @return the item with each span content passed through
     *         {@link #fixHatuonChars(String)}; text outside the spans is
     *         unchanged
     */
    private String fixHatuon(String item) {
        StringBuilder sb = new StringBuilder(item.length());
        int idx = 0;
        while (idx < item.length()) {
            int[] span = findHatuon(item, idx);
            if (span == null) {
                sb.append(item.substring(idx));
                break;
            }
            sb.append(item, idx, span[0]);
            sb.append(fixHatuonChars(item.substring(span[0], span[1])));
            idx = span[1];
        }
        return sb.toString();
    }

    /**
     * Maps the placeholder character found inside a {@code gaiji} span to its
     * replacement: either a single character or a markup fragment.
     */
    private static final HashMap<Character, String> GAIJI_MAP = new HashMap<Character, String>();
    static {
        // http://www.unicode.org/charts/charindex.html
        // http://www.unicode.org/charts/unihan.html
        GAIJI_MAP.put('亜', "枘"); // reading: hozo
        GAIJI_MAP.put('唖', "痊"); // reading: za
        GAIJI_MAP.put('ち', "א"); // aleph
        GAIJI_MAP.put('鞍', "蒴"); // reading: saku
        GAIJI_MAP.put('逢', "窩"); // reading: ka
        GAIJI_MAP.put('握', "麬");
        GAIJI_MAP.put('娃', "炻");
        GAIJI_MAP.put('阿', "埵");
        GAIJI_MAP.put('哀', "癤");
        GAIJI_MAP.put('愛', "垜");
        GAIJI_MAP.put('杏', "煆");
        GAIJI_MAP.put('挨', "蒄");
        GAIJI_MAP.put('姶', "癭");
        GAIJI_MAP.put('葵', "獐");
        GAIJI_MAP.put('茜', "搐");
        GAIJI_MAP.put('穐', "骶");
        GAIJI_MAP.put('悪', "禱");
        GAIJI_MAP.put('こ', "©");
        GAIJI_MAP.put('渥', "皶");
        GAIJI_MAP.put('旭', "簎");
        GAIJI_MAP.put('は', "<sup>1</sup>/<sub>2</sub>");
        GAIJI_MAP.put('以', "鄧");
        GAIJI_MAP.put('葦', "桛");
        GAIJI_MAP.put('芦', "顆");
        GAIJI_MAP.put('鯵', "蹰");
        GAIJI_MAP.put('斡', "鱝");
        // Entry "gangue": one glyph composed of the radicals "metal" + "pass
        // through", followed by "stone" (reading: hiseki). No code point was
        // found; possibly somewhere near U+9300?
        GAIJI_MAP.put('扱', "(金+通)");
        GAIJI_MAP.put('梓', "糝");
        // Entries "garrot1", "trapeze": U+9B10; the glyph looks like the
        // "hair" character with its lower component replaced - unconfirmed.
        GAIJI_MAP.put('宛', "鬐");
        GAIJI_MAP.put('姐', "晷");
        GAIJI_MAP.put('虻', "嗉");
        GAIJI_MAP.put('飴', "橅");
        GAIJI_MAP.put('絢', "騸");
        GAIJI_MAP.put('圧', "龕");
        GAIJI_MAP.put('の', "<sup>1</sup>/<sub>3</sub>");
        GAIJI_MAP.put('綾', "楣");
        GAIJI_MAP.put('鮎', "妍");
        GAIJI_MAP.put('あ', "☞");
        GAIJI_MAP.put('或', "瘭");
        GAIJI_MAP.put('伊', "鈹");
        GAIJI_MAP.put('粟', "跗");
        GAIJI_MAP.put('暗', "墩");
        GAIJI_MAP.put('案', "癆");
        GAIJI_MAP.put('安', "瘙");
        GAIJI_MAP.put('庵', "嘈");
        GAIJI_MAP.put('せ', "ι");
        GAIJI_MAP.put('位', "苆");
        GAIJI_MAP.put('そ', "卐");
        GAIJI_MAP.put('闇', "跑");
        GAIJI_MAP.put('た', "℣"); // FR: verset, EN: versicle
        //
        // NOTE: musical symbols: http://www.unicode.org/charts/PDF/U1D100.pdf
        // http://www.mozart.co.uk/filelibrary/fonts.htm
        // The following entries use private-use code points instead of the
        // supplementary-plane characters named in the comments.
        GAIJI_MAP.put('く', "\uE200"); // $ + ||
        GAIJI_MAP.put('お', "\uE201"); // U+1D135, 2/4 time signature
        GAIJI_MAP.put('か', "\uE202"); // U+1D110, fermata
        GAIJI_MAP.put('き', "\uE203"); // U+1D12A, double sharp
        GAIJI_MAP.put('袷', "\uE204"); // stands for the ideograph U+25771
        GAIJI_MAP.put('い', "√3");
        GAIJI_MAP.put('う', "<sup>3</sup>√9"); // cube root of 9
        GAIJI_MAP.put('え', "<sup>4</sup>√10"); // fourth root of 10
    }

    /**
     * Debug helper: prints every distinct gaiji replacement, in sorted order,
     * to standard error together with the hexadecimal value of each of its
     * {@code char}s.
     */
    static void showGaiji() {
        TreeSet<String> replacements = new TreeSet<String>(GAIJI_MAP.values());
        for (String s : replacements) {
            System.err.print(String.format("%s\t0x", s));
            for (int i = 0; i < s.length(); i++) {
                System.err.print(String.format("%04X ", (int) s.charAt(i)));
            }
            System.err.println();
        }
    }

    private static final String GAIJI_REGEXP = "<span class=\"gaiji\">(.+?)</span>";
    private static final Pattern GAIJI = Pattern.compile(GAIJI_REGEXP);

    /**
     * Replaces the placeholder characters inside every {@code gaiji} span.
     *
     * <p>Each character of the span content is looked up in
     * {@link #GAIJI_MAP}. A one-character replacement is written as
     * {@code <gaiji>HEX</gaiji>} (lowercase hexadecimal code of the character);
     * a longer replacement is inserted as is. An unmapped character is
     * reported on standard error and re-wrapped in its own {@code gaiji} span.
     *
     * @param item the item markup
     * @return the item with all gaiji spans rewritten
     */
    private String fixGaiji(String item) {
        StringBuilder sb = new StringBuilder(item.length());
        Matcher m = GAIJI.matcher(item);
        int idx = 0;
        while (m.find()) {
            sb.append(item, idx, m.start());
            String txt = m.group(1);
            for (int i = 0; i < txt.length(); i++) {
                char c = txt.charAt(i);
                String gaiji = GAIJI_MAP.get(c);
                if (gaiji == null || gaiji.length() == 0) {
                    System.err.println("unknown gaiji: " + c);
                    sb.append("<span class=\"gaiji\">").append(c).append("</span>");
                } else if (gaiji.length() == 1) {
                    sb.append(String.format("<gaiji>%x</gaiji>", (int) gaiji.charAt(0)));
                } else {
                    sb.append(gaiji);
                }
            }
            idx = m.end();
        }
        sb.append(item.substring(idx));
        return sb.toString();
    }

    private static final String CONJU_REGEXP = "onClick=\"openVerbTable\\('([^']+)'\\)\">";
    private static final Pattern CONJU = Pattern.compile(CONJU_REGEXP);

    /** File-name fragment that is expanded to {@link #HEAD_SUFFIX}. */
    private static final String PAGE_SUFFIX = ".htm";
    private static final String HEAD_SUFFIX = "_head.htm";

    /**
     * Reads the conjugation-table id from the {@code *_head.htm} file that
     * accompanies an item page.
     *
     * <p>Only the file name is rewritten (last {@code ".htm"} becomes
     * {@code "_head.htm"}); the directory part of the path is left alone, so a
     * parent directory whose name contains {@code ".htm"} no longer breaks the
     * lookup.
     *
     * <p>NOTE(review): the two "not found" results differ - an empty string
     * when there is no head file, {@code null} when the head file has no
     * {@code openVerbTable} call. {@link #appendMidasi(String, String)}
     * therefore emits an empty {@code <conju></conju>} only in the first case.
     * Kept as is because consumers of the output may rely on it; confirm
     * whether this is intended.
     *
     * @param file the item page
     * @return the id passed to {@code openVerbTable(...)}, {@code ""} if there
     *         is no head file, or {@code null} if the head file contains no
     *         such call
     * @throws Exception if the head file cannot be read
     */
    private String getConjuId(File file) throws Exception {
        String name = file.getName();
        int ext = name.lastIndexOf(PAGE_SUFFIX);
        if (ext < 0) {
            return "";
        }
        String headName =
                name.substring(0, ext) + HEAD_SUFFIX + name.substring(ext + PAGE_SUFFIX.length());
        File headFile = new File(file.getAbsoluteFile().getParentFile(), headName);
        if (!headFile.isFile()) {
            return "";
        }
        InputStream in = new FileInputStream(headFile);
        try {
            BufferedReader br =
                    new BufferedReader(new InputStreamReader(in, ZephyrUtil.SHIFT_JIS_CODE));
            String line;
            while ((line = br.readLine()) != null) {
                Matcher m = CONJU.matcher(line);
                if (m.find()) {
                    return m.group(1);
                }
            }
            return null;
        } finally {
            // Closing the underlying stream releases the file even when
            // decoding or matching fails.
            in.close();
        }
    }

    private static final String MIDASI_REGEXP =
            "<body onLoad=\"init\\('([^']+)'\\)\">.*?<span class=\"midasi\">(.*?)</span>";
    private static final Pattern MIDASI = Pattern.compile(MIDASI_REGEXP);

    /**
     * Inserts a {@code <dt id="...">headword</dt>} element (and, when
     * {@code conjuId} is not {@code null}, a {@code <conju>} element) in front
     * of the {@code <body onLoad="init('...')">} tag.
     *
     * @param item    the item markup
     * @param conjuId the conjugation id, or {@code null} for none
     * @return the item with the elements inserted, or the unchanged item when
     *         it has no matching body tag and headword span
     */
    private String appendMidasi(String item, String conjuId) {
        Matcher m = MIDASI.matcher(item);
        if (!m.find()) {
            return item;
        }
        StringBuilder dt = new StringBuilder();
        dt.append(String.format("<dt id=\"%s\">%s</dt>", m.group(1), m.group(2)));
        if (conjuId != null) {
            dt.append("<conju>").append(conjuId).append("</conju>");
        }
        return item.substring(0, m.start()) + dt + item.substring(m.start());
    }

    /**
     * Reads an item page and returns its body as a single line.
     *
     * <p>Everything before the first line starting with {@code "<body "} is
     * discarded. That line is kept verbatim; the following lines are trimmed,
     * empty ones dropped, and reading stops after the line starting with
     * {@code "</body>"}.
     *
     * @param file the item page
     * @return the joined body text, or {@code null} if the file has no line
     *         starting with {@code "<body "}
     * @throws Exception if the file cannot be read
     */
    private String readBody(File file) throws Exception {
        InputStream in = new FileInputStream(file);
        try {
            BufferedReader br =
                    new BufferedReader(new InputStreamReader(in, ZephyrUtil.SHIFT_JIS_CODE));
            String line = br.readLine();
            while (line != null && !line.startsWith("<body ")) {
                line = br.readLine();
            }
            if (line == null) {
                return null;
            }
            StringBuilder sb = new StringBuilder(line);
            while ((line = br.readLine()) != null) {
                line = line.trim();
                if (line.isEmpty()) {
                    continue;
                }
                sb.append(line);
                if (line.startsWith("</body>")) {
                    break;
                }
            }
            return sb.toString();
        } finally {
            in.close();
        }
    }

    /**
     * Converts one item page and prints the result to standard output.
     *
     * <p>A page without a {@code <body ...>} line is reported on standard
     * error and skipped (previously the literal text {@code null} was written
     * to the output).
     *
     * @param file the item page
     * @throws Exception if the page or its head file cannot be read
     */
    private void loadOneItemPage(File file) throws Exception {
        String body = readBody(file);
        if (body == null) {
            System.err.println("no <body> line in " + file + "; skipped");
            return;
        }
        String conjuId = getConjuId(file);
        String item = fixItem(body);
        item = fixHatuon(item);
        item = fixGaiji(item);
        item = appendMidasi(item, conjuId);
        System.out.println(item);
    }

    /** Number of item pages visited so far, across all directories. */
    private int nLoaded = 0;

    /**
     * Converts the pages {@code <dirname>-00001.htm},
     * {@code <dirname>-00002.htm}, ... in {@code dir}, stopping at the first
     * number for which no file exists. Prints a dot to standard error after
     * every 1000 pages.
     *
     * @param dirname the file-name prefix (the name of the directory)
     * @param dir     the directory containing the pages
     * @throws Exception if a page cannot be read
     */
    private void loadItemPages(String dirname, File dir) throws Exception {
        String parent = dir.getAbsolutePath();
        for (int i = 1;; i++) {
            File file = new File(parent, String.format("%s-%05d.htm", dirname, i));
            if (!file.isFile()) {
                break;
            }
            loadOneItemPage(file);
            nLoaded++;
            if ((nLoaded % 1000) == 0) {
                System.err.print(".");
            }
        }
    }

    /** Prints the XML declaration, the DOCTYPE and the opening root element. */
    private void outHeader() {
        System.out.println("<?xml version=\"1.0\"?>");
        System.out.println("<!DOCTYPE Dummy PUBLIC \"Dummy\" \"dtd/Dummy.dtd\" [");
        System.out
                .println("<!ENTITY % ISOCommon PUBLIC \"-//ISO//ISO Common//EN\" \"dtd/ISOCommon.dtd\" >");
        System.out.println("%ISOCommon;");
        System.out.println("]>");
        System.out.println("<royal>");
    }

    /** Prints the closing root element. */
    private void outTail() {
        System.out.println("</royal>");
    }

    /**
     * Verifies that {@code dir} is an existing directory; otherwise prints an
     * error banner to standard error and exits the JVM with status 1.
     *
     * @param dir the path to check
     */
    static void checkDirectory(File dir) {
        if (dir.isDirectory()) {
            return;
        }
        System.err.println("**************************");
        if (dir.exists()) {
            System.err.println("ERROR: " + dir + " is not a directory.");
        } else {
            System.err.println("ERROR: cannot find " + dir);
        }
        System.err.println("**************************");
        System.exit(1);
    }

    /**
     * Converts all item pages below {@code path} into one XML document on
     * standard output.
     *
     * <p>If the last path component is a single character, the path is
     * treated as one letter directory. Otherwise its sub-directories
     * {@code a} to {@code z} are processed in order; a missing sub-directory
     * simply contributes no pages.
     *
     * @param path the item-page directory
     * @throws Exception if a page cannot be read
     */
    private void loadItemPages(String path) throws Exception {
        File dir = new File(path);
        checkDirectory(dir);
        String dirname = dir.getName();
        outHeader();
        if (dirname.length() == 1) {
            loadItemPages(dirname, dir);
        } else {
            for (char c = 'a'; c <= 'z'; c++) {
                System.err.print(c);
                File letterDir = new File(path, Character.toString(c));
                loadItemPages(letterDir.getName(), letterDir);
            }
        }
        outTail();
        System.err.println();
        System.err.println("done. " + nLoaded + " entries");
    }

    /**
     * Command-line entry point.
     *
     * @param args {@code args[0]} is the item-page directory; without
     *             arguments a usage message is printed
     */
    public static void main(String[] args) {
        if (args.length == 0) {
            System.err
                    .println("Usage: java MakeBodyHtml <SOME_DIR>/royal/contents/fr/itempages{/[a-z]}");
            return;
        }
        MakeBodyHtml app = new MakeBodyHtml();
        ZephyrUtil.setUTF8Ouput();
        try {
            app.loadItemPages(args[0]);
            // showGaiji();
        } catch (Exception e) {
            e.printStackTrace();
            // The document on stdout is incomplete at this point; flush what
            // was produced and signal the failure to the calling process.
            System.out.flush();
            System.exit(1);
        }
    }

}
