package zephyr.kenkyusya.lajp;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import zephyr.util.AppendJPKeys;
import zephyr.util.MultiHashMap;
import zephyr.util.ZephyrUtil;

/**
 * Command-line tool that converts a Japanese-Latin dictionary text file into an
 * HTML body written to standard output.
 *
 * <p>Each entry becomes a {@code <dt>}/{@code <dd>} pair. Japanese cross references
 * are linked to other entries of the generated body, and Latin words are linked to
 * the matching {@code <dt id="...">} of {@code body-lajp.html}.
 *
 * <p>Usage: {@code java MakeJpLaBodyHtml kenkyusya-jpla.txt body-lajp.html}
 */
public class MakeJpLaBodyHtml {

    // Use "Windows-31J", not "Shift_JIS", as the character encoding.
    // Writing "～" as Shift_JIS turns it into "?".
    // This is the WAVE DASH problem (TILDE problem):
    // http://park3.wakwak.com/~ozashin/sw_tips/webapp_tips/sjis_charset.html

    /** Lower-cased Latin key (as produced by ZephyrUtil.makeKey) to the id of its dt element. */
    private final HashMap<String, Integer> latinIdMap = new HashMap<String, Integer>();

    /** Matches {@code <dt id="N">headwords</dt>}; group 1 is the id, group 2 the headword text. */
    private static final String DT_ID_REGEXP = "<dt id=\"([0-9]+)\">(.+?)</dt>";
    private static final Pattern DT_ID = Pattern.compile(DT_ID_REGEXP);

    /**
     * Words that must never be treated as Latin headwords: the Latin linking pass
     * runs over text that already contains {@code <a href="...">} markup inserted
     * by the Japanese cross-reference pass.
     */
    private static final String[] TAGNAMES = { "a", "href" };

    /**
     * Reads the Latin-Japanese HTML body and records the id of every dt element
     * under each of its comma-separated headwords. The first id seen for a key wins.
     *
     * @param filename path of the UTF-8 encoded Latin-Japanese HTML body
     * @throws Exception if the file cannot be read or a project helper fails
     */
    private void loadLAJPBody(String filename) throws Exception {
        InputStream in = new FileInputStream(new File(filename));
        int n = 0;
        try {
            BufferedReader br = new BufferedReader(new InputStreamReader(in, "UTF-8"));
            String line;
            while ((line = br.readLine()) != null) {
                Matcher m = DT_ID.matcher(line);
                if (!m.find()) {
                    continue;
                }
                Integer id = Integer.valueOf(m.group(1));
                for (String key : m.group(2).split(", *")) {
                    // Locale.ROOT keeps the keys stable regardless of the default locale
                    // (e.g. the Turkish dotless i).
                    String key2 = ZephyrUtil.makeKey(ZephyrUtil.hex2uni(key), true)
                            .toLowerCase(Locale.ROOT);
                    if (!latinIdMap.containsKey(key2)) {
                        latinIdMap.put(key2, id);
                    }
                }
                n++;
            }
        } finally {
            // Closing the underlying stream releases the file handle even on failure.
            in.close();
        }
        System.err.println("load " + n + " latin-japanese entries' id");
        for (String t : TAGNAMES) {
            latinIdMap.remove(t);
        }
    }

    /** Marker line that opens the headword (kanji TAB kana) section of the input. */
    private static final String MIDASHI_START = "見だし開始";
    /** Marker line that closes the headword section of the input. */
    private static final String MIDASHI_END = "見だし終了";
    /** Line that terminates one dictionary entry in the body section. */
    private static final String ENTRY_DELIM = "======";

    /** Headword to its kana reading(s); one headword may have several readings. */
    private final MultiHashMap<String, String> kanaMap = new MultiHashMap<String, String>();

    /** Creates the converter and pre-registers readings missing from the input data. */
    MakeJpLaBodyHtml() {
        // Register kanji headwords that have no kana reading.
        // In the Latin-Japanese dictionary software these words cannot be found
        // by either kanji or kana!
        kanaMap.put("安楽死", "あんらくし");
        kanaMap.put("引力", "いんりょく");
        kanaMap.put("運命", "うんめい");
        kanaMap.put("遠慮", "えんりょ");
        kanaMap.put("顔料", "がんりょう");
        kanaMap.put("緩和", "かんわ");
        kanaMap.put("吟味", "ぎんみ");
        kanaMap.put("金利", "きんり");
        kanaMap.put("軍服", "ぐんぷく");
        kanaMap.put("訓練", "くんれん");
        kanaMap.put("権力", "けんりょく");
        kanaMap.put("言論", "げんろん");
        kanaMap.put("山脈", "さんみゃく");
        kanaMap.put("人類学", "じんるいがく");
        kanaMap.put("神話", "しんわ");
        kanaMap.put("寸法", "すんぽう");
        kanaMap.put("全力", "ぜんりょく");
        kanaMap.put("洗練", "せんれん");
        kanaMap.put("増大", "ぞうだい");
        kanaMap.put("担保", "たんぽ");
        kanaMap.put("暖炉", "だんろ");
        kanaMap.put("陳列", "ちんれつ");
        kanaMap.put("連れる", "つれる");
        kanaMap.put("天文学", "てんもんがく");
        kanaMap.put("電話", "でんわ");
        kanaMap.put("貪欲", "どんよく");
        kanaMap.put("難民", "なんみん");
        kanaMap.put("任命", "にんめい");
        kanaMap.put("年齢", "ねんれい");
        kanaMap.put("反論", "はんろん");
        kanaMap.put("貧乏", "びんぼう");
        kanaMap.put("分別", "ふんべつ");
        kanaMap.put("分裂", "ぶんれつ");
        kanaMap.put("弁論", "べんろん");
        kanaMap.put("万年筆", "まんねんひつ");
        kanaMap.put("民族", "みんぞく");
        kanaMap.put("群れ", "むれ");
        kanaMap.put("綿密", "めんみつ");
        kanaMap.put("問題", "もんだい");
        kanaMap.put("和らげる", "やわらげる");
        kanaMap.put("揺れる", "ゆれる");
        kanaMap.put("弱る", "よわる");
        kanaMap.put("乱用", "らんよう");
        kanaMap.put("倫理", "りんり");
        kanaMap.put("連絡", "れんらく");
        kanaMap.put("論理", "ろんり");
        kanaMap.put("湾", "わん");
    }

    /** Half-width katakana middle dot, as used in the headword list. */
    private final static String NAKAGURO_HALF = "･";
    /** Full-width katakana middle dot, as used in the body text. */
    private final static String NAKAGURO_FULL = "・";

    /**
     * Reads the headword section: a start marker line, then "headword TAB kana"
     * lines up to the end marker. Lines that do not consist of exactly two
     * tab-separated fields are ignored.
     *
     * @param br reader positioned at the start marker line
     * @throws Exception if the first line is not the start marker or reading fails
     */
    private void loadMidashi(BufferedReader br) throws Exception {
        String line = br.readLine();
        if (!MIDASHI_START.equals(line)) {
            throw new Exception("missing " + MIDASHI_START);
        }
        while ((line = br.readLine()) != null) {
            if (line.equals(MIDASHI_END)) {
                break;
            }
            String[] words = line.trim().split("\t");
            if (words.length != 2) {
                continue;
            }
            // A headword may have several readings, e.g. 愛想 = あいそ, あいそう.
            kanaMap.put(words[0], words[1]);
            if (words[0].contains(NAKAGURO_HALF)) {
                // The kana headword list uses the half-width middle dot while the
                // body uses the full-width one, so register both spellings.
                kanaMap.put(words[0].replace(NAKAGURO_HALF, NAKAGURO_FULL), words[1]);
            }
        }
        System.err.println("load " + kanaMap.size() + " kana entries");
    }

    /** Generated HTML entries, in input order, before cross references are linked. */
    private final LinkedList<String> entries = new LinkedList<String>();
    /** Japanese headword to the id of its first dt element in the generated body. */
    private final HashMap<String, Integer> dtIdMap = new HashMap<String, Integer>();

    /**
     * Reads the body section and turns every entry into one HTML string stored in
     * {@link #entries}. Blank lines are skipped; a line that does not start with a
     * space is a headword, a line that starts with a space is explanation text.
     *
     * @param br reader positioned just after the headword section
     * @throws Exception if reading fails or a project helper fails
     */
    private void loadBody(BufferedReader br) throws Exception {
        StringBuilder sb = new StringBuilder();
        String line;
        int id = 0;
        while ((line = br.readLine()) != null) {
            if (line.trim().isEmpty()) {
                continue;
            }
            if (line.equals(ENTRY_DELIM)) {
                sb.append("</dd>");
                entries.add(sb.toString());
                sb.setLength(0);
            } else if (line.charAt(0) != ' ') {
                // Headword line (does not start with a space).
                appendHeadword(sb, line.trim(), id);
                id++;
            } else {
                // Explanation line (starts with a space).
                sb.append("<p>");
                sb.append(line.trim());
                sb.append("</p>");
            }
        }
        if (sb.length() > 0) {
            // The input ended without a closing delimiter; keep the entry instead
            // of silently dropping it.
            System.err.println("WARN: last entry is not terminated by " + ENTRY_DELIM);
            sb.append("</dd>");
            entries.add(sb.toString());
        }
    }

    /**
     * Appends the dt element for one headword, its kana readings (or a generated
     * kana key when no reading is registered) and the opening dd tag. Also records
     * the id of the first occurrence of the headword in {@link #dtIdMap}.
     *
     * @param sb buffer of the entry being built
     * @param dt trimmed, non-empty headword
     * @param id id to assign to the dt element
     */
    private void appendHeadword(StringBuilder sb, String dt, int id) {
        LinkedList<String> kanaList = kanaMap.getWithoutNull(dt);
        sb.append("<dt id=\"");
        sb.append(id);
        if (!dtIdMap.containsKey(dt)) {
            dtIdMap.put(dt, id);
        }
        sb.append("\">");
        sb.append(dt);
        for (String kana : kanaList) {
            sb.append("【");
            sb.append(kana);
            sb.append("】");
        }
        sb.append("</dt>");
        if (kanaList.isEmpty()) {
            String kana = AppendJPKeys.makeKana(dt);
            if (kana != null) {
                sb.append("<key type=\"かな\">");
                sb.append(kana);
                sb.append("</key>");
            } else if (!AppendJPKeys.isKana(dt.charAt(0))) {
                System.err.println("WARN: missing kana: " + dt);
            }
        }
        sb.append("<dd>");
    }

    /**
     * Matches a Japanese cross reference: either "⇒ words ．" (words in group 2)
     * or "(＝ words )" (words in group 4).
     */
    private static final String JPREF_REGEXP = "(⇒(.+?)．)|(\\(＝(.+?)\\))";
    private static final Pattern JPREF = Pattern.compile(JPREF_REGEXP);
    /** Matches one referenced word inside a cross reference (no comma, no space). */
    private static final String JPREFWORD_REGEXP = "([^, ]+)";
    private static final Pattern JPREFWORD = Pattern.compile(JPREFWORD_REGEXP);

    /**
     * Appends a cross-reference word list to {@code sb}, wrapping every word that
     * is a known headword in a link to its dt id. Separators are copied unchanged.
     *
     * @param sb   output buffer
     * @param refs text between the cross-reference markers
     */
    private void appendJPref(StringBuilder sb, String refs) {
        Matcher m = JPREFWORD.matcher(refs);
        int idx = 0;
        while (m.find()) {
            if (idx < m.start(1)) {
                sb.append(refs.substring(idx, m.start(1)));
            }
            String word = m.group(1);
            Integer id = dtIdMap.get(word);
            if (id != null) {
                sb.append(String.format("<a href=\"#%d\">%s</a>", id, word));
            } else {
                sb.append(word);
            }
            idx = m.end(1);
        }
        if (idx < refs.length()) {
            sb.append(refs.substring(idx));
        }
    }

    /**
     * Returns {@code line} with the words of every Japanese cross reference linked
     * to the entry they name. Text outside the references is copied unchanged.
     *
     * @param line HTML of one entry
     * @return the entry with Japanese cross references linked
     */
    private String appendJPref(String line) {
        StringBuilder sb = new StringBuilder();
        Matcher m = JPREF.matcher(line);
        int idx = 0;
        while (m.find()) {
            // Group 1 is the "⇒" form (words in group 2); otherwise the "(＝" form
            // matched (words in group 4).
            int gno = ((m.group(1) != null) ? 2 : 4);
            if (idx < m.start(gno)) {
                sb.append(line.substring(idx, m.start(gno)));
            }
            appendJPref(sb, m.group(gno));
            idx = m.end(gno);
        }
        if (idx < line.length()) {
            sb.append(line.substring(idx));
        }
        return sb.toString();
    }

    /** Matches a run of ASCII letters, i.e. a candidate Latin word. */
    private static final String LAREF_REGEXP = "([a-zA-Z]+)";
    private static final Pattern LAREF = Pattern.compile(LAREF_REGEXP);

    /**
     * Appends one paragraph to {@code sb}, linking every ASCII word whose
     * lower-cased form is a known Latin key to {@code body-lajp.html}.
     *
     * @param sb   output buffer
     * @param para paragraph content (without the surrounding p tags)
     */
    private void appendLArefPara(StringBuilder sb, String para) {
        Matcher m = LAREF.matcher(para);
        int idx = 0;
        while (m.find()) {
            if (idx < m.start(1)) {
                sb.append(para.substring(idx, m.start(1)));
            }
            String word = m.group(1);
            // Same locale-independent folding as used when the keys were loaded.
            Integer id = latinIdMap.get(word.toLowerCase(Locale.ROOT));
            if (id != null) {
                sb.append(String.format("<a href=\"body-lajp.html#%d\">%s</a>", id, word));
            } else {
                sb.append(word);
            }
            idx = m.end(1);
        }
        if (idx < para.length()) {
            sb.append(para.substring(idx));
        }
    }

    /** Matches one {@code <p>...</p>} paragraph; group 1 is its content. */
    private static final String PARA_REGEXP = "<p>(.+?)</p>";
    private static final Pattern PARA = Pattern.compile(PARA_REGEXP);

    /**
     * Returns {@code line} with Latin words inside its paragraphs linked to the
     * Latin-Japanese body. Text outside paragraphs (dt, key, tags) is untouched.
     *
     * @param line HTML of one entry
     * @return the entry with Latin words linked
     */
    private String appendLAref(String line) {
        StringBuilder sb = new StringBuilder();
        Matcher m = PARA.matcher(line);
        int idx = 0;
        while (m.find()) {
            if (idx < m.start(1)) {
                sb.append(line.substring(idx, m.start(1)));
            }
            appendLArefPara(sb, m.group(1));
            idx = m.end(1);
        }
        if (idx < line.length()) {
            sb.append(line.substring(idx));
        }
        return sb.toString();
    }

    /** Writes every entry to standard output after linking both kinds of references. */
    private void outBody() {
        for (String line : entries) {
            System.out.println(appendLAref(appendJPref(line)));
        }
        System.err.println("done. " + entries.size() + " entries");
    }

    /**
     * Converts the dictionary text file and prints the complete HTML document
     * to standard output.
     *
     * @param filename path of the UTF-8 encoded dictionary text file
     * @throws Exception if the file cannot be read or is malformed
     */
    private void loadTxt(String filename) throws Exception {
        InputStream in = new FileInputStream(new File(filename));
        try {
            BufferedReader br = new BufferedReader(new InputStreamReader(in, "UTF-8"));

            System.out.println("<html><body>");
            loadMidashi(br);
            loadBody(br);
            outBody();
            System.out.println("</body></html>");
        } finally {
            // Closing the underlying stream releases the file handle even on failure.
            in.close();
        }
    }

    /**
     * Entry point.
     *
     * @param args {@code args[0]} is the dictionary text file, {@code args[1]} the
     *             Latin-Japanese HTML body used to resolve Latin links
     */
    public static void main(String[] args) {
        if (args.length >= 2) {
            MakeJpLaBodyHtml app = new MakeJpLaBodyHtml();
            try {
                ZephyrUtil.setShiftJisOuput();
                app.loadLAJPBody(args[1]);
                app.loadTxt(args[0]);
            } catch (Exception e) {
                e.printStackTrace();
            }
        } else {
            System.err.println("Usage: java MakeJpLaBodyHtml "
                    + "kenkyusya-jpla.txt body-lajp.html");
        }
    }

}
