package zephyr.kenkyusya.lajp;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import zephyr.util.ZephyrUtil;

/**
 * Converts a plain-text dictionary file into an HTML body on standard output.
 *
 * <p>Input layout as read by this class: entries are terminated by a line equal
 * to {@link #ENTRY_DELIM}; the first line of an entry (not starting with a
 * space) is the head line, the following lines (starting with a space) are the
 * body. Blank lines are ignored.
 *
 * <p>The file is read twice: {@link #loadHeadwords(String)} builds a map from
 * head keys to sequential entry ids, then {@link #loadTxt(String)} emits one
 * {@code <dt id="N">…</dt>…<dd>…</dd>} line per entry and turns cross
 * references into {@code <a href="#N">} links using that map.
 */
public class MakeBodyHtml {

    /** Line that terminates one entry in the input file. */
    static final String ENTRY_DELIM = "======";

    /**
     * Derives the lookup key from a head line: the text before the first comma,
     * then cut again at the first space. A comma or space at index 0 cuts
     * nothing, so a non-empty input always yields a non-empty key.
     *
     * @param dt trimmed head line
     * @return the shortened key
     */
    private String makeHeadkey(String dt) {
        int idx = dt.indexOf(',');
        if (idx > 0) {
            dt = dt.substring(0, idx);
        }
        idx = dt.indexOf(' ');
        if (idx > 0) {
            dt = dt.substring(0, idx);
        }
        return dt;
    }

    /** Maps a head key (and its variants) to the id of the first entry that produced it. */
    private final HashMap<String, Integer> bodyIdMap = new HashMap<String, Integer>();

    /**
     * Registers {@code key} for entry {@code id} unless the key is already
     * taken; the first registration wins.
     *
     * @param key    lookup key
     * @param id     entry id
     * @param bCheck marks calls for which a duplicate would be worth reporting;
     *               the report itself is currently disabled
     */
    private void addBodyIdKey(String key, int id, boolean bCheck) {
        if (!bodyIdMap.containsKey(key)) {
            bodyIdMap.put(key, id);
        } else if (bCheck) {
            // Duplicate-key warning intentionally disabled:
            // System.err.println("WARN: duplicated key: " + key + ", id=" + id);
        }
    }

    /**
     * First pass: assigns a sequential id to every entry and fills
     * {@link #bodyIdMap}.
     *
     * <p>The first non-blank line after a delimiter (or at the start of the
     * file) is taken as the head line. For each head line the following keys
     * are registered: the head key, its {@code ZephyrUtil.makeKey(key, false)}
     * form when that differs, and - when the head key ends in a digit - both
     * of these with the last character removed.
     *
     * @param filename path of the UTF-8 input file
     * @throws Exception if the file cannot be opened or read
     */
    private void loadHeadwords(String filename) throws Exception {
        InputStream in = new FileInputStream(new File(filename));
        try {
            BufferedReader br = new BufferedReader(new InputStreamReader(in, "UTF-8"));
            String line;
            int id = 0;
            boolean bNextDt = true;
            while ((line = br.readLine()) != null) {
                String trline = line.trim();
                if (trline.isEmpty()) {
                    continue;
                }
                if (line.equals(ENTRY_DELIM)) {
                    bNextDt = true;
                } else if (bNextDt) {
                    String key = makeHeadkey(trline);
                    addBodyIdKey(key, id, true);
                    String key2 = ZephyrUtil.makeKey(key, false);
                    boolean hasDialects = (!key.equals(key2));
                    if (hasDialects) {
                        addBodyIdKey(key2, id, false);
                    }
                    if (Character.isDigit(key.charAt(key.length() - 1))) {
                        addBodyIdKey(key.substring(0, key.length() - 1), id, false);
                        // Assumes makeKey keeps the trailing digit - TODO confirm.
                        // The emptiness guard only prevents an index error.
                        if (hasDialects && key2.length() > 0) {
                            addBodyIdKey(key2.substring(0, key2.length() - 1), id, false);
                        }
                    }
                    id++;
                    bNextDt = false;
                }
            }
            System.err.println("loaded " + id + " entries from " + filename);
        } finally {
            // Closing the underlying stream is enough for a read-only chain and
            // also runs when reading fails half-way.
            in.close();
        }
    }

    /**
     * Looks up the entry id for a referenced word. If the word itself is
     * unknown and ends in a digit, the word without its last character is
     * tried as well.
     *
     * @param word referenced word
     * @return the entry id, or {@code null} if no key matches
     */
    private Integer getBodyId(String word) {
        Integer bodyId = bodyIdMap.get(word);
        if (bodyId != null) {
            return bodyId;
        }
        if (word.length() > 0 && Character.isDigit(word.charAt(word.length() - 1))) {
            return bodyIdMap.get(word.substring(0, word.length() - 1));
        }
        return null;
    }

    /**
     * Appends one {@code <key type="表記">} element per distinct spelling when
     * the head line lists more than one comma-separated word. Nothing is
     * appended for a single-word head line.
     *
     * @param sb output buffer of the current entry
     * @param dt trimmed head line
     */
    private void addKeyHyoki(StringBuilder sb, String dt) {
        String[] words = dt.split(", *");
        if (words.length > 1) {
            TreeSet<String> keys = new TreeSet<String>();
            for (String w : words) {
                String key = ZephyrUtil.makeKey(w, true);
                // add() returns false for a key that was already emitted.
                if (keys.add(key)) {
                    sb.append("<key type=\"表記\">");
                    sb.append(key);
                    sb.append("</key>");
                }
            }
        }
    }

    private static final String NUM_REGEXP = "^([0-9]+|I+)[ \\.]";
    private static final Pattern NUM = Pattern.compile(NUM_REGEXP);

    /**
     * Wraps a leading sense number (decimal digits or a run of {@code I}) in
     * {@code <b>…</b>} when it is followed by a space or a period.
     *
     * <p>The rest of the line is appended starting at the separator character
     * itself, and a space is inserted first when the separator is a period:
     * {@code "1 foo"} becomes {@code "<b>1</b> foo"}, {@code "2.foo"} becomes
     * {@code "<b>2</b> .foo"}.
     * NOTE(review): keeping the period after the inserted space may be
     * unintended; behaviour left as it was.
     *
     * @param line trimmed body line
     * @return the line with its numbering marked up, or the line itself
     */
    private String fixNumbering(String line) {
        Matcher m = NUM.matcher(line);
        if (!m.find()) {
            return line;
        }
        StringBuilder txt = new StringBuilder();
        txt.append("<b>").append(m.group(1)).append("</b>");
        if (!m.group().endsWith(" ")) {
            txt.append(' ');
        }
        txt.append(line.substring(m.end(1)));
        return txt.toString();
    }

    private static final String REF_PREV = "↑";
    private static final String REF_NEXT = "↓";

    /** Builds an in-page link {@code <a href="#id">word</a>}. */
    private String makeRefById(int id, String word) {
        return "<a href=\"#" + id + "\">" + word + "</a>";
    }

    private static final String REF_REGEXP = "[⇒＝] *(.+?)[． ()》,]";
    private static final Pattern REF = Pattern.compile(REF_REGEXP);

    /**
     * Links the word following each {@code ⇒} or {@code ＝} marker to its
     * entry when the word is a known key. Only the word is replaced; the
     * marker and the terminating character stay untouched. A reference that is
     * not followed by one of the terminator characters is not matched.
     *
     * @param line body line
     * @return the line with resolvable references turned into links
     */
    private String fixRef(String line) {
        Matcher m = REF.matcher(line);
        int k = 0;
        StringBuilder sb = new StringBuilder();
        while (m.find()) {
            if (k < m.start(1)) {
                sb.append(line.substring(k, m.start(1)));
            }
            String word = m.group(1);
            Integer bodyId = getBodyId(word);
            if (bodyId != null) {
                sb.append(makeRefById(bodyId, word));
            } else {
                sb.append(word);
            }
            k = m.end(1);
        }
        if (k < line.length()) {
            sb.append(line.substring(k));
        }
        return sb.toString();
    }

    private static final String REFWORD_REGEXP = "[^/ ]+";
    private static final Pattern REFWORD = Pattern.compile(REFWORD_REGEXP);

    /**
     * Debug switch: when {@code true}, bracket references that cannot be
     * resolved are reported on standard error.
     */
    private static boolean CHECK_REF = false;

    /**
     * Links the words inside the first {@code ［…］} section of a line.
     *
     * <p>Words are separated by {@code /} or space. {@code ↑} links to entry
     * {@code id - 1}, {@code ↓} to entry {@code id + 1} (no range check is
     * made); any other word is linked when it is a known key.
     *
     * @param line body line
     * @param dt   head line of the current entry, used in diagnostics only
     * @param id   id of the current entry
     * @return the line with linked references; the line itself when it has no
     *         opening bracket or the closing bracket is missing
     */
    private String fixRefInBracket(String line, String dt, int id) {
        int idx = line.indexOf('［');
        if (idx < 0) {
            return line;
        }
        idx++;
        int idx2 = line.indexOf('］', idx);
        if (idx2 < 0) {
            // Unbalanced bracket: report it and leave the line alone instead of
            // failing in substring() with a negative end index.
            System.err.println("ERROR: " + line);
            return line;
        }
        String ref = line.substring(idx, idx2);
        StringBuilder sb = new StringBuilder();
        Matcher m = REFWORD.matcher(ref);
        int k = 0;
        while (m.find()) {
            if (k < m.start()) {
                sb.append(ref.substring(k, m.start()));
            }
            String word = m.group();
            if (word.equals(REF_PREV)) {
                sb.append(makeRefById(id - 1, REF_PREV));
            } else if (word.equals(REF_NEXT)) {
                sb.append(makeRefById(id + 1, REF_NEXT));
            } else {
                Integer bodyId = getBodyId(word);
                if (bodyId != null) {
                    sb.append(makeRefById(bodyId, word));
                } else {
                    sb.append(word);
                    if (CHECK_REF && !word.startsWith("Gk") && !word.startsWith("-")
                            && !word.startsWith("cf.") && (word.charAt(0) < 0x1000)) {
                        System.err.println("WARN: cannot find refkey: " + word + " at " + dt);
                    }
                }
            }
            k = m.end();
        }
        if (k < ref.length()) {
            sb.append(ref.substring(k));
        }
        return line.substring(0, idx) + sb.toString() + line.substring(idx2);
    }

    private static final String PARA_REGEXP = "<p>(<b>)?.+?</p>";
    private static final Pattern PARA = Pattern.compile(PARA_REGEXP);

    /**
     * Inserts {@code <indent val="N">} markers in front of paragraphs.
     *
     * <p>The level starts at 1. Once a paragraph beginning with {@code <b>}
     * has been seen, every paragraph that does not begin with {@code <b>}
     * gets level 2 and every one that does gets level 1. A marker is written
     * only where the level changes.
     *
     * @param line one complete entry
     * @return the entry with indent markers inserted
     */
    private String fixIndent(String line) {
        StringBuilder sb = new StringBuilder();
        Matcher m = PARA.matcher(line);
        int idx = 0;
        int indent = 1;
        boolean hasNum = false;
        while (m.find()) {
            if (idx < m.start(0)) {
                sb.append(line.substring(idx, m.start(0)));
            }
            boolean numbered = (m.group(1) != null);
            if (numbered) {
                hasNum = true;
            }
            int newIndent = (hasNum && !numbered) ? 2 : 1;
            if (indent != newIndent) {
                sb.append("<indent val=\"");
                sb.append(newIndent);
                sb.append("\">");
                indent = newIndent;
            }
            sb.append(m.group(0));
            idx = m.end(0);
        }
        if (idx < line.length()) {
            sb.append(line.substring(idx));
        }
        return sb.toString();
    }

    /**
     * Final clean-up of one entry: joins the first paragraph that consists of
     * a {@code ［…］} section onto the preceding paragraph, then adds indent
     * markers.
     *
     * @param line one complete entry
     * @return the finished entry
     */
    private String fixEntry(String line) {
        line = line.replaceFirst("</p><p>(［.+?］</p>)", " $1");
        line = fixIndent(line);
        return line;
    }

    /** Closes the {@code <dd>} of an entry and prints the finished entry on standard output. */
    private void emitEntry(StringBuilder sb) {
        sb.append("</dd>");
        System.out.println(fixEntry(sb.toString()));
    }

    /**
     * Second pass: writes the HTML document to standard output, one line per
     * entry, with progress dots and a summary on standard error.
     *
     * <p>Entry ids are assigned the same way as in
     * {@link #loadHeadwords(String)}: a line seen while no entry is open
     * starts a new entry even if it is indented, a delimiter seen while no
     * entry is open is skipped without consuming an id, and an entry left open
     * at the end of the file is still written.
     *
     * @param filename path of the UTF-8 input file
     * @throws Exception if the file cannot be opened or read
     */
    private void loadTxt(String filename) throws Exception {
        InputStream in = new FileInputStream(new File(filename));
        try {
            BufferedReader br = new BufferedReader(new InputStreamReader(in, "UTF-8"));
            String line;
            System.out.println("<html><body>");
            int id = 0;
            int ddidx = 0; // number of body lines already written for the open entry
            String dt = "";
            StringBuilder sb = null; // null while no entry is open
            while ((line = br.readLine()) != null) {
                String trline = line.trim();
                if (trline.isEmpty()) {
                    continue;
                }
                if (line.equals(ENTRY_DELIM)) {
                    if (sb == null) {
                        System.err.println("WARN: delimiter without an open entry before id " + id);
                        continue;
                    }
                    emitEntry(sb);
                    sb = null;
                    id++;
                    if ((id % 1000) == 0) {
                        System.err.print(".");
                    }
                } else if (sb == null || line.charAt(0) != ' ') {
                    // NOTE: an unindented line met while an entry is still open
                    // replaces that entry's buffer (unchanged behaviour).
                    sb = new StringBuilder();
                    sb.append("<dt id=\"");
                    sb.append(id);
                    sb.append("\">");
                    dt = trline;
                    sb.append(dt);
                    sb.append("</dt>");
                    addKeyHyoki(sb, trline);
                    sb.append("<dd>");
                    ddidx = 0;
                } else {
                    String txt = fixNumbering(trline);
                    // Bracket references are only processed on the first two body lines.
                    if (ddidx < 2) {
                        txt = fixRefInBracket(txt, dt, id);
                    }
                    txt = fixRef(txt);
                    sb.append("<p>");
                    sb.append(txt);
                    sb.append("</p>");
                    ddidx++;
                }
            }
            if (sb != null) {
                // Last entry was not followed by a delimiter; it was indexed in
                // the first pass, so write it out as well.
                emitEntry(sb);
                id++;
            }
            System.out.println("</body></html>");
            System.err.println();
            System.err.println("done. " + id + " entries");
        } finally {
            in.close();
        }
    }

    /**
     * Command-line entry point. Expects the dictionary text file as the first
     * argument and prints a usage message when it is missing. Any exception
     * raised during conversion is printed with its stack trace.
     *
     * @param args command-line arguments; {@code args[0]} is the input file
     */
    public static void main(String[] args) {
        if (args.length > 0) {
            ZephyrUtil.setUTF8Ouput();
            MakeBodyHtml app = new MakeBodyHtml();
            try {
                app.loadHeadwords(args[0]);
                app.loadTxt(args[0]);
            } catch (Exception e) {
                e.printStackTrace();
            }
        } else {
            System.err.println("Usage: java MakeBodyHtml SOME_DIR/kenkyusya-lajp.txt");
        }
    }

}
