package zephyr.kenkyusya.lajp;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import zephyr.util.UTF8toSJIS;
import zephyr.util.UnescapeChars;
import zephyr.util.ZephyrUtil;

/**
 * Command-line tool that reads a Latin-Japanese body text file, compares each
 * headword and body line against a previously loaded "esloader" dump, applies
 * manual fixes from a tab-separated fix file, and writes the (possibly
 * corrected) body text to standard output. Progress and diagnostics go to
 * standard error.
 *
 * <p>Usage: {@code java CheckLaJpBody kenkyusya-lajp.txt esloader.txt lajpfix.txt}
 */
public class CheckLaJpBody {

    /** A manual correction: replace {@code before} with {@code after}. */
    private static class FixEntry {
        final String before, after;

        FixEntry(String before, String after) {
            this.before = before;
            this.after = after;
        }
    }

    /** Manual fixes keyed by the (trimmed) headword line they apply to. */
    private final HashMap<String, FixEntry> fixMap = new HashMap<String, FixEntry>();

    /**
     * Loads the manual fix table from a UTF-8, tab-separated file.
     *
     * <p>Three columns mean {@code headword, before, after}; two columns mean
     * {@code headword, after} with the headword itself used as {@code before}.
     * Lines with any other column count are reported on stderr and skipped.
     *
     * @param filename path of the fix file
     * @throws Exception if the file cannot be opened or read
     */
    private void loadLaJpFix(String filename) throws Exception {
        InputStream in = new FileInputStream(new File(filename));
        try {
            BufferedReader br = new BufferedReader(new InputStreamReader(in, "UTF-8"));
            String line;
            while ((line = br.readLine()) != null) {
                String[] words = line.split("\t");
                if (words.length == 3) {
                    fixMap.put(words[0], new FixEntry(words[1], words[2]));
                } else if (words.length == 2) {
                    fixMap.put(words[0], new FixEntry(words[0], words[1]));
                } else {
                    System.err.println("invalid lajpfix line: " + line);
                }
            }
        } finally {
            // Closing the underlying stream releases the file handle.
            in.close();
        }
        System.err.println("loaded " + fixMap.size() + " fix entries");
    }

    /** One entry of the esloader dump: its headword line and remaining body lines. */
    private static class EsEntry {
        final String dt;
        final LinkedList<String> lines = new LinkedList<String>();

        EsEntry(String dt) {
            this.dt = dt;
        }
    }

    /** Entries of the esloader dump, in file order. */
    private final LinkedList<EsEntry> esEntries = new LinkedList<EsEntry>();

    private static final String HEXCODE_REGEXP = "(0[0-9A-F]{3}|762D)";
    private static final Pattern HEXCODE = Pattern.compile(HEXCODE_REGEXP);

    /**
     * Replaces every four-character hex token matched by {@link #HEXCODE}
     * with the character of that code, converted to upper case.
     *
     * @param line text possibly containing hex tokens
     * @return the text with all tokens substituted
     */
    private String toUnicode(String line) {
        StringBuilder sb = new StringBuilder();
        int i = 0;
        Matcher m = HEXCODE.matcher(line);
        while (m.find()) {
            if (i < m.start()) {
                // Copy the literal text between the previous token and this one.
                sb.append(line, i, m.start());
            }
            int code = Integer.parseInt(m.group(1), 16);
            sb.append(Character.toUpperCase((char) code));
            i = m.end();
        }
        if (i < line.length()) {
            sb.append(line.substring(i));
        }
        return sb.toString();
    }

    /**
     * Loads the esloader dump into {@link #esEntries}.
     *
     * <p>Entries are separated by lines equal to
     * {@code EstraierLoader.ENTRY_DELIM}. The first line after a delimiter (or
     * at the start of the file) becomes the entry headword; following lines
     * become its body lines. Every line is passed through {@link #toUnicode}.
     *
     * @param filename path of the dump file (read as UTF-8)
     * @throws Exception if the file cannot be opened or read
     */
    private void loadEsbody(String filename) throws Exception {
        InputStream in = new FileInputStream(new File(filename));
        try {
            BufferedReader br = new BufferedReader(new InputStreamReader(in, "UTF-8"));
            String line;
            boolean bNew = true;
            EsEntry entry = null;
            while ((line = br.readLine()) != null) {
                if (line.equals(EstraierLoader.ENTRY_DELIM)) {
                    bNew = true;
                    continue;
                }
                line = toUnicode(line);
                if (bNew) {
                    entry = new EsEntry(line);
                    esEntries.add(entry);
                    bNew = false;
                } else {
                    entry.lines.add(line);
                }
            }
        } finally {
            in.close();
        }
        System.err.println("loaded " + esEntries.size() + " entries");
    }

    private static final String WORD_REGEXP = "([a-zA-ZĀĒĪŌŪ]+)";
    private static final Pattern WORD = Pattern.compile(WORD_REGEXP);

    /**
     * Compares one body line against the corresponding esloader line, word by
     * word, and tries to repair a single missing-vowel difference.
     *
     * @param dt     headword of the current entry (used in diagnostics only)
     * @param line0  body line as read from the body file
     * @param esline corresponding line of the esloader dump
     * @return {@code line0} itself when its upper-cased form equals
     *         {@code esline}; the repaired line when exactly one word differed
     *         and could be fixed; {@code null} when no word or more than one
     *         word differed
     */
    private String compare(String dt, String line0, String esline) {
        String kline = line0.toUpperCase();
        if (kline.equals(esline)) {
            return line0;
        }
        // Normalise the body line: strip bracket/punctuation characters ...
        final String dropChars = "（）［］《》〈〉＋～’「」";
        for (int i = 0; i < dropChars.length(); i++) {
            kline = kline.replace(dropChars.substring(i, i + 1), "");
        }
        // ... and spell accented capitals out as entity-style names.
        final String[][] replaces =
                { { "Á", "AACUTE" }, { "Â", "ACIRC" }, { "Ç", "CCEDIL" }, { "É", "EACUTE" },
                        { "È", "EGRAVE" }, { "Ë", "EUML" }, { "Í", "IACUTE" }, { "Î", "ICIRC" },
                        { "Ñ", "NTILDE" }, { "Ó", "OACUTE" }, { "Ö", "OUML" }, { "Ô", "OCIRC" },
                        { "Ú", "UACUTE" }, { "Ü", "UUML" } };
        for (int i = 0; i < replaces.length; i++) {
            kline = kline.replace(replaces[i][0], replaces[i][1]);
        }
        // Walk both lines in lock-step, one word at a time.
        Matcher m1 = WORD.matcher(kline);
        Matcher m2 = WORD.matcher(esline);
        String fline = null;
        // Number of differing words on this line (distinct from the nFixed field).
        int mismatches = 0;
        while (m1.find()) {
            if (!m2.find()) {
                System.err.println("ERR: dt=" + dt + "\n\tkline=" + line0 + "\n\teline=" + esline);
                invalidEntry(dt);
                break;
            }
            String w1 = m1.group(1);
            String w2 = m2.group(1);
            if (!w1.equals(w2)) {
                String tline = makeFixedWord(line0, w1, w2);
                if (tline == null) {
                    System.err.println("WARN: dt=" + dt + "\n\tkline=" + line0 + "\n\teline="
                            + esline);
                    invalidEntry(dt);
                } else if (mismatches == 0) {
                    // Only the first repair is kept; a second mismatch voids it below.
                    fline = tline;
                }
                mismatches++;
            }
        }
        if (mismatches == 1) {
            return fline;
        } else {
            return null;
        }
    }

    /**
     * Converts a body-file headword into the form used for esloader headwords:
     * upper-cased, with a fixed set of characters replaced or removed.
     *
     * @param key headword as it appears in the body file
     * @return normalised key for comparison with {@code EsEntry.dt}
     */
    private String keyForEstraier(String key) {
        final String[][] replaces =
                { { "．", "." }, { "《", "" }, { "》", "" }, { "＊", "*" }, { "Ï", "IUML" },
                        { "［", "" }, { "］", "" } };
        key = key.toUpperCase();
        for (int i = 0; i < replaces.length; i++) {
            key = key.replace(replaces[i][0], replaces[i][1]);
        }
        return key;
    }

    /** Running count of repairs performed; reported at the end of the run. */
    private int nFixed = 0;

    /**
     * Tries to reconcile {@code key} with {@code eskey} when {@code key} is
     * exactly one character shorter.
     *
     * <ul>
     * <li>If {@code key} is a prefix of {@code eskey}, {@code eskey} is
     *     returned.</li>
     * <li>If the two differ by a single extra Y/I/E/O/U inside {@code eskey}
     *     and every other character matches, the key with that character
     *     inserted is returned and {@link #nFixed} is incremented.</li>
     * <li>Otherwise {@code key} is returned unchanged.</li>
     * </ul>
     *
     * @param key   normalised body-file key
     * @param eskey esloader headword
     * @return the reconciled key, or {@code key} itself when no fix applies
     */
    private String fixedKeyByEs(String key, String eskey) {
        if (key.length() != eskey.length() - 1) {
            return key;
        }
        // Locate the first position at which the two strings disagree.
        int dIdx = -1;
        for (int i = 0; i < key.length(); i++) {
            if (key.charAt(i) != eskey.charAt(i)) {
                dIdx = i;
                break;
            }
        }
        if (dIdx < 0) {
            // key="ALICUB", eskey="ALICUBI" etc
            return eskey;
        }
        char dChar = eskey.charAt(dIdx);
        // Only the vowels that have an extended form (Y, I, E, O, U) may be inserted.
        if (!extCharsMap.containsKey(dChar)) {
            return key;
        }
        // Everything after the inserted character must match through to the
        // very last character; otherwise this is not a single-vowel difference.
        if (!key.regionMatches(dIdx, eskey, dIdx + 1, key.length() - dIdx)) {
            return key;
        }
        nFixed++;
        return key.substring(0, dIdx) + dChar + key.substring(dIdx);
    }

    /** Text inserted into the output for each vowel that may be missing from a key. */
    private static final HashMap<Character, String> extCharsMap = new HashMap<Character, String>();

    static {
        extCharsMap.put('Y', "ȳ");
        extCharsMap.put('I', "ī˘");
        extCharsMap.put('E', "ē˘");
        extCharsMap.put('O', "ō˘");
        extCharsMap.put('U', "ū˘");
    }

    /**
     * Inserts the extended-vowel text into {@code word} at the position where
     * {@code eskey} has one more character than {@code key}.
     *
     * @param word  original text (headword or body line) to patch
     * @param key   upper-case word that is one character shorter than {@code eskey}
     * @param eskey upper-case word from the esloader dump
     * @return the patched text, or {@code null} when the lengths do not differ
     *         by exactly one, the extra character has no entry in
     *         {@link #extCharsMap}, or {@code key} does not occur in the
     *         upper-cased {@code word}
     */
    private String makeFixedWord(String word, String key, String eskey) {
        if (key.length() != eskey.length() - 1) {
            return null;
        }
        // Length of the common prefix; eskey.charAt(i) is always valid because
        // eskey is one character longer than key.
        int i = 0;
        while (i < key.length() && key.charAt(i) == eskey.charAt(i)) {
            i++;
        }
        String ex = extCharsMap.get(eskey.charAt(i));
        if (ex == null) {
            return null;
        }
        int idx = word.toUpperCase().indexOf(key);
        if (idx < 0) {
            return null;
        }
        nFixed++;
        int k = idx + i;
        // substring(k) is empty when k == word.length(), so no special case is needed.
        return word.substring(0, k) + ex + word.substring(k);
    }

    /**
     * Reports an unrecoverable mismatch for a headword on stderr and
     * terminates the JVM with exit status 1.
     *
     * @param dt headword of the offending entry
     */
    private void invalidEntry(String dt) {
        System.err.println();
        System.err.println("*********************");
        // NOTE(review): presumably yields a console-friendly rendering of the
        // headword; the helpers are defined outside this file.
        String t = UnescapeChars.unescapeCode(UTF8toSJIS.convert(dt));
        System.err.println("ERROR: headword=" + dt + "\t" + t);
        System.exit(1);
    }

    /**
     * Reads the body file, checks each entry against the esloader entries (in
     * the same order), applies manual and automatic fixes, and prints the
     * resulting text to stdout.
     *
     * <p>Lines that do not start with a space are headwords; lines that do are
     * body lines of the current headword. Delimiter lines
     * ({@code MakeBodyHtml.ENTRY_DELIM}) and the markers "［↑］"/"［↓］" are
     * echoed unchanged; blank lines are dropped.
     *
     * @param filename path of the body file (read as UTF-8)
     * @throws Exception if the file cannot be read, if it contains more
     *                   headwords than the esloader dump, or if a body line
     *                   precedes the first headword
     */
    private void loadLaJpBody(String filename) throws Exception {
        InputStream in = new FileInputStream(new File(filename));
        try {
            BufferedReader br = new BufferedReader(new InputStreamReader(in, "UTF-8"));
            // Sequential iteration; LinkedList.get(idx) would rescan the list each time.
            Iterator<EsEntry> esIter = esEntries.iterator();
            String line;
            int idx = 0;
            EsEntry entry = null;
            String dt = "";
            FixEntry fixEntry = null;
            System.err.print("fixing Latin-Japanese entries ");

            while ((line = br.readLine()) != null) {
                if (line.equals(MakeBodyHtml.ENTRY_DELIM)) {
                    System.out.println(line);
                    continue;
                }
                String trline = line.trim();
                if (trline.isEmpty()) {
                    continue;
                }
                if (trline.equals("［↑］") || trline.equals("［↓］")) {
                    System.out.println(line);
                    continue;
                }
                if (line.charAt(0) != ' ') {
                    // Headword line: a manual fix, when it changes the text, wins.
                    fixEntry = fixMap.get(trline);
                    boolean bFixed = false;
                    if (fixEntry != null) {
                        String trline2 = trline.replace(fixEntry.before, fixEntry.after);
                        if (!trline2.equals(trline)) {
                            System.out.println(trline2);
                            bFixed = true;
                        }
                    }
                    dt = trline;
                    if (!esIter.hasNext()) {
                        throw new IllegalStateException("no esloader entry for headword #"
                                + (idx + 1) + ": " + trline);
                    }
                    entry = esIter.next();
                    if (!bFixed) {
                        String key = keyForEstraier(trline);
                        if (!key.equals(entry.dt)) {
                            String key2 = fixedKeyByEs(key, entry.dt);
                            if (!key2.equals(entry.dt)) {
                                invalidEntry(dt);
                            } else {
                                String fixedDt = makeFixedWord(dt, key, entry.dt);
                                if (fixedDt != null) {
                                    dt = fixedDt;
                                } else {
                                    // Keep the original headword rather than emitting "null".
                                    System.err.println("WARN: cannot fix headword " + dt
                                            + " to match " + entry.dt);
                                }
                            }
                        }
                        System.out.println(dt);
                    }
                    idx++;
                    if ((idx % 1000) == 0) {
                        System.err.print(".");
                    }
                } else {
                    // Body line of the current headword.
                    if (entry == null) {
                        throw new IllegalStateException("body line before any headword: " + line);
                    }
                    boolean bFixed = false;
                    if (fixEntry != null) {
                        String line2 = line.replace(fixEntry.before, fixEntry.after);
                        if (!line2.equals(line)) {
                            System.out.println(line2);
                            bFixed = true;
                        }
                    }
                    // Always consume the matching esloader line to stay in step.
                    String cmpline = entry.lines.poll();
                    if (!bFixed) {
                        if (cmpline != null) {
                            String kline = compare(dt, line, cmpline);
                            if (kline != null && !kline.equals(line)) {
                                line = kline;
                            }
                        } else {
                            System.err.println("ERROR: cannot get esBodyLines for " + entry.dt);
                            System.err.println("\t" + line);
                        }
                        System.out.println(line);
                    }
                }
            }
        } finally {
            in.close();
        }
        System.err.println();
        System.err.println("done. " + idx(esEntriesConsumed()) + " entries. " + nFixed + " fixed.");
    }

    /** Number of headwords processed by the last {@link #loadLaJpBody} run. */
    private int processedEntries = 0;

    /** Returns the processed-headword counter; kept separate for the final report. */
    private int esEntriesConsumed() {
        return processedEntries;
    }

    /** Identity helper used only to keep the final report expression readable. */
    private int idx(int n) {
        return n;
    }

    /**
     * Entry point. Expects three arguments: body file, esloader dump, fix
     * file. Exits with status 1 if any step throws.
     *
     * @param args command-line arguments
     */
    public static void main(String[] args) {
        if (args.length >= 3) {
            ZephyrUtil.setUTF8Ouput();
            CheckLaJpBody app = new CheckLaJpBody();
            try {
                app.loadLaJpFix(args[2]);
                app.loadEsbody(args[1]);
                app.loadLaJpBody(args[0]);
            } catch (Exception e) {
                e.printStackTrace();
                // Signal failure to the caller, as invalidEntry does.
                System.exit(1);
            }
        } else {
            System.err.println("Usage: java CheckLaJpBody "
                    + "kenkyusya-lajp.txt esloader.txt lajpfix.txt");
        }
    }

}
