/*
 * Decompiled with CFR 0.152.
 */
package zephyr.kenkyusha.encol;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import zephyr.kenkyusha.encol.MecabUtil;
import zephyr.kenkyusha.encol.OpenNLPUtil;
import zephyr.kenkyusha.encol.Util;
import zephyr.kenkyusha.encol.Word;
import zephyr.kenkyusha.encol.Youso;
import zephyr.util.MultiLinkedHashMap;
import zephyr.util.MultiTreeMap;

/**
 * Collects word sequences from example sentences and prints them to standard
 * output as HTML dictionary entries ({@code <dt>}/{@code <dd>} pairs).
 *
 * <p>Processing happens in {@link #output(LinkedList)}: every sentence is
 * tokenized and tagged ({@link #parse(String)}), the collected head words are
 * grouped by lemma, each group is expanded into entries ordered by descending
 * sentence count ({@link #addToHistMap}), and every entry is printed, large
 * ones being subdivided first ({@link #outputEntry(String)}).
 *
 * <p>NOTE(review): this class was recovered from decompiled bytecode. The
 * semantics of {@code Word}, {@code Youso}, {@code Util}, {@code OpenNLPUtil}
 * and {@code MecabUtil} are not visible here; comments below describe only
 * what this class does with them.
 */
public class MakeEnCorpus {
    private final OpenNLPUtil nlp;
    private final MecabUtil mecab;
    /** Head words seen so far, indexed by {@code Word.key()}. */
    private final HashMap<String, Word> collectedWords = new HashMap<String, Word>();
    private static final String RET = "\r\n";
    /** Words whose {@code num} is below this value are not expanded further. */
    private static final int SHOW_MIN = 3;
    /** Maximum number of following words linked to one head word in {@link #parse(String)}. */
    private static final int MAX_CHAIN = 3;
    /** Entries whose count is below this value are printed without subdivision. */
    private static final int SPLIT_MIN_SENTENCES = 30;
    /** Fraction of an entry's count a token must reach to get its own subsection. */
    private static final double SPLIT_RATIO = 0.33;
    /** Extracts the count from the {@code title} attribute written by {@link #putEntry}. */
    private static final Pattern DT_NUM = Pattern.compile(", ([0-9]+)\">");
    /** Matches the first space-delimited token of a phrase. */
    private static final Pattern FIRST_TOKEN = Pattern.compile("^([^ ]+)");
    /** Layout of one entry header: title, tag, count, title, keys, tag, count, key suffix. */
    private static final String ENTRY_FORMAT = "<dt title=\"%s, %s, %d\">%s</dt>%s<dd>, %s, %d%s\r\n";
    /** Next free numeric anchor id for subsections printed by {@link #outputEntry(String)}. */
    private int pid = 0;

    MakeEnCorpus(OpenNLPUtil nlp, MecabUtil mecab) {
        this.nlp = nlp;
        this.mecab = mecab;
    }

    /**
     * Tokenizes and tags one sentence and records its words in {@link #collectedWords}.
     *
     * <p>The text returned by {@code Util.getEnglish} is normalized before
     * splitting on spaces: literal "(a)" / "(s)" are removed, the articles
     * a/an/the standing between spaces (or capitalized at the very start) are
     * dropped, and every run of characters that are neither ASCII letters nor
     * spaces is padded with spaces so that it becomes a token of its own.
     *
     * @param string the raw sentence; it is also the text attached to the
     *               words via {@code addSentence}
     */
    private void parse(String string) {
        String english = Util.getEnglish(string);
        english = english.replaceAll("\\([as]\\)", "");
        english = english.replaceAll(" (a|an|the|) ", " ");
        english = english.replaceAll("^(A|An|The|) ", " ");
        english = english.replaceAll("([^a-zA-Z ]+)", " $1 ");
        String[] tokens = english.split(" +");
        String[] tags = this.nlp.tag(tokens);
        for (int i = 0; i < tokens.length; ++i) {
            // Single-character (and empty) tokens never start a sequence.
            if (tokens[i].length() <= 1) {
                continue;
            }
            Word candidate = new Word(tokens[i], tags[i], true);
            switch (candidate.pos) {
                case PRON:
                case OTHER:
                    // These parts of speech are not used as head words.
                    continue;
                default:
                    break;
            }
            Word head = this.collectedWords.get(candidate.key());
            if (head == null) {
                this.collectedWords.put(candidate.key(), candidate);
                head = candidate;
            } else {
                ++head.num;
            }
            this.extendChain(string, tokens, tags, i, head);
        }
    }

    /**
     * Links the tokens following {@code start} to {@code head}, stopping once
     * {@link #MAX_CHAIN} words have been linked through {@code addNextWord} or
     * the sentence ends.
     *
     * <p>Two chains are advanced in parallel, one through
     * {@code Word.addNextWord} and one through {@code Word.addNextWordFrom};
     * a {@code null} result leaves the corresponding chain where it was.
     */
    private void extendChain(String sentence, String[] tokens, String[] tags, int start, Word head) {
        Word chain = head;
        Word chainFrom = head;
        int linked = 0;
        for (int j = start + 1; j < tokens.length; ++j) {
            if (tokens[j].length() <= 1) {
                continue;
            }
            Word next = chain.addNextWord(tokens[j], tags[j]);
            Word nextFrom = chainFrom.addNextWordFrom(tokens[j], tags[j]);
            if (nextFrom != null) {
                // Avoid attaching the sentence twice when both calls yield the same node.
                if (next != nextFrom) {
                    nextFrom.addSentence(sentence);
                }
                chainFrom = nextFrom;
            }
            if (next != null) {
                next.addSentence(sentence);
                chain = next;
                if (++linked >= MAX_CHAIN) {
                    return;
                }
            }
        }
    }

    /**
     * Builds the extra search keys for an entry.
     *
     * @param surface       the head word as it appeared in the text
     * @param lemma         the lemma of the head word
     * @param surfacePhrase the entry title built from surface forms
     * @param lemmaPhrase   the alternative phrase built from the lemma
     * @return a two-element array: {@code [0]} holds zero or more
     *         {@code <key>...</key>} elements, {@code [1]} the same phrases
     *         as {@code " (...)"} suffixes; both are empty strings when no
     *         extra key applies
     */
    private String[] makeKeys(String surface, String lemma, String surfacePhrase, String lemmaPhrase) {
        String[] keys = new String[2];
        if (surfacePhrase.equals(lemmaPhrase)) {
            keys[0] = "";
            keys[1] = "";
        } else {
            keys[0] = "<key>" + lemmaPhrase + "</key>";
            keys[1] = " (" + lemmaPhrase + ")";
        }
        if (!surface.equals(lemma)) {
            // Surface phrase with only its first token swapped for the lemma.
            // quoteReplacement keeps '$' and '\' in the lemma literal.
            String lemmaHeaded = FIRST_TOKEN.matcher(surfacePhrase).replaceFirst(Matcher.quoteReplacement(lemma));
            if (!lemmaHeaded.equals(lemmaPhrase)) {
                keys[0] = keys[0] + "<key>" + lemmaHeaded + "</key>";
                keys[1] = keys[1] + " (" + lemmaHeaded + ")";
            }
        }
        return keys;
    }

    /**
     * Expands one head word into entries of two to four words and stores each
     * rendered entry in {@code multiTreeMap} under its sentence count.
     *
     * <p>A branch stops at a word that has no successors or whose {@code num}
     * is below {@link #SHOW_MIN}; otherwise it descends one level, down to at
     * most three words after the head.
     *
     * @param string       the lemma of {@code word}
     * @param word         the head word; its {@code nexts} must not be null
     * @param multiTreeMap receives the rendered entries keyed by sentence count
     */
    private void addToHistMap(String string, Word word, MultiTreeMap<Integer, String> multiTreeMap) {
        for (Word second : word.nexts.getAll()) {
            String title2 = word.word + " " + second.word;
            String key2 = this.extendKey(string, second);
            if (second.nexts == null || second.num < SHOW_MIN) {
                this.putEntry(string, word, second, title2, key2, second.list.size(), multiTreeMap);
                continue;
            }
            for (Word third : second.nexts.getAll()) {
                String title3 = title2 + " " + third.word;
                String key3 = this.extendKey(key2, third);
                if (third.nexts == null || third.num < SHOW_MIN) {
                    this.putEntry(string, word, third, title3, key3, third.list.size(), multiTreeMap);
                    continue;
                }
                for (Word fourth : third.nexts.getAll()) {
                    String title4 = title3 + " " + fourth.word;
                    String key4 = this.extendKey(key3, fourth);
                    // NOTE(review): this level prints fourth.num while the shorter
                    // entries print list.size(); kept as found, confirm the two agree.
                    this.putEntry(string, word, fourth, title4, key4, fourth.num, multiTreeMap);
                }
            }
        }
    }

    /**
     * Appends {@code next.word} to {@code prefix}, unless the word text equals
     * the abbreviation of its own part of speech, in which case it is skipped.
     */
    private String extendKey(String prefix, Word next) {
        return next.word.equals(next.pos.abbr) ? prefix : prefix + " " + next.word;
    }

    /**
     * Renders one entry (header plus one {@code <p>} per sentence of
     * {@code leaf}) and stores it under {@code leaf.list.size()}.
     *
     * @param shownCount the count printed in the header; declared {@code long}
     *                   so that any integral field of {@code Word} can be passed
     */
    private void putEntry(String lemma, Word head, Word leaf, String title, String keyPhrase,
            long shownCount, MultiTreeMap<Integer, String> histMap) {
        String[] keys = this.makeKeys(head.word, lemma, title, keyPhrase);
        StringBuilder entry = new StringBuilder();
        entry.append(String.format(ENTRY_FORMAT, title, head.pos.abbr2, shownCount, title, keys[0],
                head.pos.abbr2, shownCount, keys[1]));
        for (String sentence : leaf.list) {
            entry.append("<p>");
            entry.append(Util.SENTENCE_HEAD);
            entry.append(sentence);
            entry.append("</p>\r\n");
        }
        entry.append("</dd>");
        histMap.put(leaf.list.size(), entry.toString());
    }

    /**
     * Prints one rendered entry to standard output.
     *
     * <p>Entries whose header count is below {@link #SPLIT_MIN_SENTENCES} are
     * printed verbatim. Larger entries have the text of each sentence line
     * passed through {@code Util.getJapanese} and {@code MecabUtil.tag}; if
     * the top-ranked token covers at least {@link #SPLIT_RATIO} of the count,
     * the entry is re-emitted with an index line and one anchored subsection
     * per qualifying token, followed by a subsection for sentences that
     * appeared under none of them. Otherwise the entry is printed verbatim.
     *
     * @param string an entry as produced by {@link #putEntry}
     */
    private void outputEntry(String string) {
        Matcher matcher = DT_NUM.matcher(string);
        if (!matcher.find()) {
            // No count in the header: nothing to subdivide by, print as is.
            System.out.println(string);
            return;
        }
        int total = Integer.parseInt(matcher.group(1));
        if (total < SPLIT_MIN_SENTENCES) {
            System.out.println(string);
            return;
        }
        HashMap<String, Word> byToken = new HashMap<String, Word>();
        String[] lines = string.split(RET);
        HashSet<String> unassigned = new HashSet<String>();
        // The first line is the header and the last is "</dd>"; the rest are sentences.
        for (int i = 1; i <= lines.length - 2; ++i) {
            // Strip the surrounding "<p>" and "</p>".
            String sentence = lines[i].substring(3, lines[i].length() - 4);
            String japanese = Util.getJapanese(sentence);
            for (Youso youso : this.mecab.tag(japanese)) {
                switch (youso.hinshi) {
                    case JOSHI:
                    case JYOV:
                    case PUNC:
                    case OTHER:
                        continue;
                    default:
                        break;
                }
                Word token = byToken.get(youso.txt);
                if (token == null) {
                    token = new Word(youso.txt);
                    byToken.put(youso.txt, token);
                }
                token.addSentence(sentence);
                unassigned.add(sentence);
            }
        }
        Word[] ranked = byToken.values().toArray(new Word[0]);
        if (ranked.length == 0) {
            // Every token was filtered out, so there is nothing to group by.
            System.out.println(string);
            return;
        }
        Arrays.sort(ranked);
        double threshold = SPLIT_RATIO * (double) total;
        if ((double) ranked[0].list.size() < threshold) {
            System.out.println(string);
            return;
        }

        int base = this.pid;
        System.out.println(lines[0]);
        System.out.print("<p> ");
        int shown = 0;
        while (shown < ranked.length && (double) ranked[shown].list.size() >= threshold) {
            Word token = ranked[shown];
            System.out.printf("%s<a href=\"#%d\">%s</a> (%d, %.1f%%)", shown == 0 ? "" : ", ", base + shown,
                    token.word, token.list.size(), 100.0 * (double) token.list.size() / (double) total);
            ++shown;
        }
        if (shown <= ranked.length - 2) {
            System.out.printf(", <a href=\"#%d\">\u305d\u306e\u4ed6</a>", base + shown);
        }
        System.out.println("</p>");
        for (int k = 0; k < shown; ++k) {
            Word token = ranked[k];
            System.out.printf("<p><br><br></p><p id=\"%d\">\u25bc<b>%s</b> (%d, %.1f%%)</p>\r\n", base + k,
                    token.word, token.list.size(), 100.0 * (double) token.list.size() / (double) total);
            for (String sentence : token.list) {
                System.out.println("<p>" + sentence + "</p>");
                unassigned.remove(sentence);
            }
        }
        if (unassigned.size() > 0) {
            System.out.println("<p><br><br></p><p id=\"" + (base + shown) + "\"><b>\u25bc\u305d\u306e\u4ed6</b></p>");
            for (String sentence : unassigned) {
                System.out.println("<p>" + sentence + "</p>");
            }
        }
        // Reserve the ids used above, including the one for the trailing subsection.
        this.pid = base + shown + 1;
        System.out.println("</dd>");
    }

    /**
     * Returns the lemma of {@code word} via {@code OpenNLPUtil.word2lemma} for
     * the NOUN, VERB and MD parts of speech, and the word text itself otherwise.
     */
    private String word2lemma(Word word) {
        switch (word.pos) {
            case NOUN:
            case VERB:
            case MD:
                return this.nlp.word2lemma(word.word, word.tag);
            default:
                return word.word;
        }
    }

    /**
     * Parses all sentences and prints the resulting entries to standard
     * output. Progress marks are written to standard error.
     *
     * <p>Only head words that have successors, whose {@code num} is at least
     * {@link #SHOW_MIN} and whose first character is a lowercase ASCII letter
     * are printed. Head words are visited in sorted key order and grouped by
     * lemma; within a group, entries are printed by descending sentence count.
     *
     * @param linkedList the sentences to analyse
     */
    public void output(LinkedList<String> linkedList) {
        System.err.println("\u82f1\u8a9e\u7528\u4f8b\u89e3\u6790");
        int done = 0;
        for (String sentence : linkedList) {
            this.parse(sentence);
            ++done;
            if (done % 10000 != 0) {
                continue;
            }
            // A dot every 10,000 sentences, a digit every 100,000.
            if (done % 100000 == 0) {
                System.err.print(done / 100000);
            } else {
                System.err.print(".");
            }
        }
        System.err.println();

        String[] keys = this.collectedWords.keySet().toArray(new String[0]);
        Arrays.sort(keys);
        System.err.println("\u82f1\u8a9e\u89e3\u6790\u7d50\u679c\u51fa\u529b");
        MultiLinkedHashMap<String, Word> byLemma = new MultiLinkedHashMap<String, Word>();
        for (String key : keys) {
            Word head = this.collectedWords.get(key);
            char first = head.word.charAt(0);
            if (head.nexts == null || head.num < SHOW_MIN || first < 'a' || first > 'z') {
                continue;
            }
            byLemma.put(this.word2lemma(head), head);
        }

        MultiTreeMap<Integer, String> histMap = new MultiTreeMap<Integer, String>(new IntReverse());
        done = 0;
        for (String lemma : byLemma.keySet()) {
            histMap.clear();
            for (Word head : byLemma.getList(lemma)) {
                this.addToHistMap(lemma, head, histMap);
            }
            for (Integer count : histMap.keySet()) {
                for (String entry : histMap.getList(count)) {
                    this.outputEntry(entry);
                }
            }
            ++done;
            if (done % 1000 != 0) {
                continue;
            }
            // A dot every 1,000 lemmas, a digit every 10,000.
            if (done % 10000 == 0) {
                System.err.print(done / 10000);
            } else {
                System.err.print(".");
            }
        }
        System.err.println();
    }

    /** Orders integers from largest to smallest. */
    private static class IntReverse
    implements Comparator<Integer> {
        private IntReverse() {
        }

        @Override
        public int compare(Integer n, Integer n2) {
            // Integer.compare avoids the overflow that subtraction can produce.
            return Integer.compare(n2, n);
        }
    }
}

