package zephyr.obunsha.petitroyal;

import java.io.File;
import java.sql.*;
import java.util.LinkedList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import zephyr.util.UTF8toSJIS;
import zephyr.util.ZephyrUtil;

public class MakeBodyHtml {

    /**
     * Literal (non-regex) substitutions applied by {@link #fixItem(String)}.
     * Each row is { text to find, replacement }. Rows are applied in array order,
     * so a later row sees the result of every earlier row.
     * Besides the generic rows (newline to space, "&copy;", "<br>"), the rows
     * rewrite specific fragments whose tags are unbalanced or contain stray characters.
     */
    private static final String[][] ITEM_FIXES =
            {
                    { "\n", " " },
                    { "<span class='stm'>éの<の記号</span>", "<span class='stm'>éのˊの記号</span>" },
                    {
                            "<td colspan='3' class='kju'>◆母音または無音のｈで始まる男性単数名詞の前ではcetを用い，リエゾンする</span></td>",
                            "<td colspan='3' class='kju'>◆母音または無音のｈで始まる男性単数名詞の前ではcetを用い，リエゾンする</td>" },
                    { "<span class='gng'>新しい釘が古い釘を押し出す</span></td></tr>",
                            "<span class='gng'>新しい釘が古い釘を押し出す</span></td></tr></td></tr></table></td></tr>" },
                    { "<span class='yri'>semelles <i>compensees<</i></span>",
                            "<span class='yri'>semelles <i>compensees</i></span>" },
                    { "&copy;", "©" },
                    { "<b>価値</b>を下げる&</span>", "<b>価値</b>を下げる</span>" },
                    { "（＝活用de<clinaison）", "（＝活用declinaison）" /* CHECK */},
                    { "<table class='hyou'>  <tt>", "<table class='hyou'>  <tr>" },
                    { "desquelles</span></td></tr></td></tr></table></span></td></tr>",
                            "desquelles</td></tr></table></span></td></tr>" },
                    { "<span class='yry'>綴り字記号</span> ˊ  ˋ ˆ  ̈ など</span>",
                            "<span class='yry'>綴り字記号</span> ˊ  ˋ ˆ  ̈ など" },
                    { "<br>", "<br/>" },
                    { "<td>son <span class='phn'>sɔ̃ ソ<span class='skan'>ン</span></td>",
                            "<td>son <span class='phn'>sɔ̃ ソ</span><span class='skan'>ン</span></td>" },
                    {
                            "<span class='phn'>se-za-mi</span></span>  </td></tr></table></span></td></tr><tr><td>",
                            "<span class='phn'>se-za-mi</span></td></tr></table></span></td></tr><tr><td>" },
                    { "<span class='ykg'>$激しい苦痛&</span>", "<span class='ykg'>激しい苦痛</span>" },
                    { "<td>ton <span class='phn'>tɔ̃ ト<span class='skan'>ン</span></td>",
                            "<td>ton <span class='phn'>tɔ̃ ト</span><span class='skan'>ン</span></td>" },
                    { "<span class='phn'>tut</span>となる</span><br />",
                            "<span class='phn'>tut</span>となる</td></tr></table></span><br />" },
                    {
                            "<span class='phn'>tuz</span>となる</span><br />◆tousが代名詞のときは常に<span class='phn'>tus</span>と発音する</td></tr></table></span><span class='rsm'></span></td></tr>",
                            "<span class='phn'>tuz</span>となる<br />◆tousが代名詞のときは常に<span class='phn'>tus</span>と発音する</td></tr><span class='rsm'></span>" },
                    { "<td>un <span class='phn'>œ̃ ア<span class='skan'>ン</span></td>",
                            "<td>un <span class='phn'>œ̃ ア</span><span class='skan'>ン</span></td>" } };

    /**
     * Returns the item HTML with every {@link #ITEM_FIXES} substitution applied.
     * The table is a shared constant, so it is built once rather than per call.
     *
     * @param item raw item HTML; must not be null
     * @return the HTML after all literal replacements, applied in table order
     */
    private String fixItem(String item) {
        String fixed = item;
        for (String[] fix : ITEM_FIXES) {
            // String.replace works on literal text, so '$' and '&' in the rows are not special
            fixed = fixed.replace(fix[0], fix[1]);
        }
        return fixed;
    }

    private static final String CONJU_HEADER_REGEXP =
            "id='conjuBtn' value='活用表' onClick=\"showCTable\\('(.+?)'\\)\"";
    private static final Pattern CONJU_HEADER = Pattern.compile(CONJU_HEADER_REGEXP);
    /** An id that is accepted as-is: one capital ASCII letter followed by digits. */
    private static final Pattern CONJU_ID = Pattern.compile("[A-Z][0-9]+");
    /** Any markup tag; stripped from the headword before it is reported. */
    private static final Pattern MARKUP_TAG = Pattern.compile("<.+?>");
    /** Everything from the first comma to the end of the text. */
    private static final Pattern FROM_FIRST_COMMA = Pattern.compile(",.*$");
    /** Headwords whose showCTable argument was not an accepted id; reported by loadItems. */
    private final LinkedList<String> invConj = new LinkedList<String>();

    /**
     * Builds the conjugation-table reference for one entry.
     * The regexes used here are compiled once as constants, because this method
     * runs for every row that is exported.
     *
     * @param dt     headword markup of the entry; only used to report a rejected id
     * @param header header HTML that may contain the conjuBtn / showCTable('...') button
     * @return "&lt;conju&gt;ID&lt;/conju&gt;" when the header carries an accepted id, otherwise ""
     * @throws Exception whatever UTF8toSJIS.convert throws for the reported key
     */
    private String conjuRef(String dt, String header) throws Exception {
        Matcher m = CONJU_HEADER.matcher(header);
        if (!m.find()) {
            return "";
        }
        // NOTE: 40 items have "showCTable('●')"
        String id = m.group(1);
        if (CONJU_ID.matcher(id).matches()) {
            return "<conju>" + id + "</conju>";
        }
        // Rejected id: remember the bare headword (tags removed, cut at the first comma)
        String key = MARKUP_TAG.matcher(dt).replaceAll("");
        key = FROM_FIRST_COMMA.matcher(key).replaceAll("");
        key = UTF8toSJIS.convert(key);
        invConj.add(key);
        return "";
    }

    /**
     * Cuts the headword part off the front of an item's markup.
     * The head ends just before a closing span tag. When the data opens with
     * "<span ", the search for that closing tag starts after the first closing
     * span tag, so the leading nested span stays inside the returned head.
     *
     * @param itemData markup that follows the opening tag of the entry's headword span
     * @return the text before the chosen closing span tag, or "###" (after logging
     *         to stderr) when no such tag is found at an index greater than zero
     */
    private String getHead(String itemData) {
        final String closer = "</span>";
        int searchFrom = 0;
        if (itemData.startsWith("<span ")) {
            searchFrom = itemData.indexOf(closer) + closer.length();
        }
        final int headEnd = itemData.indexOf(closer, searchFrom);
        if (headEnd <= 0) {
            System.err.println("ERROR: cannot find head: " + itemData);
            return "###";
        }
        return itemData.substring(0, headEnd);
    }

    private static final String MIDASI_REGEXP =
            "<td class='midasi' name='top'><span class='mid' id='[0-9]+'>(.+)</span>";
    private static final Pattern MIDASI = Pattern.compile(MIDASI_REGEXP);

    /**
     * Produces extra key elements for an entry whose heading lists several spellings.
     * The heading text is taken from the header's midasi span and split on commas
     * (a comma followed by any number of spaces).
     *
     * @param header header HTML of the entry
     * @return one key element per spelling when there are at least two spellings;
     *         "" when the header does not match or yields a single spelling
     */
    private String auxMidasi(String header) {
        final Matcher heading = MIDASI.matcher(header);
        if (!heading.find()) {
            return "";
        }
        final String[] spellings = heading.group(1).split(", *");
        if (spellings.length <= 1) {
            return "";
        }
        final StringBuilder keys = new StringBuilder();
        for (String spelling : spellings) {
            keys.append("<key type=\"表記\">").append(spelling).append("</key>");
        }
        return keys.toString();
    }

    /**
     * Markup written around a headword, indexed by the entry's rank attribute:
     * element 0 goes before the headword, element 1 after it.
     * Only ranks 0, 10, 11, 12, 13 and 20 are filled in; every other row
     * keeps its two null elements.
     */
    private static final String[][] RANK_FIX = new String[21][2];

    static {
        RANK_FIX[0] = new String[] { "<b>", "</b>" };
        RANK_FIX[10] = new String[] { "<b>** ", "</b>" };
        RANK_FIX[11] = new String[] { "<b>* ", "</b>" };
        RANK_FIX[12] = new String[] { "<b>", "</b>" };
        RANK_FIX[13] = new String[] { "", "" };
        RANK_FIX[20] = new String[] { "<b>*** ", "</b>" };
    }

    /**
     * Matches one item row: group 1 is the numeric rank attribute of the "mid" span,
     * group 2 is everything after that span's opening tag up to the last
     * closing cell/row pair the greedy match can reach.
     */
    private static final String ITEM_REGEXP =
            "<tr><td><span class=[\"']mid[\"'] rank=[\"']([0-9]+)[\"']>(.*)</td></tr>";
    private static final Pattern ITEM = Pattern.compile(ITEM_REGEXP);

    /**
     * Reads every row of the items table from the SQLite database and writes the
     * dictionary body to standard output as an HTML document of dt/dd pairs.
     * Progress (one dot per 1000 entries), rows that do not match ITEM, unknown
     * ranks, and the list of rejected conjugation ids are reported on standard error.
     *
     * The connection, statement and result set are closed in finally blocks, so
     * they are released even when a row fails part-way through the export.
     *
     * @param dbName path of the SQLite database file
     * @throws Exception if the JDBC driver cannot be loaded, a database call fails,
     *                   or a helper invoked for a row throws
     */
    public void loadItems(String dbName) throws Exception {
        Class.forName("org.sqlite.JDBC");
        int nEntry = 0;
        Connection conn = DriverManager.getConnection("jdbc:sqlite:" + dbName);
        try {
            Statement stat = conn.createStatement();
            try {
                ResultSet rs = stat.executeQuery("select id,header,html from items;");
                try {
                    System.out.println("<html><body>");
                    while (rs.next()) {
                        String id = rs.getString("id");
                        String item = rs.getString("html");
                        String header = rs.getString("header");
                        Matcher m = ITEM.matcher(item);
                        if (!m.find()) {
                            System.err.println("ERROR: " + item);
                            continue;
                        }
                        int rank = Integer.parseInt(m.group(1));
                        String dt = getHead(m.group(2));
                        String dd = fixItem(item);
                        final String t = ">" + dt + "</span>";
                        int idx = dd.indexOf(t);
                        if (idx >= 0) {
                            // NOTE: drop dt text in dd text
                            dd = dd.substring(0, idx) + "></span>" + dd.substring(idx + t.length());
                        }

                        // A rank outside the table, or one with no markup assigned, gets no
                        // emphasis instead of an index error or the literal text "null".
                        String prefix = "";
                        String suffix = "";
                        if (rank >= 0 && rank < RANK_FIX.length
                                && RANK_FIX[rank][0] != null && RANK_FIX[rank][1] != null) {
                            prefix = RANK_FIX[rank][0];
                            suffix = RANK_FIX[rank][1];
                        } else {
                            System.err.println("WARN: unknown rank " + rank + " for item " + id);
                        }

                        System.out.print("<dt id=\"");
                        System.out.print(id);
                        System.out.print("\">");
                        System.out.print(prefix);
                        System.out.print(dt);
                        System.out.print(suffix);
                        System.out.print("</dt>");
                        System.out.print(auxMidasi(header));
                        System.out.print("<dd>");
                        System.out.print(conjuRef(dt, header));
                        System.out.print(dd);
                        System.out.println("</dd>");
                        nEntry++;
                        if ((nEntry % 1000) == 0) {
                            System.err.print(".");
                        }
                    }
                    System.out.println("</body></html>");
                } finally {
                    rs.close();
                }
            } finally {
                stat.close();
            }
        } finally {
            conn.close();
        }

        System.err.println();
        System.err.println("done. " + nEntry + " entries");
        if (!invConj.isEmpty()) {
            System.err.print("WARN: invalid conju id :");
            for (String dt : invConj) {
                System.err.print(" " + dt);
            }
            System.err.println();
        }
    }

    /**
     * Verifies that the database path names an existing, non-empty regular file.
     * On failure a banner with the reason is printed to standard error and the
     * JVM is terminated with exit status 1; on success the method simply returns.
     *
     * @param dbName path of the database file to check
     */
    static void checkDBFile(String dbName) {
        final File db = new File(dbName);
        final String problem;
        if (!db.isFile()) {
            problem = "ERROR: cannot find " + dbName;
        } else if (db.length() == 0) {
            problem = "ERROR: invalid " + dbName + ", size=0";
        } else {
            return;
        }
        final String banner = "**************************";
        System.err.println(banner);
        System.err.println(problem);
        System.err.println(banner);
        System.exit(1);
    }

    /**
     * Command-line entry point: exports the database named by the first argument
     * (default "petit.sql") to standard output.
     * The process exits with status 1 when the database file fails the
     * checkDBFile test or when the export throws.
     *
     * @param args optional single argument: path of the SQLite database file
     */
    public static void main(String[] args) {
        String dbName = (args.length == 0 ? "petit.sql" : args[0]);
        checkDBFile(dbName);

        MakeBodyHtml app = new MakeBodyHtml();
        try {
            ZephyrUtil.setUTF8Ouput();
            app.loadItems(dbName);
        } catch (Exception e) {
            e.printStackTrace();
            // Report the failure through the exit status as checkDBFile does, so a
            // truncated export is not mistaken for a successful run.
            System.out.flush();
            System.exit(1);
        }
    }
}
