package zephyr.util;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.HashSet;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Command-line filter that reads UTF-8 text and rewrites characters according
 * to a fixed substitution table and a set of code-point range rules.
 *
 * <p>Each input line is passed through {@link #convert(String)}: characters
 * registered in the substitution table are replaced by their registered
 * text, characters in selected ranges are replaced by a hexadecimal numeric
 * character reference ({@code &#x...;}), and everything else is copied
 * through. Every character that was turned into a numeric reference is also
 * remembered so that {@code main} can optionally verify it against a gaiji
 * map file.
 *
 * <p>The static tables are shared and mutated without synchronisation; the
 * class is written for single-threaded command-line use.
 */
public class UTF8toSJIS {

    /** Characters that {@link #convert(String)} emitted as numeric references, in sorted order. */
    private final static TreeSet<Character> extChSet = new TreeSet<Character>();

    /** Substitution table: code point to replacement text. Consulted before any range rule. */
    private final static HashMap<Integer, String> extFixMap = new HashMap<Integer, String>();

    /**
     * Registers a replacement string for a single character.
     *
     * @param c       character to replace
     * @param replace text emitted in place of {@code c}
     */
    private static void addExtFix(char c, String replace) {
        extFixMap.put((int) c, replace);
    }

    /**
     * Replacement text for a black (negative) circled number.
     *
     * @param i the number shown inside the circle
     * @return the plain decimal representation of {@code i}
     */
    private static String kuromarunum(int i) {
        return Integer.toString(i);
    }

    /**
     * Replacement text for a white circled number.
     *
     * @param i the number shown inside the circle
     * @return the plain decimal representation of {@code i}
     */
    private static String siromarunum(int i) {
        return Integer.toString(i);
    }

    /**
     * Replacement text for a parenthesised number.
     *
     * @param i the number shown between the parentheses
     * @return {@code i} wrapped in ASCII parentheses, e.g. {@code "(3)"}
     */
    private static String parnum(int i) {
        return "(" + i + ")";
    }

    /**
     * Formats a character as a lower-case hexadecimal numeric character
     * reference.
     *
     * @param c character to format
     * @return a string of the form {@code &#x2192;}
     */
    static String code2hex(char c) {
        return String.format("&#x%x;", (int) c);
    }

    /**
     * Registers a character so that it is always emitted as a numeric
     * character reference. Unlike the range rule in {@link #convert(String)},
     * characters registered here are not recorded in {@link #extChSet}.
     *
     * @param c character to register
     */
    private static void addExtFixAsHex(char c) {
        addExtFix(c, code2hex(c));
    }

    /**
     * Registers a character as mapping to itself. Because the table is
     * consulted before the range rules, this exempts the character from
     * being turned into a numeric character reference.
     *
     * @param c character to pass through unchanged
     */
    private static void addExtFix(char c) {
        addExtFix(c, Character.toString(c));
    }

    static {
        // Black circled numbers -> plain digits.
        addExtFix('➊', kuromarunum(1));
        addExtFix('➋', kuromarunum(2));
        addExtFix('➌', kuromarunum(3));
        addExtFix('➍', kuromarunum(4));
        addExtFix('➎', kuromarunum(5));
        addExtFix('➏', kuromarunum(6));
        addExtFix('➐', kuromarunum(7));
        addExtFix('➑', kuromarunum(8));
        addExtFix('➒', kuromarunum(9));
        addExtFix('➓', kuromarunum(10));
        addExtFix('⓫', kuromarunum(11));
        addExtFix('⓬', kuromarunum(12));
        addExtFix('⓭', kuromarunum(13));
        addExtFix('⓮', kuromarunum(14));
        addExtFix('⓯', kuromarunum(15));
        addExtFix('⓰', kuromarunum(16));
        addExtFix('⓱', kuromarunum(17));
        addExtFix('⓲', kuromarunum(18));
        addExtFix('⓳', kuromarunum(19));
        addExtFix('⓴', kuromarunum(20));
        // White circled numbers -> plain digits.
        // NOTE(review): 9, 12-17, 19 and 20 are not listed, so convert() emits
        // those characters as numeric references instead - confirm intended.
        addExtFix('①', siromarunum(1));
        addExtFix('②', siromarunum(2));
        addExtFix('③', siromarunum(3));
        addExtFix('④', siromarunum(4));
        addExtFix('⑤', siromarunum(5));
        addExtFix('⑥', siromarunum(6));
        addExtFix('⑦', siromarunum(7));
        addExtFix('⑧', siromarunum(8));
        addExtFix('⑩', siromarunum(10));
        addExtFix('⑪', siromarunum(11));
        addExtFix('⑱', siromarunum(18));
        // Parenthesised numbers -> "(n)". Only 1-7 are listed.
        addExtFix('⑴', parnum(1));
        addExtFix('⑵', parnum(2));
        addExtFix('⑶', parnum(3));
        addExtFix('⑷', parnum(4));
        addExtFix('⑸', parnum(5));
        addExtFix('⑹', parnum(6));
        addExtFix('⑺', parnum(7));
        // Always emitted as numeric character references.
        addExtFixAsHex('〖');
        addExtFixAsHex('〗');
        addExtFixAsHex('▷');
        // Passed through unchanged (presumably because they are representable
        // in the Shift_JIS output - verify against the target encoding).
        addExtFix('→');
        addExtFix('←');
        addExtFix('↑');
        addExtFix('↓');
        addExtFix('⇔');
        addExtFix('⇒');
        addExtFix('★');
        addExtFix('△');
        addExtFix('▽');
        addExtFix('◆');
        addExtFix('▼');
        addExtFix('◇');
        addExtFix('○');
        addExtFix('●');
        addExtFix('♀');
        addExtFix('♂');
        addExtFix('♭');
        addExtFix('♯');
        addExtFix('‐');
        addExtFix('―');
        addExtFix('━');
        addExtFix('†');
        addExtFix('℃');
        addExtFix('≪');
        addExtFix('［');
        addExtFix('］');
        addExtFix('…');
        addExtFix('√');
        addExtFix('【');
        addExtFix('】');
    }

    /**
     * Converts one line of text.
     *
     * <p>For every code point, in order of precedence:
     * <ol>
     * <li>a registered substitution is emitted if one exists;</li>
     * <li>code points at or above U+FF00 and the Greek range U+0391..U+03C9
     *     are copied unchanged;</li>
     * <li>U+0080..U+03FF, U+0500..U+2FFF and U+E000..U+FEFF are emitted as
     *     {@code &#x...;} references and recorded in {@link #extChSet};</li>
     * <li>U+301C (WAVE DASH) becomes U+FF5E (FULLWIDTH TILDE);</li>
     * <li>anything else is copied unchanged.</li>
     * </ol>
     *
     * @param line text to convert; must not be {@code null}
     * @return the converted text
     */
    public static String convert(String line) {
        StringBuilder sb = new StringBuilder(line.length());

        int i = 0;
        while (i < line.length()) {
            int code = line.codePointAt(i);
            // Advance by whole code points so a surrogate pair is handled once,
            // as a unit, rather than relying on its halves falling through.
            i += Character.charCount(code);

            String rep = extFixMap.get(code);
            if (rep != null) {
                sb.append(rep);
            } else if (code >= 0xff00 || (0x0391 <= code && code <= 0x3c9)) {
                sb.appendCodePoint(code);
            } else if ((0x80 <= code && code < 0x400) || (0x500 <= code && code < 0x3000)
                    || (0xE000 <= code)) {
                // NOTE: 0x400..4ff is Cyrillic characters
                // code is below 0xff00 here, so the narrowing cast is lossless.
                char ch = (char) code;
                sb.append(code2hex(ch));
                extChSet.add(ch);
            } else if (code == 0x301c) {
                // http://park3.wakwak.com/~ozashin/sw_tips/webapp_tips/sjis_charset.html
                // 0x301C (WAVE DASH) ===> 0xFF5E (FULL WIDTH TILDE)
                sb.append((char) 0xFF5E);
            } else {
                // Still below 0xff00 (includes unpaired surrogates): copy as is.
                sb.append((char) code);
            }
        }
        return sb.toString();
    }

    /**
     * Writes one converted line to standard output.
     *
     * @param line text to print
     */
    private void output(String line) {
        System.out.println(line);
    }

    /**
     * Reads UTF-8 text line by line, converts each line and prints it.
     *
     * @param args command-line arguments; {@code args[0]} names the input
     *             file, and standard input is used when it is absent or
     *             {@code "-"}
     * @throws Exception if the input cannot be opened or read
     */
    private void convert(String[] args) throws Exception {
        boolean fromFile = (args.length > 0) && !args[0].equals("-");
        InputStream in = fromFile ? new FileInputStream(new File(args[0])) : System.in;
        try {
            BufferedReader br = new BufferedReader(new InputStreamReader(in, "UTF-8"));
            String line;

            while ((line = br.readLine()) != null) {
                output(convert(line));
            }
        } finally {
            // Close only what this method opened; System.in stays open.
            if (fromFile) {
                in.close();
            }
        }
    }

    /** Matches a {@code unicode="#xHHHH"} attribute and captures the hex digits. */
    private static final String UNICODE_REGEXP = "unicode=\"#x([^\"]*)\"";
    private static final Pattern convpat = Pattern.compile(UNICODE_REGEXP);

    /** Code points found in the gaiji map file loaded by {@link #loadGaijiMap(String)}. */
    private final HashSet<Integer> gaijiUniSet = new HashSet<Integer>();

    /**
     * Scans a gaiji map file and collects the code point of the first
     * {@code unicode="#x..."} attribute on each line into
     * {@link #gaijiUniSet}.
     *
     * @param file path of the map file
     * @throws Exception if the file cannot be read, or a captured value is
     *                   not valid hexadecimal ({@link NumberFormatException})
     */
    private void loadGaijiMap(String file) throws Exception {
        InputStream in = new FileInputStream(new File(file));
        try {
            // NOTE(review): decoded with the platform default charset; only the
            // ASCII attribute is inspected, so this is presumably harmless.
            BufferedReader br = new BufferedReader(new InputStreamReader(in));
            String line;
            while ((line = br.readLine()) != null) {
                Matcher m = convpat.matcher(line);
                if (m.find()) {
                    int code = Integer.parseInt(m.group(1), 16);
                    gaijiUniSet.add(code);
                }
            }
        } finally {
            in.close();
        }
    }

    /**
     * Warns on standard error about every character that was emitted as a
     * numeric reference during conversion but is missing from the gaiji map.
     *
     * @param args command-line arguments; {@code args[2]}, when present,
     *             selects the map: {@code "arabic"} or {@code "sanskrit"}
     *             pick a built-in path, any other value is used as the path
     * @throws Exception if the map file cannot be loaded
     */
    private void checkExtChSet(String[] args) throws Exception {
        String file = "../../ebwin/GaijiMap.xml";
        if (args.length >= 3) {
            if (args[2].equals("arabic")) {
                file = "../../ebwin/ArabicGaijiMap.xml";
            } else if (args[2].equals("sanskrit")) {
                file = "../../ebwin/SanskritGaijiMap.xml";
            } else {
                file = args[2];
            }
        }
        loadGaijiMap(file);

        for (Character ch : extChSet) {
            int code = ch.charValue();
            if (!gaijiUniSet.contains(code)) {
                String m;
                m = String.format("&#x%04X=%c is not in %s", code, ch, file);
                System.err.println("WARN: " + m);
            }
        }
    }

    /**
     * Entry point: converts the input, then optionally checks the escaped
     * characters against a gaiji map.
     *
     * @param args {@code [input|-] [check] [arabic|sanskrit|mapfile]}; the
     *             check runs only when {@code args[1]} is {@code "check"}
     */
    public static void main(String[] args) {
        // Presumably switches standard output to Shift_JIS (project helper).
        ZephyrUtil.setShiftJisOuput();

        try {
            UTF8toSJIS apl = new UTF8toSJIS();
            // Conversion must run first: it populates the set that the check inspects.
            apl.convert(args);
            if (args.length >= 2 && args[1].equals("check")) {
                apl.checkExtChSet(args);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

}
