#!/bin/sh

# Version of the upstream mozc source tree this script patches, and the
# date stamp of this dictionary release. Both are used to build the
# directory and tarball names further down.
MOZCVER='2.20.2677.102'
DICVER='20171008'

# Optional: uncomment the next line to also merge the English-Japanese
# dictionary costs (wordnet-ejdic) into the generated dictionary.
#EJDIC="true"

# Optional: uncomment the next line to also merge the niconico hits.
# Leave it commented out when building a redistributable mozc-ut.
#NICODIC="true"


# ==============================================================================
# remove tmp files
# ==============================================================================

# Drop the trees and tarballs left next to this checkout by a previous run.
rm -rf "../mozc-$MOZCVER" \
	"../mozc-ut2-$MOZCVER.$DICVER" \
	"../mozc-ut2-$MOZCVER.$DICVER.tar"*

# Drop the files a previous run downloaded, copied or generated in
# chimei/ and src/.
rm -f chimei/*.zip chimei/*.CSV* chimei/*.py \
	src/id.def src/zipcode.costs


# ==============================================================================
# extract official mozc and filter entries
# ==============================================================================

# extract official mozc
# The tarball is expected one directory above this checkout. Every later
# step works on the extracted tree, so stop right away if extraction fails.
echo "extract official mozc..."
cd ../ || exit 1
tar xf "mozc-$MOZCVER.tar.bz2" || exit 1
cd - || exit 1

# Run filter-mozc-entries.rb over dictionary00.txt .. dictionary09.txt of the
# extracted mozc tree. Jobs are started in the same three parallel batches
# (4 + 3 + 3) as before; each batch is waited for before the next one starts.
echo "filter mozc entries..."
cd src/ || exit 1
filter_dict_dir="../../mozc-$MOZCVER/src/data/dictionary_oss"
filter_failed=0
for filter_batch in "00 01 02 03" "04 05 06" "07 08 09"; do
	filter_pids=""
	# $filter_batch is left unquoted on purpose: it is split into numbers.
	for filter_num in $filter_batch; do
		ruby filter-mozc-entries.rb "$filter_dict_dir/dictionary$filter_num.txt" &
		filter_pids="$filter_pids $!"
	done
	# A bare "wait" always returns 0; waiting on each PID exposes the exit
	# status of every job so that a failed filter run is not silently ignored.
	for filter_pid in $filter_pids; do
		wait "$filter_pid" || filter_failed=1
	done
done
if [ "$filter_failed" -ne 0 ]; then
	# The next step deletes the original dictionaries, so do not go on with
	# missing or partial filter results.
	echo "filter-mozc-entries.rb failed" >&2
	exit 1
fi
cd - || exit 1

# Replace the original mozc dictionaries with their filtered (*.txt.filt)
# counterparts. The cd must succeed: the rm below must never run in any
# other directory.
cd "../mozc-$MOZCVER/src/data/dictionary_oss/" || exit 1
rm dictionary*.txt

for file in *.txt.filt; do
	# When nothing matches, the pattern itself is passed through; skip it.
	[ -e "$file" ] || continue
	mv "$file" "${file%.txt.filt}.txt"
done
cd - || exit 1

# Concatenate all (filtered) mozc dictionaries into src/mozcdict, run
# remove-mozc-duplicates.rb on it, and let mozcdict.remdup take its place.
echo "remove mozc duplicates..."
dedup_oss_dir="../mozc-$MOZCVER/src/data/dictionary_oss"
cat "$dedup_oss_dir"/dictionary*.txt > src/mozcdict
cd src/
ruby remove-mozc-duplicates.rb mozcdict
mv mozcdict.remdup mozcdict
cd -

# get hinsi ID: copy mozc's id.def next to the scripts in src/
cp "$dedup_oss_dir/id.def" src/


# ==============================================================================
# generate placenames and ZIP codes
# ==============================================================================

# get zip code data
echo "get zip code data..."
# Everything below runs inside chimei/. Stop if it cannot be entered, so that
# downloads and generated files never land elsewhere and the closing "cd .."
# does not leave the script one level too high.
cd chimei/ || exit 1
# Without the archives the remaining steps only produce confusing follow-up
# errors, so stop on a failed download.
wget http://www.post.japanpost.jp/zipcode/dl/kogaki/zip/ken_all.zip || exit 1
wget http://www.post.japanpost.jp/zipcode/dl/jigyosyo/zip/jigyosyo.zip || exit 1
unzip ken_all.zip
unzip jigyosyo.zip

# modify zip code data
echo "modify zip code data..."
ruby modify-zipcode.rb KEN_ALL.CSV

# reuse the ZIP code seed generator shipped in the mozc tree
cp "../../mozc-$MOZCVER/src/dictionary/gen_zip_code_seed.py" .
cp "../../mozc-$MOZCVER/src/dictionary/zip_code_util.py" .

# temporary fix: both scripts now sit side by side in chimei/, so
# zip_code_util is imported directly instead of from the "dictionary" package
sed -i "s/from dictionary import zip_code_util/import zip_code_util/g" gen_zip_code_seed.py

# generate zip code entries
echo "generate zip code entries..."
python2 gen_zip_code_seed.py --zip_code=KEN_ALL.CSV.modzip --jigyosyo=JIGYOSYO.CSV \
	> ../src/zipcode.costs

# generate chimei costs
echo "generate chimei costs..."
ruby get-chimei-costs.rb KEN_ALL.CSV.modzip
cd .. || exit 1


# ==============================================================================
# generate ut dictionary
# ==============================================================================

# Gather every *.hits and *.hits.modhits file found one directory level down
# into src/jawikihits_all, then continue inside src/.
cat */*.hits */*.hits.modhits > src/jawikihits_all

cd src/

# Only when NICODIC is enabled: append the niconico hits to the collection.
case "$NICODIC" in
	true)
		cat jawikihits_all ../niconico/niconico.hits > jawikihits_all.new
		mv jawikihits_all.new jawikihits_all
		;;
esac

# Run the conversion scripts on the merged mozc dictionary and on the
# collected hits (working directory: src/).
echo "change mozcdic order..."
ruby change-mozcdic-order-to-utdic-order.rb mozcdict
echo "convert jawikihits to costs..."
ruby convert-jawikihits-to-costs.rb jawikihits_all

# generate ekimei costs
# If this cd failed silently, the "cd -" below would jump to the previous
# directory (the checkout root) instead of back to src/, and every later
# relative path would be wrong. Stop instead.
cd ../ekimei/ || exit 1
ruby generate-ekimei-costs.rb ekimei.hits
cd - || exit 1

# Join all cost sources into utdict.costs (working directory: src/).
cat mozcdict.utorder \
	jawikihits_all.costs \
	../chimei/KEN_ALL.CSV.modzip.costs \
	../edict-katakana-english/kanaeng.costs \
	../ekimei/ekimei.costs > utdict.costs

# Only when EJDIC is enabled: append the English-Japanese dictionary costs.
case "$EJDIC" in
	true)
		cat utdict.costs ../wordnet-ejdic/wordnet-ejdic.costs > utdict.costs.new
		mv utdict.costs.new utdict.costs
		;;
esac

echo "split new words and add id..."
ruby split-new-words-and-add-id.rb utdict.costs
mv utdict.costs.new utdict.costs

# Put the UT entries and the ZIP code entries in front of mozc's
# dictionary00.txt. The result is built locally first because the target
# file is also one of the inputs.
merged_dict00="../../mozc-$MOZCVER/src/data/dictionary_oss/dictionary00.txt"
cat utdict.costs zipcode.costs "$merged_dict00" > dictionary00.txt
mv dictionary00.txt "$merged_dict00"

# change mozc branding
# Edit const.h through its path instead of cd-ing there and back: an
# unchecked cd followed by "cd -" could leave the script in the wrong
# directory for all later steps.
sed -i 's/"Mozc"/"Mozc-UT2"/g' "../../mozc-$MOZCVER/src/base/const.h"

# copy docs and PKGBUILD
# Leave src/ for the checkout root; the relative paths below depend on it.
cd .. || exit 1
mkdir -p "../mozc-$MOZCVER/docs-ut/"
# The files are listed one by one: {a,b} brace expansion is a bash extension
# and is not performed by every /bin/sh (e.g. dash), where cp would receive
# the literal pattern and fail.
cp AUTHORS ChangeLog COPYING README.md "../mozc-$MOZCVER/docs-ut/"
cp PKGBUILD "../mozc-$MOZCVER/"

# NOTE(review): because docs-ut/ already exists, GNU cp places this copy at
# docs-ut/docs/. Confirm that the two removals below still point at the
# intended directories.
cp -r docs/ "../mozc-$MOZCVER/docs-ut/"

# Drop the docs of optional dictionaries that were not enabled.
if [ "$EJDIC" != "true" ]; then
	rm -rf "../mozc-$MOZCVER/docs-ut/wordnet-ejdic/"
fi

if [ "$NICODIC" != "true" ]; then
	rm -rf "../mozc-$MOZCVER/docs-ut/niconico/"
fi

# Move up next to the patched tree and give it its release name.
cd ..
release_name="mozc-ut2-$MOZCVER.$DICVER"
mv "mozc-$MOZCVER" "$release_name"

# Pack the tree into a tar archive, then compress it with xz; the thread
# count is passed to xz through XZ_OPT and -f overwrites an existing .tar.xz.
echo "compress mozc-ut2..."
tar cf "$release_name.tar" "$release_name"
XZ_OPT="--threads=0" xz -f "$release_name.tar"
