#!/bin/bash

# Pinned upstream versions.
#   MOZCVER - mozc release; names the tarball and directory mozc-$MOZCVER
#   SEEDVER - date stamp of the mecab-ipadic-neologd seed CSV to download
MOZCVER='2.20.2677.102'
SEEDVER='20171002'


# ==============================================================================
# remove tmp files
# ==============================================================================

# Drop the unpacked mozc source tree and the local id.def copy left over from
# a previous run; both are recreated later in this script.
rm -rf -- "../../mozc-${MOZCVER}/"
rm -f -- id.def


# ==============================================================================
# generate neologd dictionary
# ==============================================================================

# Keep a copy of the current hits file; neologd.hits is rewritten at the end
# of this script.
cp neologd.hits neologd.hits_bak

echo "get mecab-user-dict-seed..."
# -nc: do not download again when the archive is already present.
wget -nc "https://github.com/neologd/mecab-ipadic-neologd/raw/master/seed/mecab-user-dict-seed.${SEEDVER}.csv.xz"

# Verify the result by looking for the file instead of trusting wget's exit
# status, so an archive kept from an earlier run is always accepted.
if [[ ! -f "mecab-user-dict-seed.${SEEDVER}.csv.xz" ]]; then
  echo "error: mecab-user-dict-seed.${SEEDVER}.csv.xz is missing (download failed?)" >&2
  exit 1
fi

# Refresh the upstream license/readme files stored in ../docs/neologd/.
# The subshell confines the directory change: if the cd fails nothing is
# downloaded into the wrong place, and the rest of the script keeps running in
# its original working directory. A failure here is reported but not fatal,
# because these files are not used by the dictionary generation below.
(
  cd ../docs/neologd/ || exit 1
  status=0
  for doc in COPYING ChangeLog README.ja.md README.md; do
    curl -O "https://raw.githubusercontent.com/neologd/mecab-ipadic-neologd/master/${doc}" \
      || status=1
  done
  exit "$status"
) || echo "warning: could not refresh all files in ../docs/neologd/" >&2

# extract mecab-user-dict-seed
echo "extract mecab-user-dict-seed..."
# -d decompress, -k keep the downloaded archive, -f overwrite an existing CSV.
# Without -f xz refuses to run when a CSV from an earlier (possibly
# interrupted) run is still present, and that stale file would be used as is.
if ! xz -dkf "mecab-user-dict-seed.${SEEDVER}.csv.xz"; then
  echo "error: failed to extract mecab-user-dict-seed.${SEEDVER}.csv.xz" >&2
  exit 1
fi

# get mozc's id.def required by get-neologd-pairs.rb
echo "get mozc's id.def..."
# Unpack the mozc tarball next to itself (two levels up). tar -C is used
# instead of a cd / cd - pair so that a failed directory change can never
# leave the script running in the wrong directory.
if ! tar xf "../../mozc-${MOZCVER}.tar.bz2" -C ../../; then
  echo "error: failed to unpack ../../mozc-${MOZCVER}.tar.bz2" >&2
  exit 1
fi

# id.def is needed by the next step, so stop here when it cannot be copied.
if ! cp "../../mozc-${MOZCVER}/src/data/dictionary_oss/id.def" .; then
  echo "error: failed to copy id.def from mozc-${MOZCVER}" >&2
  exit 1
fi

# get neologd yomi-hyouki pairs
echo "get neologd yomi-hyouki pairs..."
seed_csv="mecab-user-dict-seed.${SEEDVER}.csv"
ruby get-neologd-pairs.rb "${seed_csv}"

# Concatenate the mozc OSS dictionary files, every *.hits file found one level
# below the parent directory, and neologd.pairs into a single word list, then
# hand that list to apply-previous-hits.rb.
cat \
  ../src/dictionary_oss/dictionary*.txt.mozc \
  ../*/*.hits \
  neologd.pairs \
  > allwords
ruby ../src/apply-previous-hits.rb allwords

# Split allwords.new into chunks of (line count / 3 + 1) lines and run
# get-jawiki-hits.rb on each chunk in parallel. That chunk size yields at most
# three files (allwords.new00 .. allwords.new02); very small inputs yield
# fewer, so the jobs are started per existing chunk rather than by fixed name.

# Remove chunk files left by an interrupted run so they are not picked up below.
rm -f allwords.new[0-9][0-9]

# grep -c '' counts every line, including a last line without a newline.
total_lines=$(grep -c '' allwords.new)
chunk_lines=$(( ${total_lines:-0} / 3 + 1 ))
split -d -l "$chunk_lines" allwords.new allwords.new

job_pids=()
for chunk in allwords.new[0-9][0-9]; do
  [[ -e "$chunk" ]] || continue   # glob matched nothing
  ruby ../src/get-jawiki-hits.rb "$chunk" &
  job_pids+=("$!")
done

# Wait for each job individually so its exit status is seen; a bare `wait`
# would discard them.
jobs_failed=0
for pid in "${job_pids[@]}"; do
  wait "$pid" || jobs_failed=1
done

# Stop before the existing neologd.hits gets replaced with incomplete results.
if (( jobs_failed )); then
  echo "error: get-jawiki-hits.rb failed for at least one chunk; neologd.hits left unchanged" >&2
  exit 1
fi

# Assemble the new neologd.hits from allwords.exist (presumably the entries
# whose hits were already known -- confirm against apply-previous-hits.rb)
# followed by the per-chunk *.hits results.
cat \
  allwords.exist \
  allwords.new*.hits \
  > neologd.hits

# Delete the intermediate files created during this run.
rm neologd.pairs*
rm allwords*
