#!/usr/bin/ruby
# -*- coding:utf-8 -*-

require 'nkf'


# ==============================================================================
# fix_costs
# ==============================================================================

# Rewrites the cost column of the dictionary named by the global $filename and
# writes the result to "<$filename>.fixcosts".
#
# Files read (paths are relative to the current directory):
#   $filename             tab-separated rows: reading, left id, right id, cost, surface.
#                         Rows whose first field is "jawikititles" carry a jawiki
#                         title hit count in the cost column instead of a cost.
#   id.def                supplies the id of the "noun, proper noun, general" row;
#                         only entries with that left id go through the word filter.
#   ../src/filter-ut.txt  one filter per line: a plain substring, or /regexp/.
#
# All files are read as UTF-8. A missing file raises the usual Errno error.
fix_costs = Proc.new do
	title_marker = "jawikititles"

	# Load the neologd dictionary that was already converted to mozc ids and
	# costs, mixed with the jawiki title hit counts:
	#   jawikititles	0	0	34	中居正広
	#   なかいまさひろ	1917	1917	6477	中居正広
	keyed = []
	File.read($filename, encoding: "UTF-8").split("\n").each do |line|
		fields = line.split("\t")

		# A blank (or tab-only) line has no fields; skip it instead of crashing.
		next if fields.empty?

		# Put the surface first so that sorting places each title row directly
		# before the dictionary rows that share its surface:
		#   中居正広	jawikititles	0	0	34
		#   中居正広	なかいまさひろ	1917	1917	6477
		keyed << (fields[-1] + "\t" + fields[0..3].join("\t"))
	end

	entries = keyed.sort.map { |line| line.split("\t") }

	# Walk the sorted rows, comparing each row with the one just before it.
	# Starting with no previous row means the very first row is adjusted too.
	previous = nil
	entries.each do |current|
		if previous && previous[1] == title_marker && previous[0] == current[0]
			# The surface hits a jawiki title: move its cost into the 5000 range.
			# The hit count is capped at 30, because lowering the cost too far
			# would affect other conversions.
			#   しながわく	1920	2013	3858	品川区
			# NOTE(review): only the row directly after the title row takes this
			# branch; further readings of the same surface fall through to the
			# 8000 range below. Confirm whether that is intended.
			hits = [previous[-1].to_i, 30].min
			current[-1] = (5000 + (current[-1].to_i / 10) - (hits * 30)).to_s
		elsif current[1] != title_marker
			# The surface does not hit a jawiki title: move its cost into the
			# 8000 range. This keeps "ねがいがいつかかなう" from being converted
			# to "願いが一花カナウ".
			current[-1] = (8000 + (current[-1].to_i / 10)).to_s
		end

		previous = current
	end

	# Drop the title rows and restore the column order
	# (reading, left id, right id, cost, surface).
	dictionary = entries.reject { |entry| entry[1] == title_marker }
	dictionary = dictionary.map { |entry| entry[1..-1].join("\t") + "\t" + entry[0] }.sort

	# Look up the id of the part of speech whose entries are filtered.
	# When several rows match, the last one wins; nil when none does.
	id_filter = nil
	File.foreach("id.def", encoding: "UTF-8") do |row|
		if row.include?(" 名詞,固有名詞,一般,*,*,*,*")
			id_filter = row.chomp.split(" ")[0]
		end
	end

	# Load the word filter. An entry that starts with "/" is compiled into a
	# regular expression (the first and last characters are stripped); empty
	# lines are ignored; anything else is matched as a plain substring.
	filter = File.readlines("../src/filter-ut.txt", encoding: "UTF-8").map do |row|
		entry = row.chomp

		if row.start_with?("/")
			/#{entry[1..-2]}/
		elsif entry == ""
			nil
		else
			entry
		end
	end
	filter = filter.compact

	# The block form closes the output file even if writing raises.
	File.open($filename + ".fixcosts", "w") do |dicfile|
		dictionary.each do |line|
			s = line.chomp.split("\t")
			surface = s[4]

			# Drop entries of the filtered part of speech whose surface matches
			# any filter. Rows without a surface column are never filtered.
			if surface && s[1] == id_filter
				next if filter.any? { |pattern| surface.index(pattern) }
			end

			dicfile.puts s.join("\t")
		end
	end
end


# ==============================================================================
# main
# ==============================================================================

# Every command-line argument is the path of a dictionary file to process.
targetfiles = ARGV

# Without any file there is nothing to do: show the usage line and stop.
if targetfiles.empty?
	puts "Usage: ruby script.rb [FILE]"
	exit
end

targetfiles.each do |path|
	# fix_costs reads the path of its input from this global.
	$filename = path

	fix_costs.call
end
