# prime/engine/engine-learndict2.rb
# $Id: engine-learndict2.rb,v 1.3.2.1 2003/12/13 22:25:27 komatsu Exp $
#
# Copyright (C) 2002, 2003 Hiroyuki Komatsu <komatsu@taiyaki.org>
#     All rights reserved.
#     This is free software with ABSOLUTELY NO WARRANTY.
#
# You can redistribute it and/or modify it under the terms of 
# the GNU General Public License version 2.
#

require 'jcode'
require 'prime/taiyaki'
require 'prime/engine/engine-japanese'

# Interpreter-wide multibyte setting (Ruby 1.8 $KCODE); 'e' selects EUC.
# It affects how strings are split into characters, e.g. by split(//).
$KCODE = 'e'

# Base file name of the per-user learning dictionary.  The code below joins
# it with PRIME_USER_DIR and derives the "_diff", "_2.rbo" and "-part" file
# names from it.
LEARNDICT2 = 'prime-dict-user'

# Name of the engine class defined in this file.
# NOTE(review): presumably read by whatever loads this engine file to find
# the class to instantiate -- the consumer is not visible here; confirm.
$engine_class_name = 'PrimeEngineLearndict2'

# Engine that answers lookups from, and records learned words into, the
# per-user learning dictionary (a PrimeLearnDict2 instance).
#
# Every learned word is kept in three places:
# * the in-memory PrimeLearnDict2 (@dict),
# * the append-only text log @filepath, and
# * the append-only "_diff" log, which holds only what was written since the
#   marshalled snapshot ("_2.rbo") was last rebuilt.
#
# NOTE(review): several string literals and comments in this class are
# mis-encoded in this copy of the file (probably EUC-JP, see $KCODE).  The
# literals are reproduced unchanged; restore them from a correctly encoded
# copy before relying on them.
class PrimeEngineLearndict2 < PrimeEngineJapanese
  # Sets up engine metadata, loads the user dictionary and opens both log
  # files for appending.
  def initialize
    super
    @debug_mode = false

    @name = "Learndict engine"
    @id   = "learndict"
    @description = "Learndict engine"

    # NOTE(review): original comment, unreadable in this copy:
    # ̤ͥʸ
    @filepath = File::join2(PRIME_USER_DIR, LEARNDICT2)
    @diff_filepath = @filepath + "_diff"
    @dict = initialize_userdict

    # File.open (rather than Kernel.open) so the path can never be taken as
    # a "|command" pipe specification.
    append_flags = (File::CREAT | File::WRONLY | File::APPEND)
    @file      = File.open(@filepath,      append_flags)
    @diff_file = File.open(@diff_filepath, append_flags)
    # Keep the learned text private to its owner.
    [@file, @diff_file].each { |io|
      io.chmod(0600) if io.stat.owned?
    }

    @max_candidates = 10
    @prev_context = ""
  end

  # Builds the in-memory dictionary and returns it.
  #
  # NOTE(review): Marshal::init_file / Marshal::dump_file are project
  # extensions not visible here.  init_file presumably loads the marshalled
  # snapshot when it exists and otherwise runs the block -- confirm.  The
  # snapshot is unmarshalled without validation; it must stay a private,
  # user-owned file.
  def initialize_userdict
    learndict_object = File::join2(PRIME_USER_DIR, LEARNDICT2 + "_2.rbo")

    # Stays true only when the block below was not run, i.e. the dictionary
    # came from the snapshot and still lacks the entries logged in "_diff".
    load_diff = true
    dict = Marshal::init_file(learndict_object) {
      load_diff = false
      rebuilt = PrimeLearnDict2.new
      rebuilt.load_dict(@filepath)
      rebuilt
    }
    File.chmod(0600, learndict_object) if FileTest.owned?(learndict_object)

    if load_diff
      dict.load_dict(@diff_filepath)
      Marshal::dump_file(dict, learndict_object)
    end

    # Everything in the diff log is now part of the dictionary: empty it.
    File.open(@diff_filepath, "w").close

    # @filepath is File::join2(PRIME_USER_DIR, LEARNDICT2), set by initialize
    # before this method is called.
    dict.write_dict(@filepath)
    return dict
  end

  # Records one committed conversion in the dictionary and in the log buffer.
  #
  # key     - reading that was typed
  # value   - text it was converted to
  # part    - part-of-speech label; "" is replaced by a default label
  # context - text committed just before this word
  # suffix  - text following the word; "" when absent
  # rest    - remaining trailing text; "" when absent
  #
  # Always returns true.
  def learn_word(key, value, part, context, suffix, rest)
    # NOTE(review): original design notes, unreadable in this copy:
    # 1). ʤʤɤ0ˤʤ뤳Ȥ⤢.
    # 2). Ҥ餬ʤŪ0ˤȤ⤢.
    # 3). 饤ȤξΤ֤ʤ?
    # 4). Ҥ餬ʤ촴ˤưγؽɤ뤫.

    # Words carrying this part label are never learned.
    return true if part == "ü"

    part = "̤θ" if part.length == 0
    @dict.set_word(key, value, part, context)

    # Also learn the word together with its suffix, and then together with
    # suffix + rest, as longer entries under a fixed part label.
    phrase_key   = key
    phrase_value = value
    [suffix, rest].each { |tail|
      next unless tail.length > 0
      phrase_key   += tail
      phrase_value += tail
      @dict.set_word(phrase_key, phrase_value, "ʸ", context)
    }

    write_dict(key, value, part, context, suffix, rest)

    return true
  end

  # Writes out anything still buffered and closes BOTH log files (the
  # original left the diff log open).  Always returns true.
  def close
    flush_dict
    [@file, @diff_file].each { |io|
      io.close unless io.closed?
    }
    return true
  end

  private

  # Collects candidates for +input+ from the learning dictionary.
  #
  # input  - object providing base, expands, original and context
  # method - :prefix expands the input into several queries; any other value
  #          looks up input.original only
  #
  # Returns a PrimeResult filled with PrimeCandidate objects.
  def lookup(input, method = :prefix)
    results = PrimeResult.new
    queries = make_queries(input.base)
    if method == :prefix
      input.expands.each { |string|
        queries.add(string)
      }
      rests = queries.rests
    else ## method == :exact
      queries.add(input.original, nil, "", "", false)
      rests = [""]
    end

    context_indexes = (@dict.lookup_context(input.context) || [])

    rests.each { |rest|
      indexes = lookup_dict(queries.query_lines(rest))
      debug_message("context: #{(indexes & context_indexes).join(', ')}")
      debug_message("non-context: #{(indexes - context_indexes).join(', ')}")
#      indexes = (indexes & context_indexes) + (indexes - context_indexes)

      indexes.each { |index|
        pattern, word, part = @dict.data[index]
        suffix = get_suffix(input.base, pattern, rest)
        # Base score plus recency, minus 1000 per character left unconverted.
        priority = 15000 + @dict.priority(index) - (rest.length * 1000)
        # Bonus for words previously learned right after the current context.
        priority += 3000 if context_indexes.member?(index)
        cand = PrimeCandidate.new(pattern, word, priority,
                                  part, suffix, rest)
        debug_message("lookup: (#{pattern}, #{word}, #{priority})")
        results << cand
      }
    }
    return results
  end

  # Returns the union (first occurrence wins, no duplicates) of the word
  # indexes matching each query line.
  #
  # max - only 0 is significant: it yields an empty result.  The original
  #       test "max == (nil or 0)" evaluated to exactly "max == 0".
  def lookup_dict(query_lines, max = nil)
    return [] if max == 0

    results = []
    query_lines.each { |query|
      results |= (@dict.lookup_dict(query) || [])
    }
    return results
  end

  # Returns the part labels learned for +base+, or [] when there are none.
  def lookup_part(base)
    parts = @dict.lookup_part(base)
    debug_message("lookup_part(#{base}), #{parts}")
    return (parts || [])
  end

  ## Dictionary file

  # Buffers a full "key<TAB>part<TAB>value" record for the log files.
  def write_dict(key, value, part, context, suffix, rest)
    buffer_entry([key, part, value], value, context, suffix, rest)
  end

  # Buffers a record consisting of +value+ only.
  def write_dict2(value, context, suffix, rest)
    buffer_entry(value, value, context, suffix, rest)
  end

  # Shared body of write_dict and write_dict2.
  #
  # When +context+ does not continue the previously written entry (or is
  # empty) the pending buffer is flushed and a new group is started with the
  # context as its first line.  Then the record, suffix and rest lines are
  # appended, and the text just written becomes the expected next context.
  def buffer_entry(record, value, context, suffix, rest)
    if context != @prev_context || context == ""
      flush_dict
      add_buffer(context)
    end
    add_buffer(record)
    add_buffer(suffix)
    add_buffer(rest)
    @prev_context = [value, suffix, rest].join
  end

  # Appends one line to the pending buffer.  An Array is joined with tabs;
  # "" and nil are skipped (nil used to raise NoMethodError here although a
  # nil context is accepted by the dictionary itself).
  def add_buffer(line)
    return if line.nil? || line == ""

    line = line.join("\t") if line.is_a?(Array)
    # Each group starts with an empty line; load_dict resets its context on
    # blank lines.
    @learndict_buffer ||= "\n"
    @learndict_buffer += (line + "\n")
  end

  # Appends the pending buffer to both log files and clears it.
  #
  # The locks are requested with LOCK_NB, so a failed lock is not waited for
  # and the write proceeds regardless (kept from the original).  Each file is
  # flushed BEFORE its lock is released so the text reaches the file while
  # the lock is still held; the original unlocked with the data possibly
  # still in Ruby's IO buffer.
  def flush_dict
    return unless @learndict_buffer

    [@file, @diff_file].each { |io|
      io.flock(File::LOCK_EX | File::LOCK_NB)
      begin
        io.print(@learndict_buffer)
        io.flush
      ensure
        io.flock(File::LOCK_UN | File::LOCK_NB)
      end
    }
    @learndict_buffer = nil
  end

end

# In-memory learning dictionary.
#
# Internal tables:
#   @word    - "key\tpart\tvalue" => word index
#   @data    - word index => [key, value, part]
#   @context - learning history: the word index of every set_word call, in
#              call order
#   @index   - word index => positions of that word inside @context
#   @dict    - lookup string => word indexes, most recently learned first.
#              Lookup strings are every leading substring of the key, then
#              "key\t" and "key\tpart\t".
#   @part    - key => part labels learned for that key
#   @next    - context string => word indexes learned after that context,
#              most recently learned first
class PrimeLearnDict2
  include Debug
  attr_reader :data

  def initialize
    @debug_mode = false

    # Only referenced by the disabled truncation lines in set_word.
    @max = 3

    @word = {}
    @part = {}
    @dict = {}
    @data    = []
    @context = []
    @index   = []

    # Slot 0 is pre-filled, so @context is never empty (priority divides by
    # its length).
    @data[0]    = nil
    @context[0] = 0
    @index[0]   = [0]

    # NOTE(review): original comment, unreadable in this copy:
    # ϤΤʤ.
    @next = {}
  end

  # Returns the word indexes stored under +key+ (or under "key\tpart" when
  # +part+ is given), or nil when nothing is stored.
  def lookup_dict(key, part = nil)
    dict_key = [key, part].compact.join("\t")
    debug_message("lookup_dict(#{key}, #{part}): #{@dict[dict_key]}")
    debug_message("dict_key = \"#{dict_key}\"")
    return @dict[dict_key]
  end

  # Returns the word indexes learned after +context+.  Always an Array: []
  # for a nil or empty context and, unlike the original (which returned nil),
  # also for a context that was never seen.
  def lookup_context(context)
    return [] if context.nil? || context.length == 0
    return (@next[context] || [])
  end

  # Returns the part labels learned for +key+, or nil when there are none.
  def lookup_part(key)
    debug_message("lookup_part(#{key})")
    return @part[key]
  end

  # Recency score (integer division): 1000 times the history position at
  # which the word was last learned, divided by the history length.
  def priority(word_index)
    return 1000 * @index[word_index].last / @context.length
  end

  # Registers, or refreshes, the word (key, value, part) learned after
  # +context+.  Returns false when +key+ is nil, true otherwise.
  #
  # NOTE(review): original comment, unreadable in this copy:
  ## Ūˤ context ʤ.
  def set_word(key, value, part, context)
    debug_message("set_word (#{key}, #{value}, #{part}, #{context})")
    return false unless key

    # Get/Set word_index
    word_key = [key, part, value].join("\t")
    if @word.key?(word_key)
      word_index = @word[word_key]
      debug_message("existed word: #{word_index} / #{@context.length}")
    else
      word_index = @word.length
      debug_message("new word: #{word_index} / #{@context.length}")
      @word[word_key] = word_index
      @index[word_index] = []
    end

    # Set data Array (the original assigned this twice for new words).
    @data[word_index] = [key, value, part]

    # Set dict Hash: register the word under each growing lookup string and
    # move it to the front of every list it is in.
    pattern = ""
    (key.split(//) + ["\t", part + "\t"]).each { |piece|
      pattern += piece
      @dict[pattern] = ([word_index] | (@dict[pattern] || []))
#      @dict[pattern] = @dict[pattern][0,@max]
    }

    # Set context Array: append to the history and remember the position.
    @context.push(word_index)
    @index[word_index].push(@context.length - 1)

    # Set part Hash
    parts = (@part[key] ||= [])
    parts.push(part) unless parts.member?(part)

    # Set next Array
    # NOTE(review): remainder of the original comment, unreadable: (Τʤ)
    @next[context] = ([word_index] | (@next[context] || []))
#    @next[context] = @next[context][0,@max]

    return true
  end

  # Replays a learning log into the dictionary.  Does nothing when
  # +filename+ does not exist.
  #
  # Format, as consumed here:
  # * a blank line (spaces/tabs only) ends the current group and resets the
  #   context to "";
  # * a line with a non-empty second tab-separated column is a record
  #   "key<TAB>part<TAB>value": it is learned under the current context and
  #   its value becomes the next context;
  # * any other line sets the context when the context is still "";
  #   otherwise it is a continuation line and is currently ignored.
  #
  # The file is read inside a block so the handle is always closed; the
  # original left it open.
  def load_dict(filename)
    return unless File.exist?(filename)

    context = ""
    File.open(filename, "r") { |file|
      file.each_line { |line|
        if line =~ /^[ \t]*$/
          context = ""
          next
        end

        key, part, value = line.chomp.split(/\t/)
        if part.nil? || part == ""
          context = key if context == ""
          ## FIXME: !!! (kept from the original)
          ## Continuation lines were meant to be learned as one longer entry:
          ## the previous record's key and value, each extended by the
          ## continuation text, passed to set_word with a fixed part label.
          ## That call was already commented out.  The accumulators that fed
          ## it were dead code and could raise NoMethodError after a record
          ## without a value column, so they have been removed.
        else
          set_word(key, value, part, context)
          context = value
        end
      }
    }
  end

  # Writes the auxiliary files derived from the dictionary; at present only
  # the part table, to "<filename>-part".
  def write_dict(filename)
    write_dict_partdict(filename + "-part")
  end

  private

  # Writes one line per key, sorted by key: the key followed by its part
  # labels, tab-separated.  The block form closes the file even if writing
  # fails part-way.
  def write_dict_partdict(filename)
    File.open(filename, "w") { |dictfile|
      @part.keys.sort.each { |key|
        dictfile.puts(format("%s\t%s", key, @part[key].join("\t")))
      }
    }
  end
end
