# prime/engine/engine-userdict2-en.rb
# $Id: engine-userdict2-en.rb,v 1.2 2005/03/07 07:51:32 komatsu Exp $
#
# Copyright (C) 2002 Hiroyuki Komatsu <komatsu@taiyaki.org>
#     All rights reserved.
#     This is free software with ABSOLUTELY NO WARRANTY.
#
# You can redistribute it and/or modify it under the terms of 
# the GNU General Public License version 2.
#

require 'prime/prime-dict-config.rb'
require 'prime/engine/engine'
require 'prime/engine/engine-userdict2-lib.rb'

require 'thread'

# Base file name of the English user dictionary; the engine class below
# joins it with PRIME_USER_DIR to build the dictionary path.
USERDICT2ENGLISH = 'userdict2english'
# Name of the engine class defined in this file.
# NOTE(review): presumably read by the code that loads engine files in
# order to find the class to instantiate -- confirm against the loader.
$engine_class_name = 'PrimeEngineUserdict2English'

# Prime engine that serves English words from the "userdict2" user
# dictionary and records newly learned words in it.
#
# Lookups are delegated to a PrimeDynamicDict; every hit is converted into
# a PrimeWord whose score is a recency-based priority plus a fixed offset.
# Learned words are additionally appended to "<dictname>.log" through an
# in-memory buffer that is written out by dictbuffer_flush.
class PrimeEngineUserdict2English < PrimeEngine
  # Open the dictionary, the POS table and the log file, and set up the
  # scoring constants.
  def initialize
    super

    # Offsets added to the recency priority: the first for lookups made
    # with context 0, the second for lookups made with a context word id.
    @score_offset         = 15000
    @score_offset_context = 18000

    @name = "Userdict2 engine for English"
    @id   = "userdict2-english"
    @description = "Userdict2 engine for English"

    @dictname = File::join2(PRIME_USER_DIR, USERDICT2ENGLISH)
    @dict = PrimeDynamicDict.new(@dictname)
    @postable = PrimeDictPOS.new(@dictname)

    initialize_priority()
    initialize_log()

    @max_candidates = 10
    # Literal and dictionary id of the most recently learned word; they are
    # updated by learn_word and consulted by get_context_id.
    @last_context_literal = ""
    @last_context_id      = 0
  end

  # Set up the constants of the recency scoring used by get_priority:
  # a word learned just now gets @base_score, and the priority decays
  # logarithmically, reaching 0 for a word learned @longest_time seconds
  # ago (and going negative beyond that).
  def initialize_priority
    @longest_time = 3600 * 24 * 7   # one week, in seconds
    @base_score = 1000
    @step_number = @base_score / get_priority_func(@longest_time)
  end

  # Open "<dictname>.log" for appending and reset the in-memory log buffer.
  # The file is created with mode 0600 so that it is never exposed with
  # wider permissions; a pre-existing file owned by the current user is
  # tightened to 0600 as well.
  def initialize_log
    file_option = (File::CREAT | File::WRONLY | File::APPEND)
    # File.open (rather than Kernel#open) never interprets the path as a
    # command, whatever the dictionary name starts with.
    file_io = File.open(@dictname + ".log", file_option, 0600)
    file_io.chmod(0600) if file_io.stat.owned?
    @file_logbuffer = file_io
    @logbuffer = ""
  end

  # Convert one dictionary hit into a PrimeWord.
  #
  # data::         a pair [id, word_data], where word_data starts with
  #                pron, literal, pos (POS index) and timestamp.
  # score_offset:: value added to the recency priority of the word.
  #
  # Returns the PrimeWord built from the hit.
  def parse_word(data, score_offset = @score_offset)
    (_id, word_data) = data
    (pron, literal, pos, timestamp) = word_data
    score = get_priority(timestamp) + score_offset
    posname = @postable.get_pos(pos)
    ## FIXME: This score adjustment is ad-hoc.
    ## FIXME: (2004-05-05) <komatsu@taiyaki.org>
    # Words whose POS name starts with "::" (the form learn_word gives to
    # suffixes) or is empty (the form it gives to "rest") are demoted.
    if posname =~ /^::/ || posname == ""
      score -= 2000
    end
    PrimeWord.new(pron, literal, posname, score)
  end

  # Look up words whose pronunciation starts with one of query_lines.
  # context is a context word id (0 for none) and pos a POS index; both
  # are passed through to the dictionary.  Returns an Array of PrimeWord.
  def lookup_dict_prefix(query_lines, context = 0, pos = 0)
    collect_words(query_lines, context) do |query|
      @dict.search_prefix(query, context, pos)
    end
  end

  # Same as lookup_dict_prefix, but uses the dictionary's exact search.
  def lookup_dict_exact(query_lines, context = 0, pos = 0)
    collect_words(query_lines, context) do |query|
      @dict.search_exact(query, context, pos)
    end
  end

  # Same as lookup_dict_prefix, but uses the dictionary's literal-prefix
  # search.
  def lookup_dict_literal_prefix(query_lines, context = 0, pos = 0)
    collect_words(query_lines, context) do |query|
      @dict.search_literal_prefix(query, context, pos)
    end
  end

  # Same as lookup_dict_prefix, but uses the dictionary's literal-exact
  # search.
  def lookup_dict_literal_exact(query_lines, context = 0, pos = 0)
    collect_words(query_lines, context) do |query|
      @dict.search_literal_exact(query, context, pos)
    end
  end

  # Run the lookup selected by query.method (:context, :prefix, :exact,
  # :literal_prefix or :literal_exact) and return the hits as a word list.
  # Any other method yields an empty list.
  #
  # Except for :context, each lookup is made once with the id of the
  # context word and once with context 0.
  # NOTE(review): when the context id is 0 these two calls are identical,
  # so every hit is added twice -- confirm that callers merge duplicates.
  def search(query)
    words = PrimeWordList.new()
    # pos stays nil when the query carries no POS restriction.
    pos = query.pos ? @postable.get_index(query.pos) : nil

    context_id = get_context_id(query.context)

    case query.method
    when :context
      if context_id != 0
        words += lookup_dict_prefix(query.input, context_id)
      end

    when :prefix
      words += lookup_dict_prefix(query.input, context_id, pos)
      words += lookup_dict_prefix(query.input, 0, pos)

    when :exact
      words += lookup_dict_exact(query.input, context_id, pos)
      words += lookup_dict_exact(query.input, 0, pos)

    when :literal_prefix
      words += lookup_dict_literal_prefix(query.input, context_id, pos)
      words += lookup_dict_literal_prefix(query.input, 0, pos)

    when :literal_exact
      words += lookup_dict_literal_exact(query.input, context_id, pos)
      words += lookup_dict_literal_exact(query.input, 0, pos)
    end

    words
  end

  # ---------------------------------------- End of searching methods.

  # Record a committed word in the dictionary.
  #
  # pron, literal:: pronunciation and literal of the word.
  # pos::           POS name; an empty name is replaced by a default one.
  # context::       literal of the preceding word.
  # suffix, rest::  optional trailing strings; each non-empty one is
  #                 learned as its own entry, chained to the entry learned
  #                 just before it.
  #
  # Always returns true.  Nothing is stored when $PRIME_NO_SAVE is set.
  #
  # NOTE(review): the non-ASCII POS literals below look like multi-byte
  # text that was decoded with the wrong encoding.  They are runtime
  # values and are kept exactly as found -- confirm the file encoding.
  def learn_word(pron, literal, pos, context, suffix, rest)
    return true if $PRIME_NO_SAVE

    # This POS value only triggers a flush of the log buffer; no word is
    # learned for it.
    if pos == "ü"
      dictbuffer_flush()
      return true
    end

    pos = pos.empty? ? "̤θ" : pos
    context_id = get_context_id(context)

    ## Flush dictbuffer if context is empty or is not last_context
    if context != @last_context_literal || context == ""
      dictbuffer_flush()
    end

    word_id = learn_word_internal(pron, literal, pos, context_id)

    if suffix.length > 0
      word_id = learn_word_internal(suffix, suffix, "::#{pos}", word_id)
    end
    if rest.length > 0
      word_id = learn_word_internal(rest, rest, "", word_id)
    end

    # Only when both suffix and rest are present is the whole sequence
    # learned as one additional entry; that entry is not logged.
    unless suffix.empty? || rest.empty?
      pron    += (suffix + rest)
      literal += (suffix + rest)
      word_id = learn_word_internal(pron, literal, "ʸ", context_id,
                                    :nologging)
    end

    @last_context_id      = word_id
    @last_context_literal = literal

    true
  end

  # Store one entry in the dictionary, stamped with the current time, and
  # return the id the dictionary assigned to it.  Unless logging is
  # something other than :logging, a tab-separated line
  # (pron, pos, literal, timestamp) is appended to the log buffer.
  def learn_word_internal(pron, literal, pos, context_id, logging = :logging)
    timestamp = Time.new().to_i()

    word_id = @dict.learn_word(pron, literal, @postable.get_index(pos),
                               timestamp, context_id)

    if logging == :logging
      @logbuffer += [pron, pos, literal, timestamp].join("\t") + "\n"
    end

    word_id
  end

  # Return the dictionary id of the word whose literal is context, or 0
  # when context is nil or unknown.  The most recently learned word is
  # answered from the cached id without a dictionary search.
  def get_context_id(context)
    return @last_context_id.to_i if context == @last_context_literal
    return 0 unless context

    word = @dict.search_literal_exact(context)[0]
    word ? word[0] : 0
  end

  # ---------------------------------------- End of learning module

  # Build a Hash mapping each pronunciation yielded by string.increase to
  # the list of POS names the dictionary knows for it.
  # NOTE(review): String#increase is defined outside this file; presumably
  # it yields successively longer prefixes of the string -- confirm.
  def get_pos_data(string)
    pos_data = {}
    string.increase do |pron|
      pos_data[pron] = @dict.search_pos_list(pron).map do |pos_num|
        @postable.get_pos(pos_num)
      end
    end
    pos_data
  end

  # ---------------------------------------- End of getting a pos_list module

  # Append the buffered log lines, followed by an empty line, to the log
  # file and clear the buffer.  Does nothing when $PRIME_NO_SAVE is set or
  # the buffer is empty.
  #
  # The lock is requested without blocking and its result is not checked,
  # so the write proceeds even if another process holds the lock.
  def dictbuffer_flush
    return if $PRIME_NO_SAVE
    return if @logbuffer.empty?

    @file_logbuffer.flock(File::LOCK_EX | File::LOCK_NB)
    begin
      @file_logbuffer.print(@logbuffer + "\n")
      # Push the data out of Ruby's IO buffer while the lock is still
      # held; otherwise it could reach the file after the unlock.
      @file_logbuffer.flush
    ensure
      # Release the lock even if the write failed.
      @file_logbuffer.flock(File::LOCK_UN | File::LOCK_NB)
    end
    @logbuffer = ""
  end

  # Write out any pending log lines.  Always returns true.
  def close
    dictbuffer_flush()
    true
  end

  # ---------------------------------------- End of writing dictionaries.

  private

  # Shared body of the lookup_dict_* methods: for every query in
  # query_lines, yield it to the block (which performs the dictionary
  # search) and convert each hit with parse_word.  Lookups made with
  # context 0 use @score_offset, all others @score_offset_context.
  # Returns an Array of PrimeWord.
  def collect_words(query_lines, context)
    score_offset = (context == 0) ? @score_offset : @score_offset_context

    results = []
    query_lines.each do |query|
      yield(query).each do |data|
        results.push(parse_word(data, score_offset))
      end
    end
    results
  end

  # Recency priority of a word learned at timestamp (seconds since the
  # epoch): @base_score for a word learned now, decreasing with age.
  def get_priority(timestamp)
    time_diff = Time.new().to_i - timestamp
    (@base_score - get_priority_func(time_diff) * @step_number).to_i
  end

  # Logarithmic ageing curve: log(number / @base_score + 1), or 0 for a
  # negative number (a timestamp lying in the future).
  def get_priority_func(number)
    return 0 if number < 0

    Math::log(number * (1.0 / @base_score) + 1)
  end
end

