# prime/engine/engine-basic.rb
# $Id: engine-basic.rb,v 1.1.1.1.2.5 2004/01/11 23:11:46 komatsu Exp $
#
# Copyright (C) 2002 Hiroyuki Komatsu <komatsu@taiyaki.org>
#     All rights reserved.
#     This is free software with ABSOLUTELY NO WARRANTY.
#
# You can redistribute it and/or modify it under the terms of 
# the GNU General Public License version 2.
#

require 'prime/taiyaki'
require 'sary'
require 'prime/prime-dict-config.rb'
require 'prime/engine/engine-japanese'

# Name of the engine class defined in this file.
# NOTE(review): presumably read by whatever code loads this file in order to
# find the class to instantiate -- confirm against the loader.
$engine_class_name = 'PrimeEngineBasic'

# Dictionary-backed conversion engine.
#
# Words are looked up in text dictionaries through Sary::Searcher objects.
# Every dictionary line is handled as tab-separated fields, read here as
#   reading (pattern / pron), part of speech, word (literal), frequency.
class PrimeEngineBasic < PrimeEngineJapanese
  # Sets the engine metadata and opens the dictionaries found under
  # PRIME_DICT_DIR.
  def initialize
    super

    @name = "Basic engine"
    @id   = "basic"
    @description = "Basic engine"

    initialize_posdict()
    @dict_file = File::join2(PRIME_DICT_DIR, 'prime-dict')
    # Searcher over the main dictionary with its default index.
    @dict = Sary::Searcher.new(@dict_file)

    # Second searcher over the same file, using the separate "_literal.ary"
    # index.  NOTE(review): presumably that index points at the word field
    # of each line -- confirm against how the dictionary package builds it.
    @dict_literal = Sary::Searcher.new(@dict_file, @dict_file + "_literal.ary")
    @max_candidates = 15
  end

  # Opens the part-of-speech dictionary into @partdict.
  #
  # 'prime-dict-pos' is preferred; 'prime-dict-part' (reported as obsolete by
  # the warning below) is used only as a fallback.  When neither file exists
  # an error is printed and the process exits with a failure status.
  def initialize_posdict()
    file_posdict  = File::join2(PRIME_DICT_DIR, 'prime-dict-pos')
    file_partdict = File::join2(PRIME_DICT_DIR, 'prime-dict-part')

    if File::exist?(file_posdict)
      if File::exist?(file_partdict)
        $stderr.puts("PRIME Warning:")
        $stderr.puts("    Both prime-dict-pos and prime-dict-part exist")
        $stderr.puts("    under '#{PRIME_DICT_DIR}'.")
        $stderr.puts("    The file name 'prime-dict-part' is obsolete.")
      end
      @partdict_file = file_posdict
    elsif File::exist?(file_partdict)
      @partdict_file = file_partdict
    else
      $stderr.puts("PRIME ERROR:")
      $stderr.puts("    The POS dictionary #{file_posdict} is not found.")
      $stderr.puts("    Please install the latest prime-dict package.")
      # A bare exit() reports success; a missing dictionary is a failure.
      Kernel::exit(1)
    end
    @partdict = Sary::Searcher.new(@partdict_file)
  end

  private

  # Converts raw dictionary lines into a PrimeResult of PrimeCandidate objects.
  #
  # results    :: array of tab-separated dictionary lines.
  # input_base :: when given, the suffix of each candidate is computed with
  #               get_suffix(input_base, pattern, rest); otherwise it is "".
  # rest       :: passed through to every candidate; each unit of
  #               rest.length lowers the priority by 1000.
  def process_candidates(results, input_base = nil, rest = "")
    candidates = PrimeResult.new
    results.each {|result|
      (pattern, part, word, freq) = result.split(/\t/)
      suffix = input_base ? get_suffix(input_base, pattern, rest) : ""
      priority = freq.to_i + 10000 - 1000 * rest.length
      candidates << PrimeCandidate.new(pattern, word, priority,
                                       part, suffix, rest)
    }
    return candidates
  end

  # Joins the segments produced by process_data back into dictionary lines.
  #
  # data_list :: array of [readings, words] pairs, one pair per segment.
  #
  # All segments but the last are combined step by step into [reading, word]
  # pairs that occur in the dictionary; the last segment then selects the
  # dictionary lines to return.  Returns an array of chomped lines, or []
  # when data_list is empty.  data_list itself is left unmodified.
  def lookup_mixed_concat_data_list(data_list)
    # An empty list has no last segment to finish with.
    return [] if data_list.empty?

    last_data = data_list.last
    data = [["", ""]]
    data_list[0...-1].each {|segment|
      tmp_data = []
      data.each {|(pron1, literal1)|
        segment[1].each {|literal2|
          next unless @dict_literal.search(literal1 + literal2)
          @dict_literal.each_context_line {|line|
            (pron, _pos, literal) = line.chomp.split(/\t/)
            segment[0].each {|pron2|
              if pron == (pron1 + pron2)
                pair = [pron, literal]
                tmp_data.push(pair) unless tmp_data.include?(pair)
              end
            }
          }
        }
      }
      data = tmp_data
    }

    results = []
    data.each {|(pron1, literal1)|
      last_data[1].each {|literal2|
        next unless @dict_literal.search(literal1 + literal2)
        @dict_literal.each_context_line {|line|
          entry = line.chomp
          pron  = entry.split(/\t/)[0]
          last_data[0].each {|pron2|
            results << entry if pron == (pron1 + pron2)
          }
        }
      }
    }

    return results
  end

  # Looks up input.base by segmenting it with process_data and recombining
  # the segments.  The max argument is accepted but not used.
  def lookup_mixed(input, max = 10)
    data_list = process_data(input.base)
    results   = lookup_mixed_concat_data_list(data_list)
    return process_candidates(results)
  end

  # Splits pattern into segments, longest dictionary match first.
  #
  # Returns an array of [readings, words] pairs:
  # * a prefix found in @dict (followed by a tab) yields
  #   [[prefix], words collected from the matching lines];
  # * otherwise a prefix found in @dict_literal (followed by a tab) yields
  #   [readings collected from the matching lines, [prefix]];
  # * when no prefix matches at all, the first character stands for itself.
  # The remainder of the pattern is processed recursively.
  def process_data(pattern)
    return [] if pattern.empty?

    chars = pattern.split(//)
    chars.length.step(1, -1) {|l|
      head = chars[0, l].join()
      tail = chars[l..-1].join()

      if @dict.search(head + "\t")
        literals = []
        @dict.each_context_line {|line|
          literal = line.split(/\t/)[2]
          literals << literal unless literals.include?(literal)
        }
        return [[[head], literals], *process_data(tail)]
      end

      if @dict_literal.search(head + "\t")
        prons = []
        @dict_literal.each_context_line {|line|
          pron = line.split(/\t/)[0]
          prons << pron unless prons.include?(pron)
        }
        return [[prons, [head]], *process_data(tail)]
      end
    }

    # Nothing matched, not even the single first character.
    first = chars[0]
    return [[[first], [first]], *process_data(chars[1..-1].join())]
  end

  # Looks up input.original through the literal index.
  def lookup_expansion(input, max = 10)
    return lookup_expansion_internal([input.original], max)
  end

  # Searches @dict_literal for any of patterns and returns at most max
  # candidates (no limit when max is nil).
  def lookup_expansion_internal(patterns, max = 10)
    results = []
    if @dict_literal.multi_search(patterns)
      @dict_literal.sort_occurrences()
      while (line = @dict_literal.get_next_context_line)
        # Test with >= before appending so that no more than max lines are kept.
        break if max && results.length >= max
        results << line.chomp
      end
    end
    return process_candidates(results)
  end

  # Main dictionary lookup.
  #
  # method :: :prefix adds every string of input.expands to the queries and
  #           allows up to @max_candidates results over all of queries.rests;
  #           any other value adds input.original as a query and allows up
  #           to 100 results.
  #
  # Returns a PrimeResult of PrimeCandidate objects.
  def lookup(input, method = :prefix)
    results = PrimeResult.new
    queries = make_queries(input.base)
    if method == :prefix
      input.expands.each {|string|
        queries.add(string)
      }
      rests = queries.rests
      max   = @max_candidates
    else
      queries.add(input.original, nil, "", "", false)
      rests = [""]
      max   = 100
    end

    rests.each {|rest|
      cur_results = lookup_dict(queries.query_lines(rest), max)
      cur_results.each {|result|
        (pattern, part, word, freq) = result.split(/\t/)
        suffix    = get_suffix(input.base, pattern, rest)
        priority2 = freq.to_i + 10000 - rest.length * 1000
        results << PrimeCandidate.new(pattern, word, priority2,
                                      part, suffix, rest)
      }
      # The remaining budget is shared by the following rests.
      max -= cur_results.length
      break unless max > 0
    }
    return results
  end

  # Searches @dict for any of query_lines.
  #
  # max :: maximum number of lines to return; nil means no limit and a
  #        non-positive number yields no lines at all.
  #
  # Returns an array of chomped dictionary lines.
  def lookup_dict(query_lines, max = nil)
    results = []
    return results if max && max <= 0
    return results if query_lines.empty?

    if @dict.multi_search(query_lines)
      @dict.sort_occurrences
      while (line = @dict.get_next_context_line)
        results << line.chomp
        break if max && results.length >= max
      end
    end
    return results
  end

  # Returns the fields that follow the first one on the first @partdict line
  # matching base followed by a tab, or [] when there is no such line.
  def lookup_part(base)
    return [] unless @partdict.search(base + "\t")

    (_yomi, *parts) = @partdict.get_next_context_line.chomp.split(/\t/)
    return parts
  end
end

