# suikyo.rb: Romaji-Hiragana conversion library for Ruby
# $Id: suikyo.rb,v 1.6 2004/01/31 07:23:25 komatsu Exp $
#
# Copyright (C) 2002 Hiroyuki Komatsu <komatsu@taiyaki.org>
#     All rights reserved.
#     This is free software with ABSOLUTELY NO WARRANTY.
#
# You can redistribute it and/or modify it under the terms of 
# the GNU General Public License version 2.
#

$KCODE = 'e'
require 'jcode'
require 'kconv'
require 'suikyo/suikyo-config'

class File
  # Joins path components like File.join, except that every component
  # but the last is first broken up on File::Separator.  A nil component
  # (again, excluding the last one) is treated as an empty string.
  #
  # paths:: path components; the last one is handed to join unchanged.
  #
  # Returns the joined path string.
  def self.join2(*paths)
    leading = paths[0..-2].collect do |segment|
      if segment
        segment.split(File::Separator)
      else
        ""
      end
    end
    join(leading, paths.last)
  end
end

# Converter that rewrites an input string (e.g. romaji) using a
# trie-shaped conversion table.
#
# A table must respond to word(char) and return either nil or a node
# that responds to result, cont and subtable.  Subtables must also
# respond to allresults_uniq (used by expand).
class Suikyo
  # The default conversion table used when a method is not given one.
  attr_reader :table

  # table:: conversion table to use; a new SuikyoTable2 is created when
  #         none is given.
  def initialize(table = nil)
    @table = (table || SuikyoTable2.new)
  end

  # Converts string with table and returns the converted text followed
  # by whatever trailing input is still pending (an incomplete key).
  def convert(string, table = @table)
    (conversion, pending, _last_node) = convert_internal(string, table)
    return conversion + pending
  end

  # Converts string and additionally lists every way the pending tail
  # could still be completed.
  #
  # Returns [converted_string, candidates], where converted_string is
  # what convert would return and candidates is an array of strings.
  def expand(string, table = @table)
    (conversion, pending, last_node) = convert_internal(string, table)

    if last_node && last_node.subtable
      # Continuations are resolved against the same table that produced
      # the conversion, not unconditionally against @table.
      suffixes = expand_table(last_node.subtable, table)
      suffixes = suffixes.push(pending).compact.uniq
      conversions = suffixes.map {|suffix|
        conversion + suffix
      }
    else
      conversions = [conversion + pending]
    end
    return [conversion + pending, conversions]
  end

  # Core conversion loop.
  #
  # Returns [conversion, pending, node]:
  # * when the input ends on a completed (or unmatched) sequence:
  #   [whole_conversion, "", nil]
  # * when the input ends in the middle of a key:
  #   [conversion_so_far, pending_input, last_matched_node]
  def convert_internal(string, table = @table)
    chars = string.split(//)
    orig_table = table
    conversion = ""

    loop do
      pending = ""
      table   = orig_table
      node    = nil

      # Walk down the trie for as long as the input keeps matching.
      while table && !chars.empty?
        head = chars[0]
        tmp_node = table.word(head)
        table = (tmp_node && tmp_node.subtable)
        # A character that matches nothing is consumed only when it is
        # the first one of this round; otherwise it is left in place so
        # the next round restarts from it at the root table.
        if tmp_node || pending == ""
          pending += head unless head == " "   # spaces are dropped
          node = tmp_node
          chars.shift
        end
      end

      # The walk ended on a node that carries a rule: emit its result
      # and push its continuation back onto the input.
      if table.nil? && node && (node.result || node.cont)
        pending = ""
        conversion += node.result if node.result
        chars.unshift(node.cont) if node.cont
      end

      if chars.empty?
        if table.nil?
          return [conversion + pending, "", nil]
        else
          return [conversion, pending, node]
        end
      else
        conversion += pending
      end
    end
  end

  # Checks whether string converts cleanly.
  #   valid:   e.g. "ringo", which converts to kana as a whole.
  #   invalid: e.g. "apple", which leaves the letters "pl" unconverted
  #            in the middle of the result.
  #
  # Returns true when at least one expansion consists of non-letters
  # optionally followed by trailing ASCII letters, false otherwise.
  def valid?(string, table = @table)
    (conversion, conversions) = expand(string, table)

    # A single candidate that does not end in converted (non-letter)
    # text is rejected outright, e.g. "appl" which ends in "pl".
    if conversions.length == 1 and conversion !~ /^[a-zA-Z]*[^a-zA-Z]+$/
      return false
    end

    conversions.each {|word|
      return true if word =~ /^[^a-zA-Z]+[a-zA-Z]*$/
    }
    return false
  end

  private

  # Lists the result strings reachable from table.
  #
  # table:: subtable to expand; nil yields [].
  # root::  table in which continuation characters are looked up.
  #
  # For a rule with a continuation, the continuation is expanded one
  # level through root; when root has no entry (or no subtable) for the
  # continuation, the continuation itself is appended literally.
  def expand_table(table, root = @table)
    return [] unless table

    results = []
    table.allresults_uniq.each {|result, cont|
      if cont
        cont_node = root.word(cont)
        # root.word may return nil for a continuation that has no rule
        # of its own; fall back to the literal continuation then.
        if cont_node && cont_node.subtable
          cont_node.subtable.allresults_uniq.each {|subresult, _subcont|
            results.push(result + subresult)
          }
        else
          results.push(result + cont)
        end
      else
        results.push(result)
      end
    }
    return results.uniq
  end
end

# Conversion table stored as a trie: each level maps one character to a
# SuikyoNode, and a node may hold a rule (result / cont) and/or a
# subtable for longer keys.
class SuikyoTable
  # Paths of the table files passed to loadfile that were found, in
  # load order.
  attr_reader :table_files

  def initialize
    @word = {}
    @table_files = []
  end

  # Registers a conversion rule.
  #
  # string::   key to convert from (must not be empty).
  # result::   text the key converts to.
  # cont::     optional continuation stored alongside the result.
  # unescape:: when true, backslash escapes in string, result and cont
  #            are decoded first (see unescape below).
  #
  # Raises ArgumentError when the key is empty after unescaping.
  def set(string, result, cont = nil, unescape = true)
    if unescape
      string = unescape(string)
      result = unescape(result)
      cont   = (cont && unescape(cont))
    end

    chars = string.split(//)
    if chars.empty?
      raise ArgumentError, "Suikyo.rb: conversion key must not be empty."
    end
    head = chars[0]
    rest = chars[1..-1].join
    @word[head] = SuikyoNode.new if @word[head].nil?

    if rest == ""
      @word[head].result = result
      @word[head].cont   = cont
    else
      # Subtables are created with self.class so subclasses keep their
      # own lookup behaviour at every level.
      @word[head].subtable = self.class.new unless @word[head].subtable
      @word[head].subtable.set(rest, result, cont, false)
    end
  end

  # Loads rules from a tab-separated table file.
  #
  # Each rule line is "key<TAB>result[<TAB>cont]".  Lines starting with
  # "#", blank lines, and everything from a line starting with "/*"
  # through a line containing "*/" are ignored.  One leading space is
  # stripped from a rule line, so a key may start with "#".
  #
  # filename::  table file; a name starting with "/" is used as is.
  # tablepath:: directory to search for other names (see loadpath).
  #
  # Returns true on success, false (after a message on $stderr) when
  # the file does not exist.
  def loadfile(filename, tablepath = nil)
    filepath = SuikyoTable::loadpath(filename, tablepath)
    unless FileTest::exist?(filepath)
      $stderr.puts "Suikyo.rb: conv-table '#{filepath}' is not found."
      return false
    end
    @table_files.push(filepath)

    # Block form closes the file even if a line raises, and File.open
    # (unlike Kernel#open) never treats the path as a command.
    File.open(filepath, "r") {|file|
      comment_flag = false
      file.readlines.each {|line|
        line = line.toeuc.chomp
        comment_flag = true if line =~ /^\/\*/
        unless line =~ /^\#|^\s*$/ or comment_flag
          (string, result, cont) = line.sub(/^ /, "").split(/\t/)
          self.set(string, result, cont)
        end
        comment_flag = false if line =~ /\*\//
      }
    }
    return true
  end

  # Resolves a table file name to a path.
  #
  # A filename starting with "/" is returned unchanged.  Otherwise it is
  # joined onto tablepath, or ENV['SUIKYO_TABLE_PATH'], or the
  # SUIKYO_TABLE_PATH constant, whichever is set first.
  def SuikyoTable::loadpath(filename, tablepath = nil)
    if filename =~ /^\//
      return filename
    else
      prefix = (tablepath || ENV['SUIKYO_TABLE_PATH'] || SUIKYO_TABLE_PATH)
      return File::join2(prefix, filename)
    end
  end

  # Returns the node stored for char at this level, or nil.
  def word(char)
    return @word[char]
  end

  # Returns the underlying character => node hash of this level.
  def allword
    return @word
  end

  # Collects every [result, cont] pair stored at this level and below,
  # without duplicates.
  #   e.g. for the subtable under "c": [chi, cha, chu, cho]
  def allresults
    results = []
    allword.each {|char, node|
      results.push([node.result, node.cont]) if node.result
      results += node.subtable.allresults if node.subtable
    }
    return results.uniq
  end

  # Like allresults, but sorted by result and with every entry dropped
  # whose result merely extends the previously kept result while having
  # the same cont.
  #   e.g. for the subtable under "c": [chi]
  #
  # Returns [] for a table without any results.
  def allresults_uniq
    results = allresults.sort {|pair1, pair2|
      pair1[0] <=> pair2[0]
    }
    return [] if results.empty?

    (base_result, base_cont) = results[0]
    uniq_results = [results[0]]

    results.each {|result, cont|
      # Keep only entries that do not start with the current base (the
      # first entry trivially matches itself and is not added twice).
      unless result.index(base_result) == 0 and cont == base_cont
        uniq_results.push([result, cont])
        base_result = result
        base_cont   = cont
      end
    }
    return uniq_results
  end


  private

  # Decodes backslash escapes:
  #   \xHH -> the character with hexadecimal code HH
  #   \0   -> nothing (the escape is removed)
  #   \c   -> c, for any other character c
  def unescape(string)
    unescaped_string = ""
    while (index = string.index('\\'))
      next_char = string[index + 1, 1]
      case next_char
      when "x"
        unescaped_string += string[0, index] + string[index + 2, 2].hex.chr
        string = (string[index + 4..-1] || "")
      when "0"
        unescaped_string += string[0, index]
        string = (string[index + 2..-1] || "")
      else
        unescaped_string += string[0, index] + next_char
        string = (string[index + 2..-1] || "")
      end
    end
    return unescaped_string + string
  end

  # One trie node: an optional rule (result, cont) plus an optional
  # subtable holding the rules for longer keys.
  class SuikyoNode
    attr_accessor :subtable, :cont, :result

    def initialize(result = nil, cont = nil, subtable = nil)
      @result   = result
      @cont     = cont
      @subtable = subtable
    end
  end
end

# SuikyoTable variant whose lookup falls back to the case-swapped
# character when the character itself has no entry.
class SuikyoTable2 < SuikyoTable
  # Returns the node stored for char, or else the node stored for
  # char.swapcase (which may be nil as well).
  def word(char)
    @word[char] || @word[char.swapcase]
  end
end

