#### prime-sary.rb: Library for sary on PRIME.
#### $Id: prime-sary.rb,v 1.1.2.1 2003/12/21 14:29:27 komatsu Exp $
####
#### Copyright (C) 2003 Hiroyuki Komatsu <komatsu@taiyaki.org>
####     All rights reserved.
####     This is free software with ABSOLUTELY NO WARRANTY.
####
#### You can redistribute it and/or modify it under the terms of 
#### the GNU General Public License version 2.

require 'sary' # For PrimeMakeIndex
require 'progressbar'

# Builds Sary index files ("*.ary") for PRIME dictionary files.
#
# Every public make_index_* method names a dictionary file (@dictname plus a
# suffix) and a set of "rules".  A rule maps a tab-separated field number to
# the filename suffix of the index built for that field.  For each rule the
# file offsets of that field on every non-comment line are written as 32-bit
# big-endian integers and the result is handed to Sary::Builder#block_sort.
class PrimeSaryMakeIndex
  # dictname::       base path of the dictionary; each make_index_* method
  #                  appends its own suffix to it.
  # is_interactive:: default for the is_interactive argument of the public
  #                  methods.  When true a label is passed to the progress
  #                  iterator, otherwise nil is passed.
  def initialize(dictname, is_interactive = true)
    @dictname = dictname
    @is_interactive = is_interactive
  end

  # Builds the learning, co-occurrence and POS indexes.
  def make_indexes(is_interactive = @is_interactive)
    make_index_learning(is_interactive)
    make_index_cooccurrence(is_interactive)
    make_index_pos(is_interactive)
  end

  # Builds everything make_indexes builds, then the "-pos_literal" index.
  def make_learndict_indexes(is_interactive = @is_interactive)
    make_indexes(is_interactive)
    make_index_pos_literal(is_interactive)
  end

  # Builds the basic dictionary index and both POS indexes.
  def make_basicdict_indexes(is_interactive = @is_interactive)
    make_index_basic(is_interactive)
    make_index_pos(is_interactive)
    make_index_pos_literal(is_interactive)
  end

  # Indexes field 0 of "<dictname>-pos".
  def make_index_pos(is_interactive = @is_interactive)
    make_index_internal("-pos", {0 => ""},
                        (is_interactive ? "POS" : nil))
  end

  # Indexes field 0 of "<dictname>-pos_literal".
  def make_index_pos_literal(is_interactive = @is_interactive)
    make_index_internal("-pos_literal", {0 => ""},
                        (is_interactive ? "POS #2" : nil))
  end

  # Indexes fields 0 and 1 of "<dictname>-co".
  def make_index_cooccurrence(is_interactive = @is_interactive)
    make_index_internal("-co", {0 => "_pron", 1 => "_literal"},
                        (is_interactive ? "COOCCUR" : nil))
  end

  # Indexes fields 0, 1 and 3 of "<dictname>".
  def make_index_learning(is_interactive = @is_interactive)
    make_index_internal("", {0 => "_index", 1 => "_pron", 3 => "_literal"},
                        (is_interactive ? "LEARNING" : nil))
  end

  # Indexes fields 0 and 2 of "<dictname>".
  def make_index_basic(is_interactive = @is_interactive)
    make_index_internal("", {0 => "", 2 => "_literal"},
                        (is_interactive ? "BASIC DICT" : nil))
  end

  private

  # Writes one offsets file per rule and lets Sary sort each of them.
  #
  # suffix:: appended to @dictname to obtain the dictionary filename.
  # rules::  Hash of field number => index filename suffix; the index file is
  #          named "<dictionary><suffix>.ary".
  # label::  passed through to each_with_pbar (nil when not interactive).
  #
  # with_io and each_with_pbar are not defined in this file; they are expected
  # to be supplied by the surrounding PRIME libraries.
  def make_index_internal(suffix, rules, label)
    filename_dict = @dictname + suffix

    filenames_index = {}
    ios_index = {}
    begin
      rules.each do |offset, suffix_offset|
        filename_index = filename_dict + suffix_offset + ".ary"
        filenames_index[offset] = filename_index
        # File.open never treats a leading "|" as a command (Kernel#open
        # does), and binary mode keeps the packed integers byte-exact on
        # platforms that translate newlines.
        ios_index[offset] = File.open(filename_index, "wb")
      end

      with_io(filename_dict, "r") do |dict|
        point = 0
        dict.each_with_pbar(label) do |line|
          # Lines starting with ";" are comments: they are not indexed but
          # still advance the running file offset.
          if line !~ /^;/
            ios_index.each do |offset, io|
              io.print(get_offset_string(point, line, offset))
            end
          end
          # Offsets are positions in the file, so count bytes; String#length
          # counts characters on Ruby >= 1.9 and is wrong for multibyte text.
          point += line.bytesize
        end
      end
    ensure
      # Close every index file that was opened, even when opening a later
      # one or scanning the dictionary raised.
      ios_index.each_value { |io| io.close unless io.closed? }
    end

    # Runs only when all offsets were written and the files are closed.
    filenames_index.each_value do |filename_index|
      builder = Sary::Builder.new(filename_dict, filename_index)
      builder.block_sort
    end
  end

  # Returns the byte offset of the nth (0-based) tab-separated field of
  # +line+, packed as a 32-bit big-endian unsigned integer ('N').
  #
  # point:: byte offset of the beginning of +line+ within the file.
  # line::  one dictionary line.
  # nth::   field number.
  #
  # NOTE(review): when +line+ has fewer than nth fields the result may point
  # past the end of the line; this mirrors the historical behaviour.
  def get_offset_string(point, line, nth)
    parts = line.split(/\t/)
    # Bytes of the preceding fields plus one byte per separating tab.
    preceding_bytes = parts[0, nth].join.bytesize + nth
    [point + preceding_bytes].pack('N')
  end
end
