/* Copyright (C) 2003  Yamagata Yoriyuki
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 *
 * The following are the license and the copyright of the original code 
 */

/* Copyright (c) 2003 Nara Institute of Science and Technology
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name Nara Institute of Science and Technology may not be used to
 *    endorse or promote products derived from this software without
 *    specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY Nara Institute of Science and Technology 
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 
 * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE Nara Institute
 * of Science and Technology BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $Id: dumpdic.c,v 1.3 2004/02/05 03:49:03 yori Exp $
 */

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "chadic.h"
#include "dartsdic.h"
#include "romaji.h"

/* Sentinel passed as the `compound` value of a data record when no
 * compound constituent list was dumped for that entry. */
#define NO_COMPOUND LONG_MAX

/*
 * Write the string s to file.
 *
 * A non-empty string is written with fputs().  An empty string is written
 * as a single NUL byte with fputc(), so something is always emitted.
 * Returns the result of whichever stdio call was made (EOF on failure).
 */
static int
myfputs(char *s, FILE *file){
  return (*s == '\0') ? fputc('\0', file) : fputs(s, file);
}

/*
 * Append the data record of one lexicon entry to datfile.
 *
 * The record is a da_dat_t header (field lengths plus the `compound`
 * value) followed by the headword, reading, pronunciation, base form and
 * info strings, each followed by a NUL byte.  A final NUL is added when
 * needed so that the file offset after the record is even.
 *
 * lex       entry whose strings and lengths are written
 * datfile   output stream; must support ftell()
 * compound  value stored in the header's `compound` field
 *
 * Returns the offset in datfile at which the record starts.  Every I/O
 * failure is reported through cha_exit_perror() (presumably fatal --
 * its definition is not visible here).
 */
static long
dump_dat(lexicon_t *lex, FILE *datfile, long compound)
{
    long index;
    long end;
    da_dat_t dat;

    /* Fail before writing anything if the start offset is unknown. */
    index = ftell(datfile);
    if (index < 0)
	cha_exit_perror("datfile location3");

    /* Zero the whole header first so that padding bytes and any field
     * not assigned below are written with a deterministic value. */
    memset(&dat, 0, sizeof(dat));
    dat.stem_len = lex->stem_len;
    dat.heading_len = strlen(lex->headword);
    dat.reading_len = lex->reading_len;
    dat.pron_len = lex->pron_len;
    dat.base_len = strlen(lex->base);
    dat.info_len = strlen(lex->info);
    dat.compound = compound;
    if (fwrite(&dat, sizeof(dat), 1, datfile) != 1)
	cha_exit_perror("datfile");

    if (myfputs(lex->headword, datfile) < 0 || fputc('\0', datfile) < 0 ||
	myfputs(lex->reading, datfile) < 0 || fputc('\0', datfile) < 0 ||
	myfputs(lex->pron, datfile) < 0 || fputc('\0', datfile) < 0 ||
	myfputs(lex->base, datfile) < 0 || fputc('\0', datfile) < 0 ||
	myfputs(lex->info, datfile) < 0 || fputc('\0', datfile) < 0)
	cha_exit_perror("datfile location1");

    /* Pad to an even offset; a failed ftell() must not be mistaken for
     * an odd position. */
    end = ftell(datfile);
    if (end < 0)
	cha_exit_perror("datfile location2");
    if (end % 2)
	if (fputc('\0', datfile) < 0)
	    cha_exit_perror("datfile location2");

    return index;
}

/*
 * Append one da_lex_t record to output.
 *
 * Returns the offset in output at which the record starts.  A failure to
 * obtain that offset or to write the record is reported through
 * cha_exit_perror(), matching the handling in dump_dat(), so that an
 * invalid (-1) index is never handed back to the caller.
 */
static long
dump_lex(da_lex_t *lex, FILE *output)
{
    long index = ftell(output);

    if (index < 0)
	cha_exit_perror("lexfile");
    if (fwrite(lex, sizeof(*lex), 1, output) != 1)
	cha_exit_perror("lexfile");

    return index;
}

/*
 * Fill the record *lex from the lexicon entry *entry and the offset of
 * the entry's data record.
 *
 * Only the six fields assigned below are touched; lex itself is returned
 * so the call can be used inside an expression.
 */
static da_lex_t *
assemble_lex(da_lex_t *lex, lexicon_t *entry, long dat_index)
{
    /* Location of the entry's strings in the data file. */
    lex->dat_index = dat_index;

    /* Part of speech and inflection data copied from the entry. */
    lex->posid    = entry->pos;
    lex->inf_type = entry->inf_type;
    lex->inf_form = entry->inf_form;

    /* Weight and connection-table value copied from the entry. */
    lex->weight  = entry->weight;
    lex->con_tbl = entry->con_tbl;

    return lex;
}

/*
 * Dump the constituents of a compound entry.
 *
 * entries[0] is the compound itself; entries[1], entries[2], ... up to
 * the first element whose `pos` is zero are its constituents.  For each
 * constituent a data record is appended to datfile (with NO_COMPOUND as
 * its compound value) and the following is appended to lexfile:
 *
 *     short     length of the constituent's headword
 *     short     has_next: 1 if another constituent follows, else 0
 *     da_lex_t  the constituent's lexicon record
 *
 * has_next is decided by looking at the next array element instead of
 * writing 1 everywhere and seeking back to patch the last record.  This
 * produces the same bytes for a sequentially written file, needs no
 * fseek(), and cannot overwrite the beginning of lexfile when the
 * constituent list is empty.
 *
 * Returns the offset in lexfile at which the constituent list starts.
 * I/O failures are reported through cha_exit_perror().
 */
static long
dump_compound(lexicon_t *entries, FILE *lexfile, FILE *datfile)
{
    int i;
    long compound_index = ftell(lexfile);

    if (compound_index < 0)
	cha_exit_perror("lexfile");

    for (i = 1; entries[i].pos; i++) {
	short hw_len = (short)strlen(entries[i].headword);
	/* entries[i + 1] exists: the array ends with a pos == 0 element. */
	short has_next = entries[i + 1].pos ? 1 : 0;
	da_lex_t lex;
	long dat_index;

	dat_index = dump_dat(entries + i, datfile, NO_COMPOUND);

	/* Zero first so padding bytes are written deterministically. */
	memset(&lex, 0, sizeof(lex));
	assemble_lex(&lex, entries + i, dat_index);

	if (fwrite(&hw_len, sizeof(short), 1, lexfile) != 1 ||
	    fwrite(&has_next, sizeof(short), 1, lexfile) != 1)
	    cha_exit_perror("lexfile");
	dump_lex(&lex, lexfile);
    }

    return compound_index;
}

/*
 * Expand the rest of a reading pattern and register every resulting key.
 *
 * yomi is the whole pattern (used for sizing and in warnings) and yomi_p
 * the position from which scanning continues.  sub_yomi is a work buffer
 * of at least strlen(yomi) + 1 bytes holding the key built so far, and
 * sub_yomi_p points just past its last byte.
 *
 * Pattern syntax, as implemented below:
 *   {a/b/c}  alternatives; the pattern is expanded once per alternative
 *   \\  \{   outside braces: a literal backslash / opening brace
 *   \\  \/   inside braces: a literal backslash / slash
 *   \x       any other x: the backslash is kept and x is scanned normally
 *
 * When the end of the pattern is reached, the key followed by hgobi (if
 * not NULL) is passed to da_build_add() together with lex_index.  An
 * empty key is not registered.  Warnings are printed to stdout.
 *
 * At most MAX_SAVED_ALTERNATIVES alternatives can be set aside inside one
 * brace group (the last alternative stays in sub_yomi); further ones are
 * dropped with a warning.  Running out of memory terminates the program.
 */
static void
register_lex_substring(da_build_t *builder, 
		       char *sub_yomi, 
		       char *sub_yomi_p,
		       char *yomi,
		       char *yomi_p,
		       long lex_index,
		       char *hgobi){
  enum { MAX_SAVED_ALTERNATIVES = 16 };
  char *syomi_stack[MAX_SAVED_ALTERNATIVES];   /* saved alternative keys */
  char *syomip_stack[MAX_SAVED_ALTERNATIVES];  /* end of each saved key */
  int index = 0;                               /* number of saved keys */
  int i;

  for(;;){
    char c = *yomi_p++;

    switch(c){
    case 0 : {
      /* End of pattern: register key + hgobi. */
      size_t sub_yomi_len = (size_t)(sub_yomi_p - sub_yomi);
      size_t hgobi_len = hgobi ? strlen(hgobi) : 0;
      char *katsuyo;

      if (sub_yomi_len + hgobi_len == 0) {
        printf("Warning: Empty keyword in %s\n", yomi);
        return;
      }

      katsuyo = malloc(sub_yomi_len + hgobi_len + 1);
      if (katsuyo == NULL) {
        perror("malloc");
        exit(EXIT_FAILURE);
      }
      /* The key in sub_yomi is not NUL-terminated, so copy by length. */
      memcpy(katsuyo, sub_yomi, sub_yomi_len);
      if (hgobi_len > 0)
        memcpy(katsuyo + sub_yomi_len, hgobi, hgobi_len);
      katsuyo[sub_yomi_len + hgobi_len] = '\0';

      /*	printf("register %s\n", katsuyo); */
      da_build_add(builder, katsuyo, lex_index);

      free(katsuyo);
      return;
    }

    case '\\' :
      c = *yomi_p++;
      if (c == '\\' || c == '{') {
        *sub_yomi_p++ = c;
      } else {
        /* Not an escape: keep the backslash and rescan the character. */
        *sub_yomi_p++ = '\\';
        yomi_p--;
      }
      break;

    case '{' : {
      char *r = sub_yomi_p;   /* where each alternative starts */

      for(;;){
        c = *yomi_p++;
        switch(c){
        case 0:
          goto unbalanced;

        case '\\':
          c = *yomi_p++;
          if (c == 0)
            goto unbalanced;
          if (c == '\\' || c == '/') {
            *sub_yomi_p++ = c;
          } else {
            *sub_yomi_p++ = '\\';
            yomi_p--;
          }
          break;

        case '/' :
          /* Check the bound BEFORE touching the arrays. */
          if (index >= MAX_SAVED_ALTERNATIVES) {
            printf("Warning: Too many / in %s\n", yomi);
          } else {
            size_t len = (size_t)(sub_yomi_p - sub_yomi);
            char *saved = malloc(strlen(yomi) + 1);

            if (saved == NULL) {
              perror("malloc");
              exit(EXIT_FAILURE);
            }
            memcpy(saved, sub_yomi, len);
            syomi_stack[index] = saved;
            syomip_stack[index] = saved + len;
            index++;
          }
          sub_yomi_p = r;   /* start the next alternative */
          break;

        case '}' :
          /* Continue with the last alternative, then each saved one. */
          register_lex_substring(builder, 
                                 sub_yomi, sub_yomi_p,
                                 yomi, yomi_p, lex_index, hgobi);

          for(i=0; i<index; i++){
            register_lex_substring(builder, 
                                   syomi_stack[i], 
                                   syomip_stack[i],
                                   yomi, yomi_p, lex_index, hgobi);
            free(syomi_stack[i]);
          }
          return;

        default :
          *sub_yomi_p++ = c;
        }
      }
    }

    default : 
      *sub_yomi_p++ = c;
    }
  }

  /* Only reached by goto: the pattern ended inside a brace group. */
unbalanced:
  printf("Warning: Unbalanced { in %s\n", yomi);
  for(i=0; i<index; i++)
    free(syomi_stack[i]);
}

/*
 * Register every key described by the reading pattern yomi.
 *
 * Allocates a work buffer of strlen(yomi) + 1 bytes and hands it to
 * register_lex_substring(), which starts scanning at the beginning of
 * yomi with an empty key.  lex_index and hgobi (may be NULL) are passed
 * through unchanged.  Running out of memory terminates the program
 * instead of writing through a NULL pointer.
 */
static void
register_lex(da_build_t *builder, char *yomi, long lex_index, char *hgobi){
  char *sub_yomi = malloc(strlen(yomi) + 1);

  if (sub_yomi == NULL) {
    perror("malloc");
    exit(EXIT_FAILURE);
  }

  register_lex_substring(builder, sub_yomi, sub_yomi, 
			 yomi, yomi, lex_index, hgobi);
  free(sub_yomi);
}

/*
 * Dump one dictionary entry and register its reading(s) with builder.
 *
 * entries[0] is the entry itself; if entries[1].pos is non-zero the
 * following elements are dumped first as its compound constituents.
 * output[0] receives data records, output[1] the compound constituent
 * list and output[2] the lexicon records whose offsets are registered.
 *
 * If the entry has no inflection type, or already has a fixed inflection
 * form, one lexicon record is written and the hiragana form of the
 * reading is registered.  Otherwise one record is written for every form
 * i >= 1 listed in Cha_form[inf_type], with inf_form = i and con_tbl
 * offset by i - 1, and the reading is registered together with the
 * hiragana form of that form's `ygobi` ending.
 *
 * Always returns 0.
 */
int
dump_dic(lexicon_t *entries, FILE *output[], da_build_t *builder)
{
    FILE *datfile = output[0];
    FILE *lexfile = output[1];
    FILE *tmp_lexfile = output[2];   /* not named `tmpfile`: that would
					shadow the <stdio.h> function */
    long dat_index, lex_index;
    da_lex_t lex;
    long compound = NO_COMPOUND;
    char *hira;			/* reading converted to hiragana */

    if (entries[1].pos)
	compound = dump_compound(entries, lexfile, datfile);

    dat_index = dump_dat(entries, datfile, compound);

    /* Zero first so padding bytes are written deterministically. */
    memset(&lex, 0, sizeof(lex));
    assemble_lex(&lex, entries, dat_index);

    if (entries[0].inf_type == 0 || entries[0].inf_form > 0) {
	lex_index = dump_lex(&lex, tmp_lexfile);
	hira = katakana2hiragana(entries[0].reading);
	register_lex(builder, hira, lex_index, NULL);
	free(hira);
    } else if (entries[0].reading) {
	unsigned short con_tbl = lex.con_tbl;
	int i;

	/* The reading is the same for every form, so convert it once.
	 * NOTE(review): assumes katakana2hiragana() has no side effects
	 * and returns a fresh heap string -- confirm. */
	hira = katakana2hiragana(entries[0].reading);

	for (i = 1; Cha_form[lex.inf_type][i].name; i++) {
	    char *hgobi;	/* inflection ending converted to hiragana */

	    lex.inf_form = i;
	    lex.con_tbl = con_tbl + i - 1;
	    lex_index = dump_lex(&lex, tmp_lexfile);
	    hgobi = katakana2hiragana(Cha_form[lex.inf_type][i].ygobi);
	    register_lex(builder, hira, lex_index, hgobi);
	    free(hgobi);
	}

	free(hira);
    }

    return 0;
}
