/*
 *	convgram.c - convert JUMAN's connect file & grammar file to ChaSen's
 *
 *	last modified by A.Kitauchi <akira-k@is.aist-nara.ac.jp>, Nov. 1996
 *
 */

#include "chadic.h"

#include <stdlib.h>

#define LINEMAX 8192

/*
 * match_hinsi_name - test whether STR begins with a recognized name
 *
 * Returns 1 when STR starts with "*", with JSTR_BUNTO, with JSTR_BUNMATSU,
 * or with one of the names found in the grammar tables: the entries listed
 * in Hinsi[0].daughter[] when VGRAM is defined, otherwise the first entry
 * of each row of Class[][].  Returns 0 when nothing matches.
 *
 * Only a prefix of STR is compared, so whatever follows the name in STR
 * is ignored.
 */
static int match_hinsi_name(str)
    char *str;
{
    int i;

    if (str[0] == '*')
      return 1;
    if (!strncmp(str, JSTR_BUNTO, strlen(JSTR_BUNTO)))
      return 1;
    /* the prefix length must be that of JSTR_BUNMATSU itself */
    if (!strncmp(str, JSTR_BUNMATSU, strlen(JSTR_BUNMATSU)))
      return 1;

#ifdef VGRAM
    {
	int d;

	/* daughter[] is scanned up to its first 0 entry */
	for (i = 0; (d = Hinsi[0].daughter[i]) != 0; i++)
	  if (!strncmp(str, Hinsi[d].name, strlen(Hinsi[d].name)))
	    return 1;
    }
#else
    {
	char *name;

	/* Class[] is scanned up to the first row whose id is a null pointer */
	for (i = 0; (name = Class[i][0].id) != NULL; i++)
	  if (!strncmp(str, name, strlen(name)))
	    return 1;
    }
#endif

    return 0;
}

/*
 * convert_grammar - convert the grammar file and write it under VDICDIR
 *
 * The input is opened with cha_fopen_grammar2(CHA_GRAMMARFILE,
 * JM_GRAMMARFILE, ...) and the result is written to
 * VDICDIR/VCHA_GRAMMARFILE.  The program exits with status 1 if that
 * output path does not fit in CHA_FILENAME_MAX bytes.
 *
 * The text is copied character by character while the parenthesis depth
 * is tracked:
 *   - the '(' that enters depth 2 and the ')' that leaves it are dropped;
 *     the second and later depth-2 groups of one top-level list are
 *     preceded by a single ' ' instead;
 *   - blanks and tabs are dropped at depth 2 inside the first group and
 *     at depth 3 inside later groups;
 *   - from a ';' up to the end of the line the text is copied verbatim.
 */
static void convert_grammar(vdicdir)
    char *vdicdir;
{
    FILE *fpi, *fpo;
    char line[LINEMAX], *s, *filein, fileout[CHA_FILENAME_MAX];
    int parlevel, npar2, len;

    fpi = cha_fopen_grammar2(CHA_GRAMMARFILE, JM_GRAMMARFILE, "r", 1, 0, &filein);

    /* vdicdir is caller supplied: never write past the end of fileout[] */
    len = snprintf(fileout, sizeof(fileout), "%s/%s", vdicdir, VCHA_GRAMMARFILE);
    if (len < 0 || len >= (int) sizeof(fileout)) {
	fprintf(stderr, "convgram: output file name too long: %s/%s\n",
		vdicdir, VCHA_GRAMMARFILE);
	exit(1);
    }
    fpo = cha_fopen(fileout, "w", 1);

    fprintf(stderr, "converting %s -> %s\n", filein, fileout);

    /*
     * parlevel: current parenthesis depth
     * npar2:    number of depth-2 groups opened in the current top-level list
     */
    parlevel = npar2 = 0;
    while (fgets(line, sizeof(line), fpi) != NULL) {
	for (s = line; *s; s++) {
	    switch (*s) {
	      case '(':
		if (++parlevel == 2) {
		    /* drop the paren; separate later groups with a space */
		    if (++npar2 > 1)
		      fputc(' ', fpo);
		    continue;
		}
		break;
	      case ')':
		/* the paren closing a depth-2 group is dropped */
		if (parlevel-- == 2)
		  continue;
		if (parlevel == 0)
		  npar2 = 0;
		break;
	      case ' ':
	      case '\t':
		if ((parlevel == 2 && npar2 == 1) ||
		    (parlevel == 3 && npar2 > 1))
		  continue;
		break;
	      case ';':
		/* copy the rest of the line unchanged */
		fputs(s, fpo);
		goto next_line;
	      default:
		break;
	    }
	    fputc(*s, fpo);
	}
      next_line:;
    }
    fclose(fpi);
    fclose(fpo);
}

/*
 * convert_connect - convert the connect file and write it under VDICDIR
 *
 * The input is opened with cha_fopen_grammar2() from JM_CONNECTFILE ".c"
 * or CHA_CONNECTFILE ".c"; the result is written to
 * VDICDIR/VCHA_CONNECTFILE ".c".  read_grammar() is called first, before
 * any line is converted.  The program exits with status 1 if one of the
 * file names does not fit in CHA_FILENAME_MAX bytes.
 *
 * Conversion performed while copying, as implemented below:
 *   - a '(' directly followed by a name accepted by match_hinsi_name()
 *     gets one extra '(' in front of it, and the matching extra ')' is
 *     written after the second element (or at the closing ')'); the
 *     whitespace between the two elements becomes a single ' ', and a
 *     second element starting with '*' is dropped altogether;
 *   - a line that begins at depth 0 with a character other than '(' is
 *     otherwise copied unchanged;
 *   - text between "ESC $ (" and the next "ESC (" is otherwise copied
 *     unchanged;
 *   - from a ';' up to the end of the line the text is copied verbatim;
 *   - the '(' entering depth 1 is written twice, and an extra ')' is
 *     written after every second return to depth 1;
 *   - digits at depth 1 are not copied; they are accumulated as a decimal
 *     number which is written, multiplied by 10, in front of the ')' that
 *     returns to depth 0 (a value of 0 is treated as 1).
 */
static void convert_connect(vdicdir)
    char *vdicdir;
{
    FILE *fpi, *fpo;
    char filein1[CHA_FILENAME_MAX], filein2[CHA_FILENAME_MAX], fileout[CHA_FILENAME_MAX];
    char line[LINEMAX], hinsi[LINEMAX], *s, *filein;
    int parlevel, nelem, korean, in_rule, nhinsi, skip;
    int len1, len2, len3;
    int cost = 0;

    nhinsi = 0; /* to avoid warning */

    /* build the file names without ever writing past the buffers */
    len1 = snprintf(filein1, sizeof(filein1), "%s.c", JM_CONNECTFILE);
    len2 = snprintf(filein2, sizeof(filein2), "%s.c", CHA_CONNECTFILE);
    len3 = snprintf(fileout, sizeof(fileout), "%s/%s.c", vdicdir, VCHA_CONNECTFILE);
    if (len1 < 0 || len1 >= (int) sizeof(filein1) ||
	len2 < 0 || len2 >= (int) sizeof(filein2) ||
	len3 < 0 || len3 >= (int) sizeof(fileout)) {
	fprintf(stderr, "convgram: connect file name too long\n");
	exit(1);
    }

    fpi = cha_fopen_grammar2(filein1, filein2, "r", 1, 0, &filein);
    fpo = cha_fopen(fileout, "w", 1);

    read_grammar(stderr, 1, 0);
    fprintf(stderr, "converting %s -> %s\n", filein, fileout);

    /*
     * parlevel: parenthesis depth (updated only by the switch below)
     * nelem:    number of returns to depth 1 since the last extra ')'
     * korean:   non-zero between "ESC $ (" and the next "ESC ("
     * skip:     non-zero while input characters are being dropped
     */
    parlevel = nelem = 0;
    skip = korean = 0;
    while (fgets(line, sizeof(line), fpi) != NULL) {
	/* decided once per line, from the depth at the start of the line */
	in_rule = parlevel == 0 && line[0] != '(';

	for (s = line; *s; s++) {
	    if (*s == '(') {
		if (match_hinsi_name(s + 1)) {
		    /* open the extra list around the pair of names */
		    fputc('(', fpo);
		    nhinsi = 1;
		}
	    }
	    else if (nhinsi) {
		/*
		 * nhinsi - 1: first hinsi
		 *          2: first space
		 *          3: second hinsi
		 */
		int space = (*s == ' ' || *s == '\t' || *s == '\n');
		if (nhinsi == 2) {
		    if (space)
		      continue;
		    nhinsi++;
		    /* a second element starting with '*' stays skipped */
		    if (*s != '*') {
			skip = 0;
			fputc(' ', fpo);
		    }
		} else {
		    if (space)
		      if (++nhinsi == 2)
			skip = 1;
		}
		/* end of the pair: close the extra list */
		if (nhinsi == 4 || *s == ')') {
		    nhinsi = 0;
		    skip = 0;
		    fputc(')', fpo);
		}
	    }

	    if (skip)
	      continue;

	    /* pass-through modes; an ESC still reaches the switch below */
	    if (in_rule || (korean && *s != 033)) {
		fputc(*s, fpo);
		continue;
	    }

	    switch (*s) {
	      case 033:
		if (s[1] == '(') {
		    fputc(*s++, fpo);
		    korean = 0;
		} else if (s[1] == '$' && s[2] == '(') {
		    /* Korean */
		    korean = 1;
		    fputc(*s++, fpo);
		    fputc(*s++, fpo);
		}
		break;
	      case ';':
		/* copy the rest of the line unchanged */
		fputs(s, fpo);
		goto next_line;
	      case '(':
		/* written here and again after the switch */
		if (++parlevel == 1)
		  fputc('(', fpo);
		break;
	      case ')':
		if (--parlevel == 1) {
		    if (++nelem == 2) {
			fputc(')', fpo);
			nelem = 0;
		    }
		} else if (parlevel == 0) {
		    /* emit the accumulated cost (1 if none was given) * 10 */
		    fprintf(fpo, "%d", (!cost ? 1 : cost) * 10);
		    cost = 0;
		}
		break;
	      default:
		break;
	    }
	    /* digits at depth 1 are accumulated into cost instead of copied */
	    if (parlevel == 1 && *s >= '0' && *s <= '9')
	      cost = cost * 10 + *s - '0';
	    else
	      fputc(*s, fpo);
	}
      next_line:;
    }
    fclose(fpi);
    fclose(fpo);
}

/*
 * convert_dic - convert each dictionary file and write it under VDICDIR
 *
 * DICFILES is an array of file names terminated by a null pointer.  Each
 * file is read as a sequence of S-expressions with s_read() and written
 * to VDICDIR/<name>, preceded by the line "(ESTR_DEF_POS_COST 10)".
 *
 * For every expression, the list keyed by JSTR_WORD (or, failing that,
 * ESTR_WORD) is located; each non-atom element of that list which has a
 * second member gets that member's atom value replaced by the old value
 * multiplied by 10, formatted with "%.0f".
 *
 * cha_exit() is called when the output path or a converted cost does not
 * fit in its buffer, and when neither key is found.
 */
static void convert_dic(vdicdir, dicfiles)
    char *vdicdir, **dicfiles;
{
    char **dicp, fileout[CHA_FILENAME_MAX];
    FILE *fpi, *fpo;
    int len;

    for (dicp = dicfiles; *dicp; dicp++) {
	fpi = cha_fopen(*dicp, "r", 1);

	/* both path components are caller supplied: bound the write */
	len = snprintf(fileout, sizeof(fileout), "%s/%s", vdicdir, *dicp);
	if (len < 0 || len >= (int) sizeof(fileout))
	  cha_exit(1, "output file name too long\n");

	fpo = cha_fopen(fileout, "w", 1);
	fprintf(stderr, "converting %s -> %s\n", *dicp, fileout);

	fprintf(fpo, "(%s 10)\n", ESTR_DEF_POS_COST);
	while (!s_feof(fpi)) {
	    cell_t *cell, *cell2, *cell3;

	    cell = cell2 = s_read(fpi);
	    /* while the list starts with an atom, step into its second member */
	    while (atomp(car(cell2)))
	      cell2 = car(cdr(cell2));
	    if (nullp(cell3 = assoc(tmp_atom(JSTR_WORD), cell2)))
	      if (nullp(cell3 = assoc(tmp_atom(ESTR_WORD), cell2)))
		cha_exit(1, "can't find midasi\n"); /* cha_exit_file */
	    for (cell2 = cdr(cell3); !nullp(cell2); cell2 = cdr(cell2)) {
		cell3 = car_val(cell2);
		if (!atomp(cell3) && !nullp(cdr_val(cell3))) {
		    char cost_str[256];

		    /* "%.0f" of a large double can exceed 255 digits */
		    len = snprintf(cost_str, sizeof(cost_str), "%.0f",
				   atof(s_atom_val(car_val(cdr_val(cell3)))) * 10);
		    if (len < 0 || len >= (int) sizeof(cost_str))
		      cha_exit(1, "cost is too large\n");
		    /* NOTE(review): the previous atom value is not freed here;
		       confirm who owns that string */
		    s_atom_val(car_val(cdr_val(cell3))) = cha_strdup(cost_str);
		}
	    }
	    s_print(fpo, cell);
	    fputc('\n', fpo);
	    s_free(cell);
	}
	fclose(fpi);
	fclose(fpo);
    }
}

/*
 * main - usage: convgram vdicdir dicfiles...
 *
 * argv[1] is the output directory handed to every converter; argv[2] and
 * the following arguments are the dictionary files handed to
 * convert_dic().  Returns 1 after printing a usage message when the
 * output directory is missing, 0 otherwise.
 */
int main(argc, argv)
    int argc;
    char *argv[];
{
    set_progpath(argv[0]);

    /* without argv[1] the converters would be handed a null pointer */
    if (argc < 2) {
	fprintf(stderr, "usage: convgram vdicdir dicfiles...\n");
	return 1;
    }

    /* &argv[2] is valid even when argc == 2: it then points at the
       terminating null pointer of argv[] */
    convert_grammar(argv[1]);
    convert_connect(argv[1]);
    convert_dic(argv[1], &argv[2]);

    return 0;
}
