/*
 * Copyright (c) 1991-2003 Kyoto University
 * Copyright (c) 2000-2003 NAIST
 * All rights reserved
 */

/* voca_load_htkdict.c --- read in vocabulary data */

/* $Id: voca_load_htkdict.c,v 1.15 2003/10/02 04:11:45 ri Exp $ */

/* format is HTK Dictionary format */

/* word-internal context dependency is considered in this function */

#include <sent/stddefs.h>
#include <sent/vocabulary.h>
#include <sent/htk_hmm.h>

/* 
 * dictionary format:
 * 
 * 1 word per line.
 * 
 * fields: GrammarEntry [OutputString] phone1 phone2 ....
 * 
 *     GrammarEntry
 *		   (for N-gram)
 *		   word name in N-gram
 *                 (for DFA)
 *                 terminal symbol ID
 *
 *     [OutputString]
 *		   String to output when the word is recognized.
 *
 *     {OutputString}
 *		   String to output when the word is recognized.
 *                 Also specifies that this word is transparent
 * 
 *     phone1 phone2 ....
 *		   sequence of logical HMM name (normally phoneme)
 *                 to express the pronunciation
 */

/* initial size, and growth step, of a word's phone sequence array
   (counted in phones) while a dictionary line is being parsed */
#define PHONEMELEN_STEP  10	/* malloc base */
/* line buffers shared by the dictionary loaders in this file */
static char buf[MAXLINELEN];	/* read buffer */
static char bufbak[MAXLINELEN];	/* read buffer (for debug output) */

/* working state of cycle_triphone():
 *   trbuf   - the last three phone names handed to cycle_triphone()
 *   chbuf   - the composed name that cycle_triphone() returns
 *   nophone - empty name, passed by cycle_triphone_flush()
 *   trp_l, trp, trp_r - indices into trbuf for the left context,
 *                       the center phone and the right context
 * NOTE(review): chbuf (30 bytes) is smaller than three 19-character
 * names plus the two delimiters that cycle_triphone() concatenates.
 */
static char trbuf[3][20];
static char chbuf[30];
static char nophone[1];
static int  trp_l, trp, trp_r;

/* append src to chbuf, truncating so that chbuf can never be overrun */
static void
triphone_name_append(const char *src)
{
  size_t used = strlen(chbuf);

  /* chbuf is always NUL-terminated here, so used <= sizeof(chbuf) - 1 */
  strncat(chbuf, src, sizeof(chbuf) - used - 1);
}

/*
 * Compose a context-dependent phone name from the last three phones given.
 *
 *   p == NULL : reset the internal state; returns NULL.
 *   otherwise : p is stored as the newest phone (the right context of the
 *               previously given phone).  The name of the previously given
 *               phone is returned as "left" HMM_LC_DLIM "center"
 *               HMM_RC_DLIM "right", where an empty left or right context
 *               and its delimiter are omitted.
 *
 * Returns NULL when there is no center phone yet (first call after a
 * reset, or after the sequence has been flushed).  The returned string
 * lives in a static buffer that is overwritten by the next call.
 *
 * Phone names and the composed name are copied with bounded operations:
 * a name that does not fit in the fixed-size buffers is truncated instead
 * of overrunning them.
 */
char *
cycle_triphone(char *p)
{
  int i;

  if (p == NULL) {		/* initialize */
    nophone[0] = '\0';
    for (i = 0; i < 3; i++) trbuf[i][0] = '\0';
    trp_l = 0;
    trp   = 1;
    trp_r = 2;
    return NULL;
  }

  /* store the newest phone in the right-context slot (bounded copy) */
  trbuf[trp_r][0] = '\0';
  strncat(trbuf[trp_r], p, sizeof(trbuf[trp_r]) - 1);

  chbuf[0] = '\0';
  if (trbuf[trp_l][0] != '\0') {
    triphone_name_append(trbuf[trp_l]);
    triphone_name_append(HMM_LC_DLIM);
  }
  if (trbuf[trp][0] == '\0') {
    /* no center phone: nothing to emit, just rotate the slots */
    i = trp_l;
    trp_l = trp;
    trp = trp_r;
    trp_r = i;
    return NULL;
  }
  triphone_name_append(trbuf[trp]);
  if (trbuf[trp_r][0] != '\0') {
    triphone_name_append(HMM_RC_DLIM);
    triphone_name_append(trbuf[trp_r]);
  }

  /* rotate: center becomes left, right becomes center,
     and the old left slot is reused for the next phone */
  i = trp_l;
  trp_l = trp;
  trp = trp_r;
  trp_r = i;

  return(chbuf);
}
/* feed an empty phone to cycle_triphone() and return whatever it yields,
   i.e. the name of the last real phone with no right context */
char *
cycle_triphone_flush()
{
  char *last_name;

  last_name = cycle_triphone(nophone);
  return last_name;
}


/* walk over all words in winfo and store the largest per-word state
   count in winfo->maxwn */
static void
set_maxwn(WORD_INFO *winfo)
{
  int word, ph;
  int states;
  int largest = 0;

  for (word = 0; word < winfo->num; word++) {
    /* sum (state count - 2) over every logical HMM of this word;
       presumably the 2 excludes the non-emitting entry/exit states */
    states = 0;
    for (ph = 0; ph < winfo->wlen[word]; ph++) {
      states += hmm_logical_state_num(winfo->wseq[word][ph]) - 2;
    }
    if (states > largest) {
      largest = states;
    }
  }

  winfo->maxwn = largest;
}

/* walk over all words in winfo and store the longest phone sequence
   length in winfo->maxwlen */
static void
set_maxwlen(WORD_INFO *winfo)
{
  WORD_ID id;
  int longest = 0;

  for (id = 0; id < winfo->num; id++) {
    if (winfo->wlen[id] > longest) {
      longest = winfo->wlen[id];
    }
  }

  winfo->maxwlen = longest;
}


/* load a whole dictionary from the stream fp into winfo.
   winfo is (re)initialized first; lines are read until end of input or
   until the line parser asks to stop.  Afterwards winfo->num, maxwn and
   maxwlen are set. */
boolean				/* TRUE on success, FALSE on any error word */
voca_load_htkdict(
     FILE *fp,
     WORD_INFO *winfo,
     HTK_HMM_INFO *hmminfo,	/* if NULL, phonemes are ignored */
     boolean ignore_tri_conv)	/* TRUE if convert to triphone should be ignored */
{
  boolean all_ok = TRUE;
  boolean to_triphone;
  WORD_ID count;

  /* convert to word-internal triphones only when an HMM set is given,
     it is flagged as triphone, and the caller did not disable it */
  to_triphone = (hmminfo != NULL && hmminfo->is_triphone && (! ignore_tri_conv)) ? TRUE : FALSE;

  winfo_init(winfo);

  /* one word slot per line; the counter is not advanced for the line
     on which the parser requests termination */
  for (count = 0; getl(buf, sizeof(buf), fp) != NULL; count++) {
    if (count >= winfo->maxnum) winfo_expand(winfo);
    if (voca_load_htkdict_line(buf, count, winfo, hmminfo, ignore_tri_conv, to_triphone, &all_ok) == FALSE) break;
  }
  winfo->num = count;

  /* refresh the derived maxima */
  set_maxwn(winfo);
  set_maxwlen(winfo);

  return(all_ok);
}


/* load a whole dictionary into winfo, reading lines from the file
   descriptor fd.  winfo is (re)initialized first; lines are read until
   end of input or until the line parser asks to stop.  Afterwards
   winfo->num, maxwn and maxwlen are set. */
boolean				/* TRUE on success, FALSE on any error word */
voca_load_htkdict_fd(
     int fd,
     WORD_INFO *winfo,
     HTK_HMM_INFO *hmminfo,	/* if NULL, phonemes are ignored */
     boolean ignore_tri_conv)	/* TRUE if convert to triphone should be ignored */
{
  boolean all_ok = TRUE;
  boolean to_triphone;
  WORD_ID count;

  /* convert to word-internal triphones only when an HMM set is given,
     it is flagged as triphone, and the caller did not disable it */
  to_triphone = (hmminfo != NULL && hmminfo->is_triphone && (! ignore_tri_conv)) ? TRUE : FALSE;

  winfo_init(winfo);

  /* one word slot per line; the counter is not advanced for the line
     on which the parser requests termination */
  for (count = 0; getl_fd(buf, MAXLINELEN, fd) != NULL; count++) {
    if (count >= winfo->maxnum) winfo_expand(winfo);
    if (voca_load_htkdict_line(buf, count, winfo, hmminfo, ignore_tri_conv, to_triphone, &all_ok) == FALSE) break;
  }
  winfo->num = count;

  /* refresh the derived maxima */
  set_maxwn(winfo);
  set_maxwlen(winfo);

  return(all_ok);
}

/* append a single entry to the existing dictionary.
 *
 * entry is one dictionary line in the same format the loaders read.  It
 * is copied into the file's static line buffer before parsing, so it must
 * be shorter than that buffer (MAXLINELEN bytes including the terminator).
 *
 * On success the word is stored at index winfo->num, winfo->num is
 * incremented and maxwn / maxwlen are recomputed.
 *
 * Returns FALSE, leaving winfo->num unchanged, when
 *   - entry is NULL or too long for the line buffer (winfo->errnum is
 *     incremented),
 *   - the line parser reports an error in the entry, or
 *   - the line parser returns FALSE without storing a word (it does so
 *     for the "DICEND" terminator line).
 */
boolean				/* TRUE on success, FALSE on any error word */
voca_append_htkdict(
     char *entry,               /* dictionary entry string to be appended */
     WORD_INFO *winfo,
     HTK_HMM_INFO *hmminfo,	/* if NULL, phonemes are ignored */
     boolean ignore_tri_conv)	/* TRUE if convert to triphone should be ignored */
{
  boolean ok_flag = TRUE;
  boolean do_conv = FALSE;

  /* refuse entries that would overrun the static line buffer */
  if (entry == NULL || strlen(entry) >= sizeof(buf)) {
    winfo->errnum++;
    return(FALSE);
  }

  if (hmminfo != NULL && hmminfo->is_triphone && (! ignore_tri_conv))
    do_conv = TRUE;

  if (winfo->num >= winfo->maxnum) winfo_expand(winfo);
  strcpy(buf, entry);		/* const buffer not allowed in voca_load_htkdict_line() */
  if (voca_load_htkdict_line(buf, winfo->num, winfo, hmminfo, ignore_tri_conv, do_conv, &ok_flag) == FALSE) {
    /* nothing was stored in the slot: do not publish it */
    return(FALSE);
  }

  if (ok_flag == TRUE) {
    winfo->num++;
    /* re-compute maxwn */
    set_maxwn(winfo);
    set_maxwlen(winfo);
  }

  return(ok_flag);
}


/*
 * Parse one dictionary line and store the word at index vnum of winfo.
 *
 *   buf             line to parse; it is modified by the tokenizer
 *   vnum            destination word index; vnum+1 is printed as the line
 *                   number in error messages
 *   winfo           word information to fill
 *   hmminfo         HMM set used to look up phone names; if NULL the phone
 *                   sequence is not read (wseq = NULL, wlen = 0)
 *   ignore_tri_conv not used by this function
 *   do_conv         TRUE to compose word-internal triphone names from the
 *                   phone sequence before the lookup
 *   ok_flag         set to FALSE when the entry has an error; never set
 *                   to TRUE here
 *
 * Returns FALSE when the line is the "DICEND" terminator (reading should
 * stop, nothing is stored), TRUE otherwise -- also for erroneous entries,
 * so that the caller can go on and report every bad line.
 *
 * On an erroneous entry everything allocated for it is released,
 * winfo->errnum is incremented, and the slot is left in a defined empty
 * state (wname = woutput = wseq = NULL, wlen = 0), so that code walking
 * over the slot does not touch freed or uninitialized data.
 */
boolean
voca_load_htkdict_line(char *buf, int vnum,
		       WORD_INFO *winfo,
		       HTK_HMM_INFO *hmminfo,
		       boolean ignore_tri_conv,
		       boolean do_conv,
		       boolean *ok_flag)
{
  char *ptmp, *lp = NULL, *p;
  char *wname = NULL, *woutput = NULL;
  int tmpmaxlen, len = 0;
  HMM_Logical **tmpwseq = NULL;
  HMM_Logical *tmplg;
  boolean pok;

  if (strmatch(buf, "DICEND")) return FALSE;

  /* backup whole line for debug output (bounded: the caller's buffer
     is not guaranteed to fit into bufbak) */
  bufbak[0] = '\0';
  strncat(bufbak, buf, sizeof(bufbak) - 1);

  /* GrammarEntry */
  if ((ptmp = mystrtok(buf, " \t\n")) == NULL) {
    j_printerr("line %d: corrupted data:\n> %s\n", vnum+1, bufbak);
    goto word_error;
  }
  wname = strcpy((char *)mymalloc(strlen(ptmp)+1), ptmp);

  /* just move pointer to next token */
  if ((ptmp = mystrtok_movetonext(NULL, " \t\n")) == NULL) {
    j_printerr("line %d: corrupted data:\n> %s\n", vnum+1, bufbak);
    goto word_error;
  }
#ifdef CLASS_NGRAM
  winfo->cprob[vnum] = 0.0;	/* prob = 1.0, logprob = 0.0 */
#endif

  if (ptmp[0] == '@') {		/* class N-gram prob */
#ifdef CLASS_NGRAM
    /* word probability within the class (for class N-gram) */
    /* format: classname @classprob wordname [output] phoneseq */
    /* classname equals to wname, and wordname will be omitted */
    /* format: @%f (log scale) */
    /* if "@" not found or "@0", it means class == word */
    if ((ptmp = mystrtok(NULL, " \t\n")) == NULL) {
      j_printerr("line %d: corrupted data:\n> %s\n", vnum+1, bufbak);
      goto word_error;
    }
    if (ptmp[1] == '\0') {	/* space between '@' and figures */
      j_printerr("line %d: value after '@' missing, maybe wrong space?\n> %s\n", vnum+1, bufbak);
      goto word_error;
    }
    winfo->cprob[vnum] = atof(&(ptmp[1]));
    if (winfo->cprob[vnum] != 0.0) winfo->cwnum++;
    /* read next word entry (just skip them) */
    if ((ptmp = mystrtok(NULL, " \t\n")) == NULL) {
      j_printerr("line %d: corrupted data:\n> %s\n", vnum+1,bufbak);
      goto word_error;
    }
    /* move to the next word entry */
    if ((ptmp = mystrtok_movetonext(NULL, " \t\n")) == NULL) {
      j_printerr("line %d: corrupted data:\n> %s\n", vnum+1, bufbak);
      goto word_error;
    }
#else  /* ~CLASS_NGRAM */
    /* print the whole line after "> ", like every other message here */
    j_printerr("line %d: cannot handle in-class word probability\n> %s\n", vnum+1, bufbak);
    goto word_error;
#endif /* CLASS_NGRAM */
  }

  /* OutputString */
  switch(ptmp[0]) {
  case '[':			/* not transparent word */
    winfo->is_transparent[vnum] = FALSE;
    ptmp = mystrtok_quotation(NULL, " \t\n", '[', ']', 0);
    break;
  case '{':			/* transparent word */
    winfo->is_transparent[vnum] = TRUE;
    ptmp = mystrtok_quotation(NULL, " \t\n", '{', '}', 0);
    break;
  default:
    j_printerr("line %d: missing output string??\n> %s\n", vnum+1, bufbak);
    goto word_error;
  }
  if (ptmp == NULL) {
    j_printerr("line %d: corrupted data:\n> %s\n", vnum+1, bufbak);
    goto word_error;
  }
  woutput = strcpy((char *)mymalloc(strlen(ptmp)+1), ptmp);

  /* phoneme sequence (left as NULL / 0 when hmminfo is NULL) */
  if (hmminfo != NULL) {
    tmpmaxlen = PHONEMELEN_STEP;
    tmpwseq = (HMM_Logical **)mymalloc(sizeof(HMM_Logical *) * tmpmaxlen);
    len = 0;

    if (do_conv) {
      /* convert phoneme to triphone expression (word-internal) */
      cycle_triphone(NULL);
      if ((lp = mystrtok(NULL, " \t\n")) == NULL) {
        j_printerr("line %d: word %s has no phoneme:\n> %s\n", vnum+1, wname, bufbak);
        goto word_error;
      }
      /* prime the converter: names come out one phone behind the input */
      cycle_triphone(lp);
    }

    pok = TRUE;
    for (;;) {
      if (do_conv) {
        /* once the tokens are exhausted (lp == NULL), flush the last name */
        if (lp != NULL) lp = mystrtok(NULL, " \t\n");
        if (lp != NULL) p = cycle_triphone(lp);
        else p = cycle_triphone_flush();
      } else {
        p = mystrtok(NULL, " \t\n");
      }
      if (p == NULL) break;

      /* both defined/pseudo phone is allowed */
      tmplg = htk_hmmdata_lookup_logical(hmminfo, p);
      if (tmplg == NULL) {
        /* not found */
        if (do_conv) {
          /* both defined or pseudo phone are not found */
          if (len == 0 && lp == NULL) {
            j_printerr("line %d: triphone \"*-%s+*\" or monophone \"%s\" not found\n", vnum+1, p, p);
          } else if (len == 0) {
            j_printerr("line %d: triphone \"*-%s\" or biphone \"%s\" not found\n", vnum+1, p, p);
          } else if (lp == NULL) {
            j_printerr("line %d: triphone \"%s+*\" or biphone \"%s\" not found\n", vnum+1, p, p);
          } else {
            j_printerr("line %d: triphone \"%s\" not found\n", vnum+1, p);
          }
        } else {
          j_printerr("line %d: phone \"%s\" not found\n", vnum+1, p);
        }
        pok = FALSE;
      } else if (pok) {
        /* found, and no earlier phone failed: store it.
           After a failure the sequence is discarded anyway, and len keeps
           advancing without the array growing, so nothing is stored then. */
        if (len >= tmpmaxlen) {
          /* expand wseq area by PHONEMELEN_STEP */
          tmpmaxlen += PHONEMELEN_STEP;
          tmpwseq = (HMM_Logical **)myrealloc(tmpwseq, sizeof(HMM_Logical *) * tmpmaxlen);
        }
        tmpwseq[len] = tmplg;
      }
      len++;
    }
    if (!pok) {			/* error in phoneme */
      j_printerr("> %s\n", bufbak);
      goto word_error;
    }
    if (len == 0) {
      j_printerr("line %d: no phone specified:\n> %s\n", vnum+1, bufbak);
      goto word_error;
    }
  }

  /* the entry is complete: publish it */
  winfo->wname[vnum] = wname;
  winfo->woutput[vnum] = woutput;
  winfo->wseq[vnum] = tmpwseq;
  winfo->wlen[vnum] = len;

  return(TRUE);

 word_error:
  /* release whatever was allocated for this entry (free(NULL) is a no-op)
     and leave the slot in a defined empty state */
  free(tmpwseq);
  free(woutput);
  free(wname);
  winfo->wname[vnum] = NULL;
  winfo->woutput[vnum] = NULL;
  winfo->wseq[vnum] = NULL;
  winfo->wlen[vnum] = 0;
  winfo->errnum++;
  *ok_flag = FALSE;
  return(TRUE);
}


/*
 * convert monophone dictionary to word-internal triphone
 *
 * For every word in winfo, the name of each phone is combined with the
 * names of its neighbours inside the word by cycle_triphone(), the
 * resulting name is looked up in hmminfo, and the entry in wseq is
 * replaced by the HMM found.
 *
 * When a composed name is not found, an error is printed, the original
 * entry is kept, and processing continues with the next phone.
 * Words without a phone sequence (wlen == 0 or wseq == NULL) are skipped.
 *
 * Returns TRUE if every lookup succeeded, FALSE otherwise.
 */
boolean
voca_mono2tri(WORD_INFO *winfo, HTK_HMM_INFO *hmminfo)
{
  WORD_ID w;
  int ph;
  char *p;
  HMM_Logical *tmplg;
  boolean ok_flag = TRUE;
  
  for (w=0;w<winfo->num;w++) {
    /* nothing to convert, and wseq[w][0] below must not be dereferenced */
    if (winfo->wlen[w] == 0 || winfo->wseq[w] == NULL) continue;

    cycle_triphone(NULL);
    /* prime the converter: names come out one phone behind the input */
    cycle_triphone(winfo->wseq[w][0]->name);

    for (ph = 0; ph < winfo->wlen[w] ; ph++) {
      if (ph == winfo->wlen[w] - 1) {
	p = cycle_triphone_flush();
      } else {
	/* entry ph+1 has not been replaced yet, so this is still the
	   original phone name */
	p = cycle_triphone(winfo->wseq[w][ph + 1]->name);
      }
      if ((tmplg = htk_hmmdata_lookup_logical(hmminfo, p)) == NULL) {
	j_printerr("voca_mono2tri: word \"%s[%s]\"(id=%d): HMM \"%s\" not found\n", winfo->wname[w], winfo->woutput[w], w, p);
	ok_flag = FALSE;
	continue;
      }
      winfo->wseq[w][ph] = tmplg;
    }
  }
  return (ok_flag);
}

/*
 * append whole winfo to other
 * assume both use the same hmminfo
 *
 * Every word of srcinfo is copied into dstinfo, starting at word index
 * woffset.  Names, output strings and the phone sequence arrays are
 * duplicated; the HMM_Logical entries the sequences point to are shared.
 * coffset is added to each copied word's category ID (wton).
 * Afterwards dstinfo->num is set to woffset + srcinfo->num and maxwn /
 * maxwlen are recomputed.  Nothing is returned.
 *
 * NOTE(review): cprob (present under CLASS_NGRAM) is not copied here --
 * confirm whether that is intended.
 */
void
voca_append(
	    WORD_INFO *dstinfo,	/* append to this word_info */
	    WORD_INFO *srcinfo,	/* append this to dst */
	    int coffset,	/* category ID offset */
	    int woffset)	/* word ID offset */
{
  WORD_ID n, w;
  int i;

  n = woffset;
  /* the in-loop capacity check only runs after a slot has been written,
     so make room for the first slot here (dstinfo may be exactly full) */
  if (srcinfo->num > 0 && n >= dstinfo->maxnum) winfo_expand(dstinfo);

  for(w=0;w<srcinfo->num;w++) {
    /* copy data */
    dstinfo->wlen[n] = srcinfo->wlen[w];
    dstinfo->wname[n] = strcpy((char *)mymalloc(strlen(srcinfo->wname[w])+1), srcinfo->wname[w]);
    dstinfo->woutput[n] = strcpy((char *)mymalloc(strlen(srcinfo->woutput[w])+1), srcinfo->woutput[w]);
    dstinfo->wseq[n] = (HMM_Logical **)mymalloc(sizeof(HMM_Logical *) * srcinfo->wlen[w]);
    for(i=0;i<srcinfo->wlen[w];i++) {
      dstinfo->wseq[n][i] = srcinfo->wseq[w][i];
    }
    dstinfo->is_transparent[n] = srcinfo->is_transparent[w];
    /* offset category ID by coffset */
    dstinfo->wton[n] = srcinfo->wton[w] + coffset;
    
    n++;
    /* keep one free slot ahead of the next write */
    if (n >= dstinfo->maxnum) winfo_expand(dstinfo);
  }
  dstinfo->num = n;

  /* compute maxwn */
  set_maxwn(dstinfo);
  set_maxwlen(dstinfo);
}

