/* Copyright (c) 1991-2002 Doshita Lab. Speech Group, Kyoto University */
/* Copyright (c) 2000-2002 Speech and Acoustics Processing Lab., NAIST */
/*   All rights reserved   */

/* outprob_style.c --- compute outprob of a state */

/* $Id: outprob_style.c,v 1.8 2002/09/11 22:02:33 ri Exp $ */

/* $BBh#1%Q%9$G$OF0E*$KC18l4V$N2;AG4D6-0MB8@-$r9MN8$9$k!%(B
     $BC18l$N@hF,(Btriphone: $BD>A0C18l$N:G=*2;AG$K1~$8$FJQ2=(B
     $BC18l$NKvHx(Btriphone: $BF1$8:8%3%s%F%-%9%H$r;}$DA4(Btriphone$B$N:GBgCM(B(or$BJ?6Q(B)
     $B#12;AG$N$_$NC18l(B: $B>e5-$rN>J}9MN8(B
     $B$=$l0J30$N2;AG(B: $B$=$N$^$^7W;;(B($BC18lFb(Btriphone$B$OLZ9=B$2=<-=q9=C[;~$K9MN8:Q(B
   $BBh#1%Q%9$O>uBVC10L$G4IM}$5$l$k$?$a!$>uBV$4$H$K>e5-$N$I$NA`:n$,I,MW$G$"$k(B
   $B$+$N(B AS_Style ID $B$r3F>uBV$KIUM?$7$F$*$/!%(B

   $B$^$?!$$3$N$h$&$J%3%s%F%-%9%H$KH<$&(Btriphone$BJQ2=$O>uBV$4$H$K%U%l!<%`C10L$G(B
   $B%-%c%C%7%e$7$F$*$/$3$H$G7W;;NL$NA}Bg$rKI$0!%(B
*/

/* Cross-word context dependency is handled dynamically in the 1st pass,
   so the HMM states at the head and at the tail of a word vary with context.
   (Word-internal context is resolved at startup, when the lexicon tree is
   constructed.)
     word head: varies according to the last phone of the previous word
     word tail: the maximum/average score over all variants (cdset) is used
     one-phoneme word: needs special handling, both of the above apply
     other states: treated as normal states (no context handling on search)

   To denote which of these operations a state needs, an AS_Style ID is
   assigned to each state.

   The triphone substitution is cached by each state per frame to keep the
   computation overhead down.
 */

#include <julius.h>

/* Scratch buffer in which this file builds context-dependent phone names
   (filled by strcpy() and then add_left_context()/add_right_context()).
   NOTE(review): the size is fixed at 40 bytes and every writer is unbounded;
   confirm that no HMM name plus context can exceed it. */
static char rbuf[40];		/* temporary use */

#ifdef PASS1_IWCD
/* Invalidate the context cache of every context-dependent state.
   For each of the wchmm->n states whose output style is AS_RSET or AS_LRSET,
   the cached state pointer is reset to NULL; states of any other style are
   left untouched. */
void
outprob_style_cache_init(WCHMM_INFO *wchmm)
{
  int node;

  for (node = 0; node < wchmm->n; node++) {
    switch (wchmm->state[node].outstyle) {
    case AS_RSET:
      wchmm->state[node].out.rset->cache.state = NULL;
      break;
    case AS_LRSET:
      wchmm->state[node].out.lrset->cache.state = NULL;
      break;
    default:
      /* no context cache on this kind of state */
      break;
    }
  }
}


/**********************************************************************/
#ifdef CATEGORY_TREE
/* for Julian */
/*
 * Make the category-indexed context-dependent state set (lcd_set / CD_Set)
 * for phone 'hmm' of a word that belongs to category 'category'.
 *
 * The set name is the left-center name of 'hmm' with the category ID
 * appended as "::" plus the ID zero-padded to 4 digits.  For example, a word
 * of category 38 whose phone is "a-k" uses the set "a-k::0038" instead of the
 * normal CD_Set "a-k".
 *
 * If that set is not registered yet, every category that can follow
 * 'category' (according to dfa_cp()) is scanned, and for each word in such a
 * category the triphone of 'hmm' whose right context is the first phone of
 * that word is added to the set.  When no such triphone exists, 'hmm' itself
 * is used instead; if 'hmm' is a pseudo phone, the missing triphone is
 * reported and nothing is added for that word.
 *
 * Returns the set as found by cdset_lookup() under that name.  After a new
 * registration the name is looked up again, so a set that was just created
 * is returned rather than NULL.  NULL is returned only when the set still
 * does not exist (for example when nothing could be registered).
 *
 * The behavior in combination with pseudo phones has not been checked
 * (note carried over from the original author).
 *
 * NOTE(review): 'winfo' is not a parameter of this function; it has to be
 * declared outside of it (presumably a program-wide word dictionary) --
 * confirm.
 */
static char lccbuf[256], lccbuf2[256]; /* work area */
CD_Set *
lcdset_register_with_category(HTK_HMM_INFO *hmminfo, DFA_INFO *dfa, HMM_Logical *hmm, WORD_ID category)
{
  CD_Set *ret;
  WORD_ID c2, i, w;
  HMM_Logical *ltmp;

  int cnt_c, cnt_w, cnt_p;

  /* build the category-indexed set name */
  leftcenter_name(hmm->name, lccbuf);
  sprintf(lccbuf2, "%s::%04d", lccbuf, category);

  ret = cdset_lookup(hmminfo, lccbuf2);
  if (ret != NULL) {
    /* already registered */
    return(ret);
  }

  /* not added yet */
  if (debug2_flag) {
    j_printf("category-aware lcdset {%s}...", lccbuf2);
  }
  cnt_c = cnt_w = cnt_p = 0;
  /* search for category that can connect after this category */
  for(c2=0;c2<dfa->term_num;c2++) {
    if (! dfa_cp(dfa, category, c2)) continue;
    /* for each word in the category, register triphone whose right context
       is the beginning phone of that word */
    for(i=0;i<dfa->term.wnum[c2];i++) {
      w = dfa->term.tw[c2][i];
      ltmp = get_right_context_HMM(hmm, winfo->wseq[w][0]->name, hmminfo);
      if (ltmp == NULL) {
        /* no such triphone: fall back to the given phone itself */
        ltmp = hmm;
        if (ltmp->is_pseudo) {
          error_missing_right_triphone(hmm, winfo->wseq[w][0]->name);
        }
      }
      if (! ltmp->is_pseudo) {
        if (regist_cdset(hmminfo, ltmp->body.defined, lccbuf2)) {
          cnt_p++;
        }
      }
    }
    cnt_c++;
    cnt_w += dfa->term.wnum[c2];
  }
  if (debug2_flag) {
    j_printf("%d categories (%d words) can follow, %d HMMs registered\n", cnt_c, cnt_w, cnt_p);
  }

  /* 'ret' still holds the NULL of the first lookup; look the name up again
     so that the caller gets the set that has just been built */
  return(cdset_lookup(hmminfo, lccbuf2));
}

/* $B%+%F%4%j(BID$BIU$-(BCD_Set$B$r8!:w$7$F%]%$%s%?$rJV$9(B ($B<:GT;~(B NULL) */
/* lookup category-indexed CD_Set and return the pointer (NULL if fail) */
CD_Set *
lcdset_lookup_with_category(HTK_HMM_INFO *hmminfo, HMM_Logical *hmm, WORD_ID category)
{
  leftcenter_name(hmm->name, lccbuf);
  sprintf(lccbuf2, "%s::%04d", lccbuf, category);
  return(cdset_lookup(hmminfo, lccbuf2));
}

/* Make all the category-indexed context-dependent state sets needed for the
   words in 'winfo' under grammar 'dfa'.
   Two passes are made over the dictionary:
     (1) the word-end phone of every word is registered with the category
         of that word;
     (2) for one-phoneme words, the phone also depends on the preceding
         word, so for every word that may precede it, the variant with that
         word's last phone as left context is registered as well. */
void
lcdset_register_with_category_all(HTK_HMM_INFO *hmminfo, WORD_INFO *winfo, DFA_INFO *dfa)
{
  WORD_ID cat_prev, wid, wid_prev;
  int k;
  HMM_Logical *variant;
  HMM_Logical *prev_tail;

  /* (1) word end phone */
  for (wid = 0; wid < winfo->num; wid++) {
    lcdset_register_with_category(hmminfo, dfa,
                                  winfo->wseq[wid][winfo->wlen[wid] - 1],
                                  winfo->wton[wid]);
  }

  /* (2) for one-phoneme word, possible left context should be also considered */
  for (wid = 0; wid < winfo->num; wid++) {
    if (winfo->wlen[wid] <= 1) {
      for (cat_prev = 0; cat_prev < dfa->term_num; cat_prev++) {
        /* only categories that can precede the category of this word */
        if (! dfa_cp(dfa, cat_prev, winfo->wton[wid])) continue;
        for (k = 0; k < dfa->term.wnum[cat_prev]; k++) {
          wid_prev = dfa->term.tw[cat_prev][k];
          prev_tail = winfo->wseq[wid_prev][winfo->wlen[wid_prev] - 1];
          variant = get_left_context_HMM(winfo->wseq[wid][0], prev_tail->name, hmminfo);
          /* NULL: the lcd_set of the one-phoneme word itself was already
             made in (1).  pseudo phone: no lcd_set is needed for it. */
          if (variant != NULL && !variant->is_pseudo) {
            lcdset_register_with_category(hmminfo, dfa, variant, winfo->wton[wid]);
          }
        }
      }
    }
  }
}
#endif


/**********************************************************************/
/* Calculate the output log probability of node 'node' of 'wchmm' on time
   frame 't' of input parameter 'param', where 'last_wid' is the previous
   word (WORD_INVALID when there is none).

   The computation depends on the output style of the state:
     AS_STATE: plain state, scored with outprob_state().
     AS_LSET:  end of word, the state set is scored with outprob_cd().
     AS_RSET:  beginning of word.  The triphone whose left context is the
               last phone of 'last_wid' is looked up; if there is none (or
               no previous word) the phone stored in the state is used,
               which may be a pseudo phone.
     AS_LRSET: one-phoneme word, both beginning and end.  A cdset is looked
               up for the given left context (category-indexed when
               CATEGORY_TREE is defined and old_iwcd_flag is off); if there
               is none the phone stored in the state is used.
   For AS_RSET and AS_LRSET the selected state or state set is cached in the
   state together with 'last_wid', and recomputed only when 'last_wid'
   differs from the cached one or the cached state pointer is NULL.

   Returns the log probability, or LOG_ZERO (after printing a message) when
   the output style is none of the above. */
LOGPROB
outprob_style(WCHMM_INFO *wchmm, int node, int last_wid, int t, HTK_Param *param)
{
  HMM_Logical *ohmm, *rhmm;
  RC_INFO *rset;
  LRC_INFO *lrset;
  CD_Set *lcd;
  WORD_INFO *winfo = wchmm->winfo;
  HTK_HMM_INFO *hmminfo = wchmm->hmminfo;

  switch(wchmm->state[node].outstyle) {
  case AS_STATE:		/* normal state */
    return(outprob_state(t, wchmm->state[node].out.state, param));
  case AS_LSET:			/* end of word */
    return(outprob_cd(t, wchmm->state[node].out.lset, param));
  case AS_RSET:			/* beginning of word */
    rset = wchmm->state[node].out.rset;
    if (rset->lastwid_cache != last_wid || rset->cache.state == NULL) {
      /* cache miss...calculate */
      if (last_wid != WORD_INVALID) {
        /* lookup triphone with left-context (= last phoneme) */
        if ((ohmm = get_left_context_HMM(rset->hmm, (winfo->wseq[last_wid][winfo->wlen[last_wid]-1])->name, hmminfo)) != NULL) {
          rhmm = ohmm;
        } else {
          /* if triphone not found, try to use the bi-phone itself */
          rhmm = rset->hmm;
          /* If the bi-phone is explicitly specified in hmmdefs/HMMList,
             use it.  if both triphone and biphone not found in user-given
             hmmdefs/HMMList (in such case, the biphone has been defined
             as "pseudo" phone), output a warning (all mapping should be
             explicitly defined in HMMList) */
          if (rhmm->is_pseudo) {
            error_missing_left_triphone(rset->hmm, (winfo->wseq[last_wid][winfo->wlen[last_wid]-1])->name);
          }
        }
      } else {
        /* if last word is WORD_INVALID use the bi-phone itself.
           There is no left context here, so no triphone lookup was made
           and nothing is reported as missing: the word tables must never
           be indexed with WORD_INVALID just to build a message. */
        rhmm = rset->hmm;
      }
      /* rhmm may be a pseudo phone */
      /* store to cache */
      if (rhmm->is_pseudo) {
        rset->last_is_lset  = TRUE;
        rset->cache.lset    = &(rhmm->body.pseudo->stateset[rset->state_loc]);
      } else {
        rset->last_is_lset  = FALSE;
        rset->cache.state   = rhmm->body.defined->s[rset->state_loc];
      }
      rset->lastwid_cache = last_wid;
    }
    /* calculate outprob and return */
    if (rset->last_is_lset) {
      return(outprob_cd(t, rset->cache.lset, param));
    } else {
      return(outprob_state(t, rset->cache.state, param));
    }
  case AS_LRSET:		/* 1 phoneme --- beginning and end */
    lrset = wchmm->state[node].out.lrset;
    if (lrset->lastwid_cache != last_wid || lrset->cache.state == NULL) {
      /* cache miss...calculate */
      rhmm = lrset->hmm;
      /* lookup cdset for given left context (= last phoneme) */
      strcpy(rbuf, rhmm->name);
      if (last_wid != WORD_INVALID) {
        add_left_context(rbuf, (winfo->wseq[last_wid][winfo->wlen[last_wid]-1])->name);
      }
#ifdef CATEGORY_TREE
      if (!old_iwcd_flag) {
        /* use category-indexed cdset */
        if (last_wid != WORD_INVALID &&
            (ohmm = get_left_context_HMM(rhmm, (winfo->wseq[last_wid][winfo->wlen[last_wid]-1])->name, hmminfo)) != NULL) {
          lcd = lcdset_lookup_with_category(hmminfo, ohmm, lrset->category);
        } else {
          lcd = lcdset_lookup_with_category(hmminfo, rhmm, lrset->category);
        }
      } else {
        lcd = lcdset_lookup_by_hmmname(hmminfo, rbuf);
      }
#else
      lcd = lcdset_lookup_by_hmmname(hmminfo, rbuf);
#endif /* CATEGORY_TREE */
      if (lcd != NULL) {	/* found, set to cache */
        lrset->last_is_lset  = TRUE;
        lrset->cache.lset    = &(lcd->stateset[lrset->state_loc]);
        lrset->lastwid_cache = last_wid;
      } else {
        /* no relating lcdset found, falling to normal state */
        if (rhmm->is_pseudo) {
          lrset->last_is_lset  = TRUE;
          lrset->cache.lset    = &(rhmm->body.pseudo->stateset[lrset->state_loc]);
          lrset->lastwid_cache = last_wid;
        } else {
          lrset->last_is_lset  = FALSE;
          lrset->cache.state   = rhmm->body.defined->s[lrset->state_loc];
          lrset->lastwid_cache = last_wid;
        }
      }
    }
    /* calculate outprob and return */
    if (lrset->last_is_lset) {
      return(outprob_cd(t, lrset->cache.lset, param));
    } else {
      return(outprob_state(t, lrset->cache.state, param));
    }
  default:
    /* should not happen */
    j_printerr("no outprob style??\n");
    return(LOG_ZERO);
  }
}

#else  /* ~PASS1_IWCD */

/* Build without PASS1_IWCD: no context handling is done here, so the output
   log probability of node 'node' on frame 't' of 'param' is simply that of
   the state attached to the node.  'last_wid' is accepted only to keep the
   same signature as the PASS1_IWCD build. */
LOGPROB
outprob_style(WCHMM_INFO *wchmm, int node, int last_wid, int t, HTK_Param *param)
{
  (void)last_wid;		/* not used in this build */

  return outprob_state(t, wchmm->state[node].out, param);
}

#endif /* PASS1_IWCD */


/* Called when no IW-triphone was found (and no bi/mono-phone is specified in
   HMMList).  Prints the name of the missing triphone, composed from 'base'
   and the right context 'rc_name', along with the name of 'base' that is
   used as fallback.  It only outputs the message; the name is composed in
   the file-local scratch buffer. */
void
error_missing_right_triphone(HMM_Logical *base, char *rc_name)
{
  char *base_name = base->name;

  /* compose the triphone name that was searched for */
  strcpy(rbuf, base_name);
  add_right_context(rbuf, rc_name);

  j_printerr("Error: IW-triphone \"%s\" not found, fallback to pseudo {%s}\n", rbuf, base_name);
}
/* Left-context counterpart of the missing IW-triphone report: prints the
   name of the triphone composed from 'base' and the left context 'lc_name',
   along with the name of 'base' that is used as fallback.  It only outputs
   the message; the name is composed in the file-local scratch buffer. */
void
error_missing_left_triphone(HMM_Logical *base, char *lc_name)
{
  char *base_name = base->name;

  /* compose the triphone name that was searched for */
  strcpy(rbuf, base_name);
  add_left_context(rbuf, lc_name);

  j_printerr("Error: IW-triphone \"%s\" not found, fallback to pseudo {%s}\n", rbuf, base_name);
}
