/* Copyright (c) 1991-2002 Doshita Lab. Speech Group, Kyoto University */
/* Copyright (c) 2000-2002 Speech and Acoustics Processing Lab., NAIST */
/*   All rights reserved   */

/* chkhmmlist.c --- check existence of all possible triphones in HMMList */

/* $Id: chkhmmlist.c,v 1.3 2002/09/11 22:01:50 ri Exp $ */

/* consider vocabulary: only ones that can appear
   in word-internal and cross-word contexts are checked */

/* two main routines:
   make_base_phone():  build basephone info - basephone list and bgn/end mark
   test_interword_triphone(): check triphones using the basephone info
 */


#include <sent/htk_hmm.h>
#include <sent/vocabulary.h>

/*
 * Build the base phone list of an HMM set.
 *
 * Every logical HMM reachable from hmminfo->lgstart is visited, its center
 * phone name is extracted with center_name(), and each distinct name is
 * registered once as a BASEPHONE (both word-edge flags cleared) in an
 * aptree index.  On return hmminfo->basephone.root holds the index and
 * hmminfo->basephone.num the number of distinct base phones.
 */
void
make_hmm_basephone_list(HTK_HMM_INFO *hmminfo)
{
  HMM_Logical *logical;
  BASEPHONE *nearest = NULL;	/* closest existing entry for the current name */
  BASEPHONE *entry;
  APATNODE *tree = NULL;
  char center[50];
  int count = 0;

  for (logical = hmminfo->lgstart; logical != NULL; logical = logical->next) {
    center_name(logical->name, center);

    /* skip names that are already registered */
    if (tree != NULL) {
      nearest = aptree_search_data(center, tree);
      if (strmatch(nearest->name, center)) {
	continue;
      }
    }

    /* create a new base phone entry holding its own copy of the name */
    entry = (BASEPHONE *)mybmalloc(sizeof(BASEPHONE));
    entry->bgnflag = FALSE;
    entry->endflag = FALSE;
    entry->name = (char *)mybmalloc(strlen(center) + 1);
    strcpy(entry->name, center);

    /* the first entry becomes the root; later ones are inserted next to
       the nearest match found above */
    if (tree != NULL) {
      aptree_add_entry(entry->name, entry, nearest->name, &tree);
    } else {
      tree = aptree_make_root_node(entry);
    }
    count++;
  }

  hmminfo->basephone.root = tree;
  hmminfo->basephone.num = count;
}

/* aptree traversal callback: print one base phone (passed as X) together
   with its word-beginning and word-end flags */
static void
print_callback_detail(void *x)
{
  const BASEPHONE *phone = (const BASEPHONE *)x;

  j_printf("\"%s\": bgn=%d, end=%d\n",
	   phone->name, phone->bgnflag, phone->endflag);
}
/* print every base phone stored in BASE with its word-edge flags,
   one entry per line */
void
print_all_basephone_detail(HMM_basephone *base)
{
  APATNODE *tree = base->root;

  aptree_traverse_and_do(tree, print_callback_detail);
}
/* aptree traversal callback: print the name of one base phone (passed
   as X) followed by a comma separator */
static void
print_callback_name(void *x)
{
  const BASEPHONE *phone = (const BASEPHONE *)x;

  j_printf("%s, ", phone->name);
}
/* print the names of all base phones stored in BASE on a single line,
   terminated by a newline */
void
print_all_basephone_name(HMM_basephone *base)
{
  APATNODE *tree = base->root;

  aptree_traverse_and_do(tree, print_callback_name);
  j_printf("\n");
}

/* running totals of base phones flagged as word-beginning (bncnt) and
   word-end (edcnt); updated by count_callback() during a traversal */
int bncnt, edcnt;

/* aptree traversal callback: add the edge flags of one base phone
   (passed as X) to the running totals */
static void
count_callback(void *x)
{
  const BASEPHONE *phone = (const BASEPHONE *)x;

  if (phone->bgnflag) {
    bncnt++;
  }
  if (phone->endflag) {
    edcnt++;
  }
}
/* count the base phones in BASE that carry the word-beginning and
   word-end flags, and store the totals in base->bgnnum / base->endnum */
static void
count_all_phone(HMM_basephone *base)
{
  /* reset the shared totals before the traversal accumulates into them */
  bncnt = 0;
  edcnt = 0;

  aptree_traverse_and_do(base->root, count_callback);

  base->endnum = edcnt;
  base->bgnnum = bncnt;
}


/*
 * Mark the base phones that can occur at either edge of a word.
 *
 * First pass: for every word in WINFO except winfo->head_silwid, the
 * center phone name of the word's first phone is looked up in BASE and
 * that base phone gets bgnflag set (it can act as a right context).
 * Second pass: for every word except winfo->tail_silwid, the same is done
 * with the word's last phone and endflag (it can act as a left context).
 *
 * When the lookup does not return an entry whose name matches exactly,
 * the offending phone name is reported through j_error().
 */
static void
mark_word_edge(WORD_INFO *winfo, HMM_basephone *base)
{
  WORD_ID w;
  char p[50];			/* receives the center phone name */
  char *key;
  BASEPHONE *match;

  /* mark what is at beginning of word (can be right context) */
  for (w = 0; w < winfo->num; w++) {
    if (w == winfo->head_silwid) continue;
    key = center_name(winfo->wseq[w][0]->name, p);
    /* the search returns the closest entry, so confirm the exact name */
    match = aptree_search_data(key, base->root);
    if (strmatch(match->name, key)) {
      match->bgnflag = TRUE;
    } else {
      /* not found: pass the name so the "%s" in the message is filled in */
      j_error("InternalError: basephone \"%s\" specified in dict, but not found in HMM\n", key);
    }
  }

  /* mark what is at end of word (can be left context) */
  for (w = 0; w < winfo->num; w++) {
    if (w == winfo->tail_silwid) continue;
    key = center_name(winfo->wseq[w][winfo->wlen[w]-1]->name, p);
    match = aptree_search_data(key, base->root);
    if (strmatch(match->name, key)) {
      match->endflag = TRUE;
    } else {
      /* not found: pass the name so the "%s" in the message is filled in */
      j_error("InternalError: basephone \"%s\" specified in dict, but not found in HMM\n", key);
    }
  }
}


/* check whether all possible triphones exist in the logical HMMs */
/* temporary storage shared with the aptree traversal callbacks below:
   a callback receives only the visited entry, so everything else it
   needs is passed through these file-scope variables */
static HTK_HMM_INFO *local_hmminfo;	/* HMM set given to test_interword_triphone() */
static WORD_INFO *local_winfo;		/* word info given to test_interword_triphone() */
static APATNODE *local_root;		/* base phone tree, used for the nested traversal */
static WORD_ID current_w;		/* word currently being checked */
static char gbuf[50];			/* phone name under construction, shared by the callbacks */

/* list of unknown (error) triphones, kept free of duplicates */
static APATNODE *error_root;	/* index of missing triphone names */
static int error_num;		/* number of distinct names in the index */

/* record LOSTNAME as a missing triphone unless it is already listed;
   a private copy of the name is stored */
static void
add_to_error(char *lostname)
{
  char *nearest = NULL;		/* closest name already in the index */
  char *copy;

  if (error_root != NULL) {
    nearest = aptree_search_data(lostname, error_root);
    if (strmatch(nearest, lostname)) {
      return;			/* already recorded */
    }
  }

  copy = (char *)mybmalloc(strlen(lostname) + 1);
  strcpy(copy, lostname);

  /* the first name becomes the root; later ones are inserted next to
     the nearest match found above */
  if (error_root != NULL) {
    aptree_add_entry(copy, copy, nearest, &error_root);
  } else {
    error_root = aptree_make_root_node(copy);
  }

  error_num++;
}
/* aptree traversal callback: print one missing triphone name (passed
   as X) on its own line */
static void
print_error_callback(void *x)
{
  const char *name = (const char *)x;

  j_printf("%s\n", name);
}

/*
 * aptree traversal callback for words of two or more phones: check the
 * existence of the triphones "x - current_w" and "current_w + x", where
 * x is the visited base phone.
 *
 * If x can end a word (endflag), it is attached as left context to the
 * first phone of current_w; if x can begin a word (bgnflag), it is
 * attached as right context to the last phone of current_w.  When the
 * resulting name is not a known logical HMM and the word's edge phone has
 * is_pseudo set, the miss is printed and added to the error list.
 *
 * Reads local_hmminfo, local_winfo and current_w; uses gbuf as scratch.
 */
static void
triphone_callback_normal(void *x)
{
  BASEPHONE *b = x;
  WORD_ID w = current_w;
  HMM_Logical *lg;

  if (b->endflag) {		/* x can appear as end of word */
    lg = local_winfo->wseq[w][0];
    strcpy(gbuf, lg->name);
    add_left_context(gbuf, b->name);
    if (htk_hmmdata_lookup_logical(local_hmminfo, gbuf) == NULL
	&& lg->is_pseudo) {
      j_printerr("Error: \"%s\" not found, fallback to pseudo {%s}\n", gbuf, lg->name);
      add_to_error(gbuf);
    }
  }
  if (b->bgnflag) {		/* x can appear as beginning of word */
    lg = local_winfo->wseq[w][local_winfo->wlen[w]-1];
    strcpy(gbuf, lg->name);
    add_right_context(gbuf, b->name);
    if (htk_hmmdata_lookup_logical(local_hmminfo, gbuf) == NULL
	&& lg->is_pseudo) {
      j_printerr("Error: \"%s\" not found, fallback to pseudo {%s}\n", gbuf, lg->name);
      add_to_error(gbuf);
    }
  }
}

/* for words with only one phone, all combinations of "x - current_w + x"
   should be checked */

/*
 * Inner aptree traversal callback for one-phone words: gbuf already holds
 * the word's phone with a left context attached (prepared by
 * triphone_callback_left()).  If the visited base phone can begin a word
 * (bgnflag), it is attached as right context to a copy of gbuf.  When the
 * resulting name is not a known logical HMM and the word's phone has
 * is_pseudo set, the miss is printed and added to the error list.
 */
static void
triphone_callback_right(void *x)
{
  BASEPHONE *b = x;
  WORD_ID w = current_w;
  HMM_Logical *lg;
  char buf[50];			/* gbuf must stay intact for the next entry */

  if (b->bgnflag) {
    lg = local_winfo->wseq[w][0];
    strcpy(buf, gbuf);
    add_right_context(buf, b->name);
    if (htk_hmmdata_lookup_logical(local_hmminfo, buf) == NULL
	&& lg->is_pseudo) {
      j_printerr("Error: \"%s\" not found, fallback to pseudo {%s}\n", buf, lg->name);
      add_to_error(buf);
    }
  }
}
/*
 * Outer aptree traversal callback for one-phone words: if the visited
 * base phone can end a word (endflag), attach it as left context to the
 * single phone of current_w in gbuf, then traverse the base phone tree
 * again with triphone_callback_right() to try every right context.
 */
static void
triphone_callback_left(void *x)
{
  BASEPHONE *b = x;
  WORD_ID w = current_w;
  HMM_Logical *lg;

  if (b->endflag) {
    lg = local_winfo->wseq[w][0];
    strcpy(gbuf, lg->name);
    add_left_context(gbuf, b->name);
    aptree_traverse_and_do(local_root, triphone_callback_right);
  }
}

/*
 * Check whether all possible inter-word triphones exist as logical HMMs.
 *
 * Candidate triphones are generated from the base phone tree in
 * hmminfo->basephone: for each word in WINFO the tree is traversed with
 * triphone_callback_normal() when the word has more than one phone, and
 * with triphone_callback_left() otherwise.  Prints "passed" when nothing
 * was added to the error list, else the missing names and their count.
 */
void
test_interword_triphone(HTK_HMM_INFO *hmminfo, WORD_INFO *winfo)
{
  WORD_ID wid;

  /* publish the inputs for the traversal callbacks and reset the error list */
  local_hmminfo = hmminfo;
  local_winfo = winfo;
  local_root = hmminfo->basephone.root;
  error_root = NULL;
  error_num = 0;

  j_printf("Inter-word triphone existence test...\n");
  for (wid = 0; wid < winfo->num; wid++) {
    current_w = wid;
    /* multi-phone words: check head and tail phone separately;
       one-phone words: check both contexts at once */
    aptree_traverse_and_do(hmminfo->basephone.root,
			   (winfo->wlen[wid] > 1) ? triphone_callback_normal
						  : triphone_callback_left);
  }

  if (error_root != NULL) {
    j_printf("following triphones are missing in HMMList:\n");
    aptree_traverse_and_do(error_root, print_error_callback);
    j_printf("total %d missing inter-word triphones\n", error_num);
    return;
  }
  j_printf("passed\n");
}


/* complete the base phone info of HMMINFO using the dictionary WINFO:
   flag the base phones found at word head / tail, then count them */
void
make_base_phone(HTK_HMM_INFO *hmminfo, WORD_INFO *winfo)
{
  HMM_basephone *base = &(hmminfo->basephone);

  j_printf("Exploring HMM database and lexicon tree:\n");
  mark_word_edge(winfo, base);
  count_all_phone(base);
}
  
/* print a summary of the HMM set: physical / listed / pseudo / total
   logical HMM counts, and the base phone statistics */
void
print_phone_info(HTK_HMM_INFO *hmminfo)
{
  /* logical HMMs that are not generated pseudo HMMs */
  const int listed = hmminfo->totallogicalnum - hmminfo->totalpseudonum;

  j_printf("%5d physical HMMs defined in hmmdefs\n", hmminfo->totalhmmnum);

  if (hmminfo->totalhmmnum == listed) {
    j_printf("   no HMMList, physical HMM names are redirected to logicalHMM\n");
  } else if (hmminfo->is_triphone) {
    j_printf("%5d triphones listed in hmmlist\n", listed);
  } else {
    j_printf("%5d phones in hmmlist\n", listed);
  }

  if (hmminfo->totalpseudonum != 0) {
    j_printf("%5d pseudo HMM generated for missing mono/bi-phones\n", hmminfo->totalpseudonum);
  }

  j_printf("%5d TOTAL logical HMMs\n", hmminfo->totallogicalnum);
  j_printf("%5d base phones in logical HMM\n", hmminfo->basephone.num);
  j_printf("%5d phones appear on word head, %d phones on word tail\n",
	   hmminfo->basephone.bgnnum, hmminfo->basephone.endnum);
}
