/*
 * Copyright (c) 1991-2003 Kyoto University
 * Copyright (c) 2000-2003 NAIST
 * All rights reserved
 */

/* wchmm_check.c --- check wchmm structure  */

/* $Id: wchmm_check.c,v 1.8 2003/09/29 06:01:22 ri Exp $ */

#include <julius.h>


/* print misc. info */
/*
 * Dump the dictionary entry of one word: its name, output string,
 * transparency flag, phoneme sequence (logical names, then the names of
 * the pseudo/defined bodies they refer to), the wton[] value and, when
 * CLASS_NGRAM is defined, the cprob[] value together with 10^cprob.
 *
 * winfo: word dictionary to read from
 * word:  word ID; the function prints nothing unless word < winfo->num
 */
static void
print_winfo_w(WORD_INFO *winfo, WORD_ID word)
{
  int pos;
  const char *transp;

  /* silently ignore IDs beyond the dictionary */
  if (!(word < winfo->num)) {
    return;
  }

  j_printf("--winfo\n");
  j_printf("wname   = %s\n", winfo->wname[word]);
  j_printf("woutput = %s\n", winfo->woutput[word]);

  if (winfo->is_transparent[word]) {
    transp = "yes";
  } else {
    transp = "no";
  }
  j_printf("\ntransp  = %s\n", transp);
  j_printf("wlen    = %d\n", winfo->wlen[word]);

  /* first pass: logical HMM names */
  j_printf("wseq    =");
  pos = 0;
  while (pos < winfo->wlen[word]) {
    j_printf(" %s", winfo->wseq[word][pos]->name);
    pos++;
  }

  /* second pass: what each logical HMM points to; pseudo ones in parens */
  j_printf("\nwseq_def=");
  for (pos = 0; pos < winfo->wlen[word]; pos++) {
    if (!winfo->wseq[word][pos]->is_pseudo) {
      j_printf(" %s", winfo->wseq[word][pos]->body.defined->name);
    } else {
      j_printf(" (%s)", winfo->wseq[word][pos]->body.pseudo->name);
    }
  }

  j_printf("\nwton    = %d\n", winfo->wton[word]);
#ifdef CLASS_NGRAM
  j_printf("cprob   = %f(%f)\n", winfo->cprob[word], pow(10.0, winfo->cprob[word]));
#endif
}
/*
 * Print the per-word entries of the lexicon tree: the offset[] value of
 * every phoneme position of the word, then wordbegin[] and wordend[].
 *
 * wchmm: lexicon tree to read from
 * word:  word ID; the function prints nothing unless
 *        word < wchmm->winfo->num
 */
static void
print_wchmm_w(WCHMM_INFO *wchmm, WORD_ID word)
{
  int pos;

  /* silently ignore IDs beyond the dictionary */
  if (!(word < wchmm->winfo->num)) {
    return;
  }

  j_printf("--wchmm (word)\n");

  j_printf("offset  =");
  pos = 0;
  while (pos < wchmm->winfo->wlen[word]) {
    j_printf(" %d", wchmm->offset[word][pos]);
    pos++;
  }
  j_printf("\n");

  j_printf("wordbegin = %d\n", wchmm->wordbegin[word]);
  j_printf("wordend = %d\n", wchmm->wordend[word]);
}
/*
 * Print the information stored for one node of the lexicon tree:
 *   - its stend[] entry,
 *   - "NO OUTPUT" when the node has no output state, otherwise (only when
 *     PASS1_IWCD is defined) how the output is referenced (outstyle) and
 *     the details of the referenced state / set,
 *   - the successor list ID (unless CATEGORY_TREE is defined),
 *   - the unigram factoring score (USE_NGRAM and UNIGRAM_FACTORING).
 *
 * wchmm: lexicon tree to read from
 * node:  node index, valid range is [0, wchmm->n); an out-of-range value
 *        only produces a message
 */
static void
print_wchmm_s(WCHMM_INFO *wchmm, int node)
{
  /* node normally comes from user input: never index the tables with it
     before checking it against the node count */
  if (node < 0 || node >= wchmm->n) {
    j_printf("node %d is out of range\n", node);
    return;
  }

  j_printf("--wchmm (node)\n");
  j_printf("stend   = %d\n",wchmm->stend[node]);
  if (wchmm->state[node].out.state == NULL) {
    j_printf("NO OUTPUT\n");
  }
#ifdef PASS1_IWCD
  else {
    j_printf("outstyle= ");
    /* outstyle tells which member of the "out" union is the valid one */
    switch(wchmm->state[node].outstyle) {
    case AS_STATE:
      j_printf("AS_STATE (id=%d)\n", (wchmm->state[node].out.state)->id);
      break;
    case AS_LSET:
      j_printf("AS_LSET  (%d variants)\n", (wchmm->state[node].out.lset)->num);
      break;
    case AS_RSET:
      if ((wchmm->state[node].out.rset)->hmm->is_pseudo) {
        j_printf("AS_RSET  (name=\"%s\", pseudo=\"%s\", loc=%d)\n",
                 (wchmm->state[node].out.rset)->hmm->name,
                 (wchmm->state[node].out.rset)->hmm->body.pseudo->name,
                 (wchmm->state[node].out.rset)->state_loc);
      } else {
        j_printf("AS_RSET  (name=\"%s\", defined=\"%s\", loc=%d)\n",
                 (wchmm->state[node].out.rset)->hmm->name,
                 (wchmm->state[node].out.rset)->hmm->body.defined->name,
                 (wchmm->state[node].out.rset)->state_loc);
      }
      break;
    case AS_LRSET:
      /* read through "lrset" here as well: the test used to go through
         the "rset" member while the printing used "lrset" */
      if ((wchmm->state[node].out.lrset)->hmm->is_pseudo) {
        j_printf("AS_LRSET  (name=\"%s\", pseudo=\"%s\", loc=%d)\n",
                 (wchmm->state[node].out.lrset)->hmm->name,
                 (wchmm->state[node].out.lrset)->hmm->body.pseudo->name,
                 (wchmm->state[node].out.lrset)->state_loc);
      } else {
        j_printf("AS_LRSET  (name=\"%s\", defined=\"%s\", loc=%d)\n",
                 (wchmm->state[node].out.lrset)->hmm->name,
                 (wchmm->state[node].out.lrset)->hmm->body.defined->name,
                 (wchmm->state[node].out.lrset)->state_loc);
      }
      break;
    default:
      j_printf("UNKNOWN???\n");
    }
  }
#endif /* PASS1_IWCD */
#ifndef CATEGORY_TREE
  j_printf("successor list ID: %d\n", wchmm->state[node].scid);
#endif
#ifdef USE_NGRAM
#ifdef UNIGRAM_FACTORING
  /* LOG_ZERO marks a node that carries no factoring score */
  if (wchmm->state[node].fscore == LOG_ZERO) {
    j_printf("unigram factoring prob: none\n");
  } else {
    j_printf("unigram factoring prob: %f\n", wchmm->state[node].fscore);
  }
#endif
#endif
}
/*
 * List the arcs attached to one node of the lexicon tree.  For every
 * cell of the node's arc list, print the destination (ac->arc) and the
 * arc value ac->a followed by 10^a, then the number of arcs.
 *
 * wchmm: lexicon tree to read from
 * node:  node index, valid range is [0, wchmm->n); an out-of-range value
 *        only produces a message
 */
static void
print_wchmm_s_arc(WCHMM_INFO *wchmm, int node)
{
  A_CELL *ac;
  int i = 0;

  /* node normally comes from user input: never index state[] with it
     before checking it against the node count */
  if (node < 0 || node >= wchmm->n) {
    j_printf("node %d is out of range\n", node);
    return;
  }

  j_printf("arcs:\n");
  for (ac=wchmm->state[node].ac;ac;ac=ac->next) {
    j_printf(" %d %f(%f)\n",ac->arc,ac->a,pow(10.0, ac->a));
    i++;
  }
  j_printf(" total %d arcs\n",i);
}
#ifndef CATEGORY_TREE
/*
 * Print the words held in one successor list and how many there are.
 *
 * wchmm: lexicon tree to read from
 * scid:  successor list ID used to index wchmm->sclist[]; the value -1
 *        is reported as "NO SUCCESSOR" with a total of 0
 */
static void
print_wchmm_s_successor(WCHMM_INFO *wchmm, int scid)
{
  S_CELL *cell;
  int total;

  j_printf("successor ID: %d\n", scid);
  j_printf("successors:\n");

  total = 0;
  if (scid != -1) {
    /* walk the linked list of successor cells */
    cell = wchmm->sclist[scid];
    while (cell != NULL) {
      j_printf(" %d\n", cell->word);
      total++;
      cell = cell->next;
    }
  } else {
    j_printf(" NO SUCCESSOR\n");
  }

  j_printf(" total %d successors\n", total);
}
#endif
/*
 * Look up a logical HMM by name in the file-global "hmminfo" and print
 * it with put_logical_hmm(); report when no HMM of that name is found.
 *
 * name: logical HMM name to look up
 */
static void
print_hmminfo(char *name)
{
  HMM_Logical *found = htk_hmmdata_lookup_logical(hmminfo, name);

  if (found != NULL) {
    put_logical_hmm(found);
    return;
  }
  j_printf("no HMM named \"%s\"\n", name);
}
#ifdef USE_NGRAM
/*
 * Print one N-gram entry: the given ID and the name stored for it in
 * ngram->wname[].
 *
 * ngram: N-gram data to read from
 * nid:   index into ngram->wname[] (not range-checked here)
 */
static void
print_ngraminfo(NGRAM_INFO *ngram, int nid)
{
  const char *entry_name;

  j_printf("-- N-gram entry --\n");
  j_printf("nid  = %d\n", nid);

  entry_name = ngram->wname[nid];
  j_printf("name = %s\n", entry_name);
}
#endif


/*
 * Interactive inspection mode for the lexicon tree and LM data.
 *
 * Prints a prompt, reads a "command argument" pair from standard input
 * and dispatches on the first character of the command.  The argument is
 * used both as a number (atoi) and, for 'h', as a name.  The loop ends on
 * the 'q' command or when standard input is exhausted.
 *
 * Each token is limited to 23 characters so that it always fits the
 * 24-byte buffers; the remainder of a longer token is left in the input
 * and read as the following token.
 *
 * wchmm: lexicon tree to inspect
 */
void
wchmm_check_interactive(WCHMM_INFO *wchmm) /* interactive check */
{
  char buf1[24], buf2[24];
  int arg;
  int nread;
  WORD_ID argw;
  boolean endflag;

  j_printf("\n\n");
  j_printf("********************************************\n");
  j_printf("********  LM & LEXICON CHECK MODE  *********\n");
  j_printf("********************************************\n");
  j_printf("\n");

  for (endflag = FALSE; endflag == FALSE;) {
    j_printf("===== syntax: command arg (\"H H\" for help) > ");

    /* start from empty strings so a missing argument is well defined */
    buf1[0] = '\0';
    buf2[0] = '\0';
    /* field widths keep over-long tokens from overrunning the buffers */
    nread = scanf("%23s %23s", buf1, buf2);
    if (nread < 1) {
      /* EOF or read error before any command: leave instead of spinning */
      break;
    }

    arg = atoi(buf2);
    
    switch(buf1[0]) {
    case 'w':			/* word info */
      argw = arg;
      print_winfo_w(wchmm->winfo, argw);
      print_wchmm_w(wchmm, argw);
      break;
    case 'n':			/* node info */
      print_wchmm_s(wchmm, arg);
      break;
    case 'a':			/* arc list */
      print_wchmm_s_arc(wchmm, arg);
      break;
#if 0
    case 'r':			/* reverse arc list */
      print_wchmm_r_arc(arg);
      break;
#endif
#ifndef CATEGORY_TREE
    case 's':			/* successor word list */
      print_wchmm_s_successor(wchmm, arg);
      break;
#endif
    case 't':			/* node total info of above */
      print_wchmm_s(wchmm, arg);
      print_wchmm_s_arc(wchmm, arg);
#if 0
      print_wchmm_r_arc(arg);
#endif
#ifndef CATEGORY_TREE
      /* here arg is a node index, while the successor printer wants a
         successor list ID: translate through the node's scid */
      if (arg >= 0 && arg < wchmm->n) {
        print_wchmm_s_successor(wchmm, wchmm->state[arg].scid);
      }
#endif
      break;
    case 'h':			/* hmm state info */
      print_hmminfo(buf2);
      break;
#ifdef USE_NGRAM
    case 'l':			/* N-gram language model info */
      print_ngraminfo(wchmm->ngram, arg);
      break;
#endif
    case 'H':			/* help */
      j_printf("syntax: [command_character] [number(#)]\n");
      j_printf("  w [word_id] ... show word info\n");
      j_printf("  n [state]   ... show wchmm state info\n");
      j_printf("  a [state]   ... show arcs from the state\n");
#if 0
      j_printf("  r [state]   ... show arcs  to  the state\n");
#endif
#ifndef CATEGORY_TREE
      j_printf("  s [scid]   ... show successor list\n");
#endif
      j_printf("  t [state]   ... show all node info of the state\n");
      j_printf("  h [hmmname] ... show HMM info of the name\n");
#ifdef USE_NGRAM
      j_printf("  l [nwid]    ... N-gram entry info\n");
#endif
      j_printf("  H           ... print this help\n");
      j_printf("  q           ... quit check mode\n");
      break;
    case 'q':			/* quit */
      endflag = TRUE;
      break;
    default:			/* unknown command: ignored */
      break;
    }
  }
  j_printf("\n");
  j_printf("********************************************\n");
  j_printf("*****  END OF LM & LEXICON CHECK MODE  *****\n");
  j_printf("********************************************\n");
  j_printf("\n");
}

/*
 * Check wchmm coherence (for internal debug only!).
 *
 * Three checks are made on the lexicon tree:
 *   1. none of the nodes listed in startnode[] has an output function;
 *   2. for every word w, stend[wordend[w]] gives back w;
 *   3. every node whose stend[] entry is a valid word has no arc other
 *      than arcs to itself in its arc list, and the number of such nodes
 *      equals the number of words.
 * When any check fails, wchmm_check_interactive() is entered so that the
 * structure can be inspected by hand.
 *
 * wchmm: lexicon tree to check
 */
void
check_wchmm(WCHMM_INFO *wchmm)
{
  int i,n,node;
  WORD_ID w;
  boolean ok_flag;
  A_CELL *ac;

  ok_flag = TRUE;

  /* check word-beginning nodes */
  for(i=0;i<wchmm->startnum;i++) {
    node = wchmm->startnode[i];
    if (wchmm->state[node].out.state != NULL) {
      /* "node" has to be an argument, not part of the format string */
      j_printf("Error: word-beginning node %d has output function!\n", node);
      ok_flag = FALSE;
    }
  }
  /* examine if word->state and state->word mapping is correct */
  for(w=0;w<wchmm->winfo->num;w++) {
    if (wchmm->stend[wchmm->wordend[w]] != w) {
      j_printf("Error: no match of word end for word %d!!\n", w);
      ok_flag = FALSE;
    }
  }

  /* check if the last state is unique and has only one output arc */
  i = 0;			/* counts the nodes that end a word */
  for (n=0;n<wchmm->n;n++) {
    if (wchmm->stend[n] != WORD_INVALID) {
      i++;
      /* skip self arcs; stop at the first arc that goes anywhere else */
      for (ac=wchmm->state[n].ac; ac; ac=ac->next) {
        if (ac->arc == n) continue;
        break;
      }
      if (ac != NULL) {
        j_printf("node %d is shared?\n",n);
        ok_flag = FALSE;
      }
    }
  }
  if (i != wchmm->winfo->num ) {
    j_printf("num of heads of words in wchmm not match word num!!\n");
    /* exactly one argument per conversion: the count found, the word num */
    j_printf("from wchmm->stend:%d != from winfo:%d ?\n",i,wchmm->winfo->num);
    ok_flag = FALSE;
  }

  /* if check failed, go into interactive mode */
  if (!ok_flag) {
    wchmm_check_interactive(wchmm);
  }

  /* NOTE(review): this message is also emitted after a failed check once
     the interactive session ends -- confirm whether that is intended */
  VERMES("  coordination check passed\n");
}

