/*
 * Copyright (c) 1991-2003 Kyoto University
 * Copyright (c) 2000-2003 NAIST
 * All rights reserved
 */

/* factoring_sub.c --- functions to build successor tree and compute factoring value */

/* $Id: factoring_sub.c,v 1.11 2003/12/05 01:40:23 ri Exp $ */

/* for Julius (N-gram) : 2-gram factoring, 1-gram factoring implemented
   for Julian (grammar): deterministic factoring implemented
                         (when CATEGORY_TREE not defined)
			 
   default: 1-gram factoring for Julius, category-tree lexicon (deterministic
   factoring not used) for Julian
*/

#include <julius.h>

#ifndef CATEGORY_TREE		/* category tree does not need factoring */

/*----------------------------------------------------------------------*/
/* successor list:

   $BLZ9=B$2=<-=q$N3F%N!<%I$K3d$jIU$1$i$l$k!$$=$N%N!<%I6&M-C18l$N%j%9%H!%(B
   $B;^J,$+$l$7$?:G=i$N%N!<%I$N$_$,;}$D!%(B
   Lists of words sharing the node, assigned to each lexicon tree node.
   Only the nodes just after the branch have this.
   

          2-o-o - o-o-o - o-o-o          word "A" 
         /
    1-o-o
	 \       4-o-o                   word "B"
	  \     /   
	   3-o-o - 5-o-o - 7-o-o         word "C"
	        \        \ 
		 \        8-o-o          word "D"
		  6-o-o                  word "E"
		   

    node  | successor list (wchmm->sclist[wchmm->state[node].scid])
    =======================
      1   | A B C D E
      2   | A
      3   |   B C D E
      4   |   B
      5   |     C D
      6   |         E
      7   |     C
      8   |       D

   $BCmL\(B: $B>e5-$NC18l(B "A" $B$N>l9g(B,$B8@8l%9%3%"$OC18l=*C<$G$O$J$/%N!<%I(B 2 $B$N0LCV$G(B
   $B4{$K3NDj$9$k!%(B
   Notice: language score for word "A" is determined in node 2 (not word end).

*/

/* $BBh#1%Q%9$K$*$1$k(Bfactoring $B$N<B9T(B: $B<B:]$K$O(B beam.c $B$G9T$J$o$l$k(B

   2-gram factoring: 
   $B!V<!%N!<%I$K(B successor list $B$,B8:_$9$l$P(B, $B$=$N(B successor list $B$NC18l$N(B
     2-gram $B$N:GBgCM$r5a$a$F(B, $BEAHB$7$F$-$F$$$k(Bfactoring $BCM$r99?7$9$k!W(B
     $B"*(Bsuccessor list 1$B$D$NC18l3NDj%N!<%I$G$O<+F0E*$K@5$7$$(B2-gram$B$,3d$jEv$?$k(B

   1-gram factoring:
   $B!V<!%N!<%I$K(B successor list $B$,B8:_$9$l$P(B, $B$=$N(B successor list $B$NC18l$N(B
     1-gram $B$N:GBgCM$r5a$a$F!$EAHB$7$F$-$F$$$k(Bfactoring $BCM$r99?7$9$k!W(B
     $B"*C18l3NDj%N!<%I$G$O(B2-gram $B$r7W;;(B

     $B9bB.2=$H8zN(2=(B: 1-gram $B$N:GBgCM$O%3%s%F%-%9%HHs0MB8(B: $B5/F0;~$KA4It7W;;$7$F(B
     wchmm->state[node].fscore $B$K3JG<$7$F$*$-!$$=$N(B successor list $B$O(B free $B$9$k(B

   DFA$BJ8K!;HMQ;~$N7hDjE*(B factoring (CATEGORY_TREE $B$,(B undefined $B$N>l9g(B):
   $B!V<!%N!<%I$K(B successor list $B$,B8:_$9$l$P(B, $B$=$N(B successor list $BFb$N(B
     $BC18l$N%+%F%4%j$rD4$Y(B, $B0l$D$G$bD>A0C18l$H%+%F%4%jBP@)Ls>e@\B32DG=$J(B
     $BC18l$,$"$l$P!$A+0\$r5v$9!%0l$D$b$J$1$l$PA+0\$5$;$J$$!W(B
*/

/* factoring execution in Viterbi: mainly operated in beam.c

   2-gram factoring:
   "If a successor list exists at the next node, compute the maximum 2-gram
    likelihood over the words in that successor list and update the
    factoring value with it."
    -> the precise 2-gram is automatically assigned at a node whose
       successor list holds a single word.

   1-gram factoring:
   "If a successor list exists at the next node, get the maximum 1-gram
    likelihood over the words in that successor list and update the
    factoring value with it.
    If the next node has only one successor word, update the factoring value
    with the true 2-gram."
    For efficiency, the 1-gram factoring value (which is independent of the
    hypothesis) of each successor list is computed before search and stored
    in wchmm->state[node].fscore.

   DFA deterministic factoring (in case of Julian with CATEGORY_TREE undefined):
   "If a successor list exists at the next node, allow the transition only if
    it contains at least one word that can connect to the previous word
    hypothesis."
   
*/
		  
/*----------------------------------------------------------------------*/
/* successor list $B$r9=C[$9$k!%(B
   $BC18l$N(B ID $B$O(B N-gram $B$G$NHV9f(B($B$"$k$$$O%+%F%4%jHV9f(B), $B<-=q$NC18l(BID$B$G$J$$(B
   $B$3$H$KCm0U!%C18l$O(BID$B$G>:=g$KJ]B8$9$k(B */
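/* build whole successor lists.
   IDs stored in the lists are dictionary word IDs (see add_successor());
   they are converted with winfo->wton[] at the point where a language
   score is looked up.
   Words in each list are kept in ascending order of ID. */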

/* add word 'w' to the successor list at 'node' (w = dictionary word ID).
   The list is kept sorted in ascending order of word ID and holds each word
   at most once: when 'w' is already present the list is left untouched and
   the cell allocated for it is released again.
     wchmm : lexicon tree (not referenced by this function)
     slist : array of successor list heads, indexed by node
     node  : node whose list is updated
     w     : word to add */
static void
add_successor(WCHMM_INFO *wchmm, S_CELL **slist, int node, WORD_ID w)
{
  S_CELL *sctmp, *sc;

  /* allocate the new cell */
  sctmp = (S_CELL *) mymalloc(sizeof(S_CELL));
  if (sctmp == NULL) {
    j_error("malloc fault at add_succesor(%d,%d)\n",node,w);
  }
  sctmp->word = w;

  /* empty list, or 'w' sorts before the current head: becomes new head */
  sc = slist[node];
  if (sc == NULL || sctmp->word < sc->word) {
    sctmp->next = sc;
    slist[node] = sctmp;
    return;
  }

  /* find the last cell whose word is <= w */
  for (; sc; sc = sc->next) {
    if (sc->next == NULL || sctmp->word < (sc->next)->word) {
      if (sctmp->word == sc->word) {
        /* already registered: do not duplicate, and do not leak the cell */
        free(sctmp);
        return;
      }
      sctmp->next = sc->next;
      sc->next = sctmp;
      return;
    }
  }
}

/* compare the successor lists of 'node1' and 'node2'.
   Returns TRUE when both lists hold the same words in the same order
   (two empty lists also match), FALSE otherwise.
   Both lists are expected to be sorted by word ID. */
static boolean
match_successor(S_CELL **slist, int node1, int node2)
{
  S_CELL *a = slist[node1];
  S_CELL *b = slist[node2];

  /* walk both lists in lockstep while both still have cells */
  while (a != NULL && b != NULL) {
    if (a->word != b->word) return FALSE;
    a = a->next;
    b = b->next;
  }
  /* equal only if both ran out at the same time */
  if (a != NULL || b != NULL) return FALSE;
  return TRUE;
}

/* release every cell of the successor list at 'node' and
   reset the list head to NULL */
static void
free_successor(S_CELL **slist, int node)
{
  S_CELL *cell, *following;

  for (cell = slist[node]; cell != NULL; cell = following) {
    following = cell->next;	/* fetch the link before the cell goes away */
    free(cell);
  }
  slist[node] = NULL;
}

/* main function to build the whole successor lists of lexicon tree 'wchmm'.
   The result is an array with one list head per node (NULL where the node
   keeps no list), stored to wchmm->sclist. */
void
make_successor_list(WCHMM_INFO *wchmm)
{
  S_CELL **sc_list;
  boolean *freemark;
  WORD_ID w;
  int node, upper, phone;
  int freed_num;

  VERMES("  make successor lists for factoring...");

  /* 1. allocate one (empty) list head and one free mark per node */
  sc_list = (S_CELL **)mymalloc(sizeof(S_CELL *) * wchmm->n);
  freemark = (boolean *)mymalloc(sizeof(boolean) * wchmm->n);
  for (node = 0; node < wchmm->n; node++) {
    sc_list[node] = NULL;
    freemark[node] = FALSE;
  }

  /* 2. register every word at the start node of each of its phonemes
        and at its word end node */
  for (w = 0; w < wchmm->winfo->num; w++) {
    for (phone = 0; phone < wchmm->winfo->wlen[w]; phone++) {
      add_successor(wchmm, sc_list, wchmm->offset[w][phone], w);
    }
    add_successor(wchmm, sc_list, wchmm->wordend[w], w);
  }

  /* 3. find the unnecessary lists: a list identical to the one at the
        preceding phoneme start node carries no new information.
        Walk each word from its end node back to its first phoneme and
        mark the later node of every identical pair. */
  for (w = 0; w < wchmm->winfo->num; w++) {
    node = wchmm->wordend[w];
    for (phone = wchmm->winfo->wlen[w] - 1; phone >= 0; phone--) {
      upper = wchmm->offset[w][phone];
      if (node == upper) {
        /* phoneme start and current node coincide (1-state case): skip */
        continue;
      }
      if (match_successor(sc_list, node, upper)) {
        freemark[node] = TRUE;
      }
      node = upper;
    }
  }

  /* release the marked lists */
  freed_num = 0;
  for (node = 0; node < wchmm->n; node++) {
    if (freemark[node] == TRUE) {
      freed_num++;
      free_successor(sc_list, node);
    }
  }

  if (debug2_flag) {
    j_printerr("%d freed...", freed_num);
  }

  free(freemark);

  wchmm->sclist = sc_list;

  VERMES("done\n");
}

#ifndef CATEGORY_TREE
/* make index to valid factoring node, and make mapping from node ID
   for factoring cache.

   On entry wchmm->sclist has wchmm->n entries indexed by node number
   (NULL where the node owns no successor list).  On return:
     - wchmm->scnum  holds the number of non-NULL lists,
     - wchmm->sclist is a newly allocated array of scnum entries that holds
       only those lists (the old array is freed, the lists are kept),
     - wchmm->state[node].scid is the index of the node's list in the new
       array, or -1 when the node has none.
   Two further passes then copy / move the scid (and, with USE_NGRAM and
   UNIGRAM_FACTORING, the fscore) between states; see the comments below. */
void
make_sc_index(WCHMM_INFO *wchmm)
{
  int n, node;
  S_CELL **new_sclist;

  /* count total valid factoring node and set to wchmm->scnum */
  n = 0;
  for (node=0;node<wchmm->n;node++) {
    if (wchmm->sclist[node] != NULL) n++;
  }
  wchmm->scnum = n;
  /* do garbage collection for the sclist, and */
  /* make mapping from wchmm->state[] --(scid)-->wchmm->sclist */
  n = 0;
  new_sclist = (S_CELL **)mymalloc(sizeof(S_CELL *) * wchmm->scnum);
  for (node=0;node<wchmm->n;node++) {
    if (wchmm->sclist[node] != NULL) {
      wchmm->state[node].scid = n;
      new_sclist[n] = wchmm->sclist[node];
      n++;
    } else {
      wchmm->state[node].scid = -1;
    }
  }
  /* only the old pointer array is released; the lists now live in new_sclist */
  free(wchmm->sclist);
  wchmm->sclist = new_sclist;

  /* duplicate scid for HMMs with more than one arc from initial state */
  /* (note: 'node' declared in this scope shadows the outer 'node') */
  {
    WORD_ID w;
    int k,j;
    HMM_Logical *ltmp;
    int ltmp_state_num;
    int ato;
    LOGPROB prob;
    int node, scid;
    
    for(w=0;w<wchmm->winfo->num;w++) {
      for(k=0;k<wchmm->winfo->wlen[w];k++) {
	/* only phonemes whose start node carries a successor list matter */
	node = wchmm->offset[w][k];
	scid = wchmm->state[node].scid;
	if (scid == -1) continue;
	ltmp = wchmm->winfo->wseq[w][k];
	ltmp_state_num = hmm_logical_state_num(ltmp);
	/* a[0][last] != LOG_ZERO: the transition matrix has a direct arc from
	   state 0 to the last state, so the phoneme can be skipped.  In that
	   case also give the scid to the start node of the next phoneme (or to
	   the word end node for the last phoneme) unless it already has one. */
	if ((hmm_logical_trans(ltmp))->a[0][ltmp_state_num-1] != LOG_ZERO) {
	  j = k + 1;
	  if (j == wchmm->winfo->wlen[w]) {
	    if (wchmm->state[wchmm->wordend[w]].scid == -1) {
	      printf("word %d: factoring node copied for skip phone\n", w);
	      wchmm->state[wchmm->wordend[w]].scid = scid;
	    }
	  } else {
	    if (wchmm->state[wchmm->offset[w][j]].scid == -1) {
	      printf("word %d: factoring node copied for skip phone\n", w);
	      wchmm->state[wchmm->offset[w][j]].scid = scid;
	    }
	  }
	}
	/* every state 'ato' with an arc from state 0 gets the same scid.
	   NOTE(review): node+ato-1 assumes the states of this phoneme occupy
	   consecutive node numbers starting at offset[w][k] -- confirm against
	   the lexicon tree builder. */
	for(ato=1;ato<ltmp_state_num;ato++) {
	  prob = (hmm_logical_trans(ltmp))->a[0][ato];
	  if (prob != LOG_ZERO) {
	    wchmm->state[node+ato-1].scid = scid;
	  }
	}
      }
    }
  }
  /* move scid and fscore on the head state to the head grammar state */
  {
    int i, node;
    A_CELL *ac;
    for(i=0;i<wchmm->startnum;i++) {
      node = wchmm->startnode[i];
      /* a word-head node is expected to have no output probability */
      if (wchmm->state[node].out.state != NULL) {
	j_error("Error: outprob exist in word-head node??\n");
      }
      /* pull the scid up from each arc destination to the word-head node;
	 two destinations carrying different scids is treated as fatal */
      for(ac=wchmm->state[node].ac;ac;ac=ac->next) {
	if (wchmm->state[ac->arc].scid != -1) {
	  if (wchmm->state[node].scid != -1 && wchmm->state[node].scid != wchmm->state[ac->arc].scid) {
	    j_error("Error: different successor list within word-head phone?\n");
	  }
	  wchmm->state[node].scid = wchmm->state[ac->arc].scid;
	  wchmm->state[ac->arc].scid = -1;
	}
#ifdef USE_NGRAM
#ifdef UNIGRAM_FACTORING
	/* same treatment for the precomputed 1-gram factoring value */
	if (wchmm->state[ac->arc].fscore != LOG_ZERO) {
	  if (wchmm->state[node].fscore != LOG_ZERO && wchmm->state[node].fscore != wchmm->state[ac->arc].fscore) {
	    j_error("Error: different fscore within word-head phone?\n");
	  }
	  wchmm->state[node].fscore = wchmm->state[ac->arc].fscore;
	  wchmm->state[ac->arc].fscore = LOG_ZERO;
	}
#endif /* UNIGRAM_FACTORING */
#endif /* USE_NGRAM */
      }
    }
  }
}

#endif /* CATEGORY_TREE */

/* -------------------------------------------------------------------- */
/* factoring computation */

#ifdef USE_NGRAM

/* Factoring value caches.
   The cached context is the N-gram entry ID of the last word
   (winfo->wton[lastword]), not its dictionary word ID. */

/* word-internal factoring value cache:
   a branch node (that has successor list) will keep the factoring value
   on search, and re-use it if incoming token in the next frame has the
   same word context.
   Both arrays have wchmm->scnum entries.
 */
static LOGPROB *probcache;	/* cached value: indexed by scid */
static WORD_ID *lastwcache;	/* cached lastword: indexed by scid*/
/* cross-word factoring value cache:
   when computing cross-word transition, (1)factoring values on all word
   start nodes should be computed for each word end, and thus (2)each start
   node has more than one transition within a frame.  So factoring value
   is better cached by the word end (last word) than by nodes.
   iw_sc_cache[x] is NULL until the value array for slot x has been
   allocated by max_successor_prob_iw().
 */
static LOGPROB **iw_sc_cache;	/* per-slot value arrays, iw_cache_num slots */
static int iw_cache_num;	/* number of slots in iw_sc_cache (and iw_lw_cache) */
#ifdef HASH_CACHE_IW
static WORD_ID *iw_lw_cache;	/* last word (N-gram ID) currently held in each slot */
#endif
/* once initialized on startup, the factoring value caches are not cleared
   between uses; within this file they are released only by
   max_successor_cache_free() and, for the cross-word part, on allocation
   failure in max_successor_prob_iw() */

/* allocate and reset the factoring caches (call once on startup).
   - word-internal cache: wchmm->scnum slots, all marked as holding no word
   - cross-word cache: iw_cache_num slots, all empty.  With HASH_CACHE_IW the
     slot count is iw_cache_rate percent of the N-gram vocabulary size
     (at least 10); otherwise one slot per N-gram entry. */
void
max_successor_cache_init(WCHMM_INFO *wchmm)
{
  int slot;

  /* word-internal part */
  probcache = (LOGPROB *) mymalloc(sizeof(LOGPROB) * wchmm->scnum);
  lastwcache = (WORD_ID *) mymalloc(sizeof(WORD_ID) * wchmm->scnum);
  slot = 0;
  while (slot < wchmm->scnum) {
    lastwcache[slot] = WORD_INVALID;
    slot++;
  }

  /* cross-word part: decide the number of slots */
#ifdef HASH_CACHE_IW
  iw_cache_num = wchmm->ngram->max_word_num * iw_cache_rate / 100;
  if (iw_cache_num < 10) iw_cache_num = 10;
#else
  iw_cache_num = wchmm->ngram->max_word_num;
#endif /* HASH_CACHE_IW */

  /* allocate the slot tables and mark every slot as empty */
  iw_sc_cache = (LOGPROB **)mymalloc(sizeof(LOGPROB *) * iw_cache_num);
#ifdef HASH_CACHE_IW
  iw_lw_cache = (WORD_ID *)mymalloc(sizeof(WORD_ID) * iw_cache_num);
#endif
  for (slot = 0; slot < iw_cache_num; slot++) {
    iw_sc_cache[slot] = NULL;
#ifdef HASH_CACHE_IW
    iw_lw_cache[slot] = WORD_INVALID;
#endif
  }
}

/* free the contents of the cross-word factoring cache.
   Every allocated value array is released and its slot reset to NULL; the
   slot table itself (iw_sc_cache) stays allocated.
   With HASH_CACHE_IW the "last word" tag of each slot is invalidated as
   well: otherwise a later lookup of the same last word would be taken as a
   cache hit and hand out the NULL array. */
static void
max_successor_prob_iw_free(void)
{
  int i;

  for (i = 0; i < iw_cache_num; i++) {
    free(iw_sc_cache[i]);	/* free(NULL) is a no-op */
    iw_sc_cache[i] = NULL;
#ifdef HASH_CACHE_IW
    iw_lw_cache[i] = WORD_INVALID;
#endif
  }
}

/* free word-internal factoring cache */
void
max_successor_cache_free()
{
  free(probcache);
  free(lastwcache);
  max_successor_prob_iw_free();
  free(iw_sc_cache);
#ifdef HASH_CACHE_IW
  free(iw_lw_cache);
#endif
}

#ifdef UNIGRAM_FACTORING

/* build the index used by the cross-word cache under 1-gram factoring.
   A word start node that has a 1-gram factoring value (fscore != LOG_ZERO)
   needs no per-context value, so it is not cached.  Only the start nodes
   without one ("isolated" nodes) get a slot:
     wchmm->start2isolate[i] = slot number of the i-th start node, or -1
     wchmm->isolatenum       = number of slots assigned */
void
make_iwcache_index(WCHMM_INFO *wchmm)
{
  int s;
  int isolated = 0;

  wchmm->start2isolate = (int *)mymalloc(sizeof(int) * wchmm->startnum);
  for (s = 0; s < wchmm->startnum; s++) {
    if (wchmm->state[wchmm->startnode[s]].fscore != LOG_ZERO) {
      /* factoring node (shared): nothing to cache */
      wchmm->start2isolate[s] = -1;
    } else {
      /* no 1-gram factoring value: assign the next free slot */
      wchmm->start2isolate[s] = isolated;
      isolated++;
    }
  }
  wchmm->isolatenum = isolated;
}

/* 1-gram factoring$BCM!J8GDj!K$r7W;;$7$F3JG<$9$k!%(B
   $BC5:wA0$K0lEYFI$s$G$*$1$P$h$$!%(B
   $B7W;;BP>]$O(B 1$B0J>e$N(Bsuccessor word $B$r;}$D!aKvC<$G$J$$%N!<%I!%(B
   $B7W;;8e$O$=$3$N(Bsc$B$OI,MW$J$/$J$k$N$G%U%j!<$9$k(B */
/* compute constant 1-gram factoring value for each factoring node.
   should be called once on startup.
   factoring node: has more than one successor word.
   after computation, the successor list will be freed.
 */
void
calc_all_unigram_factoring_values(WCHMM_INFO *wchmm)
{
  S_CELL *sc, *sctmp;
  LOGPROB tmpprob, maxprob;
  int n;

  for (n=0;n<wchmm->n;n++) {
    wchmm->state[n].fscore = LOG_ZERO; /* initial "undefined" value */
    sc = wchmm->sclist[n];
    if (sc != NULL) {		/* leave undefined if no successor list */
      if (sc->next == NULL) {
	/* only one successor = not factoring node (=leaf node) */
      } else {
	/* compute maximum 1-gram probability in successor words */
	/* and set to fscore */
	maxprob = LOG_ZERO;
	for (sctmp = sc; sctmp; sctmp = sctmp->next) {
	  tmpprob = uni_prob(wchmm->ngram, wchmm->winfo->wton[sctmp->word])
#ifdef CLASS_NGRAM
	    + wchmm->winfo->cprob[sctmp->word] 
#endif
	    ;
	  if (maxprob < tmpprob) maxprob = tmpprob;
	}
	wchmm->state[n].fscore = maxprob;
	free_successor(wchmm->sclist, n);
      }
    }
  }
}

#else  /* ~UNIGRAM_FACTORING */

/* compute the 2-gram factoring value at 'node' for the context 'last_nword'
   (N-gram entry ID of the previous word): the maximum of the 2-gram
   probabilities (plus in-class probability with CLASS_NGRAM) over all words
   in the node's successor list.  Returns LOG_ZERO for an empty list. */
static LOGPROB
calc_successor_prob(WCHMM_INFO *wchmm, WORD_ID last_nword, int node)
{
  S_CELL *cell = wchmm->sclist[wchmm->state[node].scid];
  LOGPROB best = LOG_ZERO;
  LOGPROB p;

  while (cell != NULL) {
    p = bi_prob_lr(wchmm->ngram, last_nword, wchmm->winfo->wton[cell->word])
#ifdef CLASS_NGRAM
      + wchmm->winfo->cprob[cell->word]
#endif
      ;
    if (best < p) best = p;
    cell = cell->next;
  }
  return(best);
}

#endif  /* ~UNIGRAM_FACTORING */

/* For word-internal transitions: return the factoring value at node 'node'
   when the previous word is 'lastword', consulting the per-scid cache.
     - no previous word (lastword == WORD_INVALID): 0.0 is returned, i.e. no
       language score is applied.
     - UNIGRAM_FACTORING: a node with a precomputed 1-gram value returns
       that value; otherwise the 2-gram of the first word in the node's
       successor list is returned.
     - otherwise: the maximum 2-gram over the node's successor list.
   The computed value is remembered together with the N-gram ID of the last
   word and re-used while that context stays the same. */
LOGPROB
max_successor_prob(WCHMM_INFO *wchmm, WORD_ID lastword, int node)
{
  LOGPROB score;
  WORD_ID last_nword;
  int scid;
#ifdef UNIGRAM_FACTORING
  WORD_ID w;
#endif

  /* return nothing if no previous word */
  if (lastword == WORD_INVALID) return(0.0);

  last_nword = wchmm->winfo->wton[lastword];

#ifdef UNIGRAM_FACTORING
  /* 1-gram factoring value already calculated on startup */
  if (wchmm->state[node].fscore != LOG_ZERO) {
    return(wchmm->state[node].fscore);
  }
#endif

  scid = wchmm->state[node].scid;

  /* same context as last time: return the cached value */
  if (last_nword == lastwcache[scid]) {
    return(probcache[scid]);
  }

  /* context changed: compute ... */
#ifdef UNIGRAM_FACTORING
  /* precise 2-gram of the successor word */
  w = wchmm->sclist[scid]->word;
  score = bi_prob_lr(wchmm->ngram, last_nword, wchmm->winfo->wton[w])
#ifdef CLASS_NGRAM
    + wchmm->winfo->cprob[w]
#endif
    ;
#else  /* ~UNIGRAM_FACTORING */
  /* 2-gram factoring */
  score = calc_successor_prob(wchmm, last_nword, node);
#endif /* UNIGRAM_FACTORING */

  /* ... and store to cache */
  lastwcache[scid] = last_nword;
  probcache[scid] = score;
  return(score);
}

/* For cross-word transitions: return an array of factoring values for the
   word start nodes when the previous word is 'lastword', consulting the
   cross-word cache.
     - UNIGRAM_FACTORING: the array has wchmm->isolatenum entries; the value
       of the j-th start node is at index wchmm->start2isolate[j] (start
       nodes mapped to -1 have no entry).
     - otherwise: the array has wchmm->startnum entries, one per start node.
   The returned array belongs to the cache and must not be freed by the
   caller.  'lastword' is used to index winfo->wton[] without any check, so
   it must be a valid dictionary word ID. */
#ifdef HASH_CACHE_IW
/* slot number for an N-gram entry ID: the slot tables have iw_cache_num
   entries, so that is the modulus; fully parenthesized to be safe in any
   expression context */
#define hashid(A) ((A) % iw_cache_num)
#endif
LOGPROB *
max_successor_prob_iw(WCHMM_INFO *wchmm, WORD_ID lastword)
{
  int i, x, node;
  int last_nword;
  int cache_len;		/* number of values in one cache array */
#ifdef UNIGRAM_FACTORING
  int j;
  WORD_ID w;
#endif

  last_nword = wchmm->winfo->wton[lastword];
#ifdef UNIGRAM_FACTORING
  cache_len = wchmm->isolatenum;
#else
  cache_len = wchmm->startnum;
#endif

#ifdef HASH_CACHE_IW
  x = hashid(last_nword);
  /* a hit needs both a matching tag and an allocated array: the arrays may
     have been released by max_successor_prob_iw_free() */
  if (iw_lw_cache[x] == last_nword && iw_sc_cache[x] != NULL) { /* cache hit */
    return(iw_sc_cache[x]);
  }
#else  /* full cache */
  x = last_nword;
  if (iw_sc_cache[x] != NULL) { /* cache hit */
    return(iw_sc_cache[x]);
  }
#endif
  /* cache mis-hit, calc probs and cache them as new */

  /* allocate cache memory (in hash mode an array left in the slot by
     another word is simply overwritten) */
  if (iw_sc_cache[x] == NULL) {
    iw_sc_cache[x] = (LOGPROB *)mymalloc(sizeof(LOGPROB) * cache_len);
    if (iw_sc_cache[x] == NULL) { /* malloc failed */
      /* clear existing cache, and retry */
      max_successor_prob_iw_free();
      /* the size expression has type size_t: convert it to match "%d" */
      j_printf("inter-word LM cache (%dMB) rehashed\n",
               (int)((iw_cache_num * cache_len) / 1000 * sizeof(LOGPROB) / 1000));
      iw_sc_cache[x] = (LOGPROB *)mymalloc(sizeof(LOGPROB) * cache_len);
      if (iw_sc_cache[x] == NULL) { /* malloc failed again? */
        j_error("max_successor_prob_iw: cannot malloc\n");
      }
    }
  }

  /* calc prob for all startid */
#ifdef UNIGRAM_FACTORING
  for (j = 0; j < wchmm->startnum; j++) {
    i = wchmm->start2isolate[j];
    if (i == -1) continue;	/* shared start node: uses its 1-gram value */
    node = wchmm->startnode[j];
    if (wchmm->state[node].fscore != LOG_ZERO) {
      /* should not happen!!! below is just for debugging */
      j_error("No!!\n");
    } else {
      /* isolated start node: precise 2-gram of its successor word */
      w = wchmm->sclist[wchmm->state[node].scid]->word;
      iw_sc_cache[x][i] = bi_prob_lr(wchmm->ngram, last_nword, wchmm->winfo->wton[w])
#ifdef CLASS_NGRAM
        + wchmm->winfo->cprob[w]
#endif
        ;
    }
  }
#else  /* ~UNIGRAM_FACTORING */
  for (i = 0; i < wchmm->startnum; i++) {
    node = wchmm->startnode[i];
    iw_sc_cache[x][i] = calc_successor_prob(wchmm, last_nword, node);
  }
#endif
#ifdef HASH_CACHE_IW
  /* tag the slot only after its values are complete */
  iw_lw_cache[x] = last_nword;
#endif

  return(iw_sc_cache[x]);
}


#else /* USE_DFA --- $B%+%F%4%jLZ$J$iITI,MW(B */

/* $B7hDjE*(B factoring: $B%N!<%I(B node $B$KD>A0C18lMzNr(B lastword $B$r;}$D%H!<%/%s$,(B
   $B%+%F%4%jBP@)Ls>e@\B3$G$-$k$+$I$&$+$r(B TRUE/FALSE$B$GJV$9!%(B*/
/* deterministic factoring: return whether a token with last word ID
   "lastword" can go into the branch node "node" by category-pair constraint.
*/
boolean
can_succeed(WCHMM_INFO *wchmm, WORD_ID lastword, int node)
{
  int lc;
  S_CELL *sc;

  /* return TRUE if at least one subtree word can connect */

  if (lastword == WORD_INVALID) { /* case at beginning-of-word */
    for (sc=wchmm->sclist[wchmm->state[node].scid];sc;sc=sc->next) {
      if (dfa_cp_begin(wchmm->dfa, sc->word) == TRUE) return(TRUE);
    }
    return(FALSE);
  } else {
    lc = wchmm->winfo->wton[lastword];
    for (sc=wchmm->sclist[wchmm->state[node].scid];sc;sc=sc->next) {
      if (dfa_cp(wchmm->dfa, lc, sc->word) == TRUE) return(TRUE);
    }
    return(FALSE);
  }
}

#endif /* USE_DFA */


#endif /* CATEGORY_TREE */
