/* Copyright (c) 1991-2002 Doshita Lab. Speech Group, Kyoto University */
/* Copyright (c) 2000-2002 Speech and Acoustics Processing Lab., NAIST */
/*   All rights reserved   */

/* ngram_read_bin.c --- read in n-gram data in bingram (binary format for Julius) */

/* $Id: ngram_read_bin.c,v 1.5 2002/09/11 22:01:50 ri Exp $ */

#include <sent/stddefs.h>
#include <sent/ngram2.h>

/*
 * Binary read function with byte swap (assume file is BIG ENDIAN).
 *
 * Reads `unitnum` items of `unitbyte` bytes each from `fp` into `buf`
 * via myfread().  When WORDS_BIGENDIAN is not defined and the items are
 * wider than one byte, the items are byte-swapped in place with
 * swap_bytes() after the read.
 *
 * A negative `unitnum` or a short read is reported through j_error().
 * NOTE(review): j_error() is presumably fatal (does not return) -- confirm.
 */
static void
rdn(FILE *fp, void *buf, size_t unitbyte, int unitnum)
{
  size_t nread;

  /* Counts often come straight from the file.  A negative int would be
     converted to a huge unsigned count when handed to the reader, so
     refuse it before touching buf. */
  if (unitnum < 0) {
    j_error("read failed\n");
    return;
  }

  nread = myfread(buf, unitbyte, unitnum, fp);
  if (nread < (size_t)unitnum) {
    /* errno may be unset if the short read was a plain end-of-file */
    perror("ngram_read_bin");
    j_error("read failed\n");
  }
#ifndef WORDS_BIGENDIAN
  if (unitbyte != 1) {
    swap_bytes(buf, unitbyte, unitnum);
  }
#endif
}

/* check bingram version */
static void
check_header(FILE *fp)
{
  char buf[512];
  rdn(fp, buf, 1, 512);

  if (! strnmatch(buf, BINGRAM_IDSTR, strlen(BINGRAM_IDSTR))) {
    /* not a v3 bingram file */
    j_printerr("Error: invalid header, you probably use old bingram\n");
    j_printerr("Error: if so, please re-make with newer mkbingram that comes with Julius-2.0 or later\n");
  }
  /*j_printf("%s",buf);*/
}

/*
 * Read in N-gram binary data.
 *
 * Loads a bingram file from `fp` into `ndata`, in file order:
 *   1. header block (checked by check_header()),
 *   2. MAX_N entry counts into ndata->ngram_num[],
 *   3. the word-name table: a byte length followed by that many bytes
 *      holding NUL-terminated names packed back to back,
 *   4. the 1-gram, 2-gram and 3-gram arrays,
 * then builds the word lookup tree and sets the unknown-word id.
 *
 * All name strings live in one allocation; ndata->wname[] holds pointers
 * into that single buffer.
 *
 * Returns TRUE on success.  A malformed word-name table is reported with
 * j_error() and, should that call return, FALSE is returned.  Read
 * failures are handled inside rdn().
 */
boolean
ngram_read_bin(FILE *fp, NGRAM_INFO *ndata)
{
  int i,n,len;
  char *w, *p;
  
  ndata->from_bin = TRUE;

  /* check initial header */
  check_header(fp);
  
  /* read total info and set max_word_num */
  for(n=0;n<MAX_N;n++) {
    rdn(fp, &(ndata->ngram_num[n]), sizeof(NNID), 1);
  }
  ndata->max_word_num = ndata->ngram_num[0];
  
  /* read wname */
  rdn(fp, &len, sizeof(int), 1);
  if (len < 0) {
    /* length comes from the file: never allocate/read a negative size */
    j_error("wname error??\n");
    return FALSE;
  }
  w = mymalloc(len);
  rdn(fp, w, 1, len);
  if (len > 0 && w[len - 1] != '\0') {
    /* last name is not terminated: scanning it would run off the buffer */
    j_error("wname error??\n");
    return FALSE;
  }
  /* assign... */
  ndata->wname = (char **)mymalloc(sizeof(char *)*ndata->ngram_num[0]);
  p = w; i = 0;
  while (p < w + len) {
    if (i >= ndata->ngram_num[0]) {
      /* more names than 1-gram entries: stop before overflowing wname[] */
      j_error("wname error??\n");
      return FALSE;
    }
    ndata->wname[i++] = p;
    /* the buffer ends with '\0' (checked above), so this stays in bounds */
    while(*p != '\0') p++;
    p++;
  }
  if (i != ndata->ngram_num[0]) {
    /* fewer names than 1-gram entries */
    j_error("wname error??\n");
    return FALSE;
  }
  /* malloc all */
  /* arrays with one element per 1-gram entry */
  ndata->p = (LOGPROB *)mymalloc(sizeof(LOGPROB) * ndata->ngram_num[0]);
  ndata->bo_wt_lr = (LOGPROB *)mymalloc(sizeof(LOGPROB) * ndata->ngram_num[0]);
  ndata->bo_wt_rl = (LOGPROB *)mymalloc(sizeof(LOGPROB) * ndata->ngram_num[0]);
  ndata->n2_bgn = (NNID *)mymalloc(sizeof(NNID) * ndata->ngram_num[0]);
  ndata->n2_num = (WORD_ID *)mymalloc(sizeof(WORD_ID) * ndata->ngram_num[0]);
  /* arrays with one element per 2-gram entry */
  ndata->n2tonid = (WORD_ID *)mymalloc(sizeof(WORD_ID) * ndata->ngram_num[1]);
  ndata->p_lr = (LOGPROB *)mymalloc(sizeof(LOGPROB) * ndata->ngram_num[1]);
  ndata->p_rl = (LOGPROB *)mymalloc(sizeof(LOGPROB) * ndata->ngram_num[1]);
  ndata->bo_wt_rrl = (LOGPROB *)mymalloc(sizeof(LOGPROB) * ndata->ngram_num[1]);
  ndata->n3_bgn = (NNID *)mymalloc(sizeof(NNID) * ndata->ngram_num[1]);
  ndata->n3_num = (WORD_ID *)mymalloc(sizeof(WORD_ID) * ndata->ngram_num[1]);
  /* arrays with one element per 3-gram entry */
  ndata->n3tonid = (WORD_ID *)mymalloc(sizeof(WORD_ID) * ndata->ngram_num[2]);
  ndata->p_rrl = (LOGPROB *)mymalloc(sizeof(LOGPROB) * ndata->ngram_num[2]);
  
  /* read 1-gram (the read order below must match the file layout) */
  j_printerr("1-gram.");
  rdn(fp, ndata->p, sizeof(LOGPROB), ndata->ngram_num[0]);
  j_printerr(".");
  rdn(fp, ndata->bo_wt_lr, sizeof(LOGPROB), ndata->ngram_num[0]);
  j_printerr(".");
  rdn(fp, ndata->bo_wt_rl, sizeof(LOGPROB), ndata->ngram_num[0]);
  j_printerr(".");
  rdn(fp, ndata->n2_bgn, sizeof(NNID), ndata->ngram_num[0]);
  j_printerr(".");
  rdn(fp, ndata->n2_num, sizeof(WORD_ID), ndata->ngram_num[0]);
  
  /* read 2-gram*/
  j_printerr("2-gram.");
  rdn(fp, ndata->n2tonid, sizeof(WORD_ID), ndata->ngram_num[1]);
  j_printerr(".");
  rdn(fp, ndata->p_lr, sizeof(LOGPROB), ndata->ngram_num[1]);
  j_printerr(".");
  rdn(fp, ndata->p_rl, sizeof(LOGPROB), ndata->ngram_num[1]);
  j_printerr(".");
  rdn(fp, ndata->bo_wt_rrl, sizeof(LOGPROB), ndata->ngram_num[1]);
  j_printerr(".");
  rdn(fp, ndata->n3_bgn, sizeof(NNID), ndata->ngram_num[1]);
  j_printerr(".");
  rdn(fp, ndata->n3_num, sizeof(WORD_ID), ndata->ngram_num[1]);

  /* read 3-gram*/
  j_printerr("3-gram.");
  rdn(fp, ndata->n3tonid, sizeof(WORD_ID), ndata->ngram_num[2]);
  j_printerr(".");
  rdn(fp, ndata->p_rrl, sizeof(LOGPROB), ndata->ngram_num[2]);

  /* make word search tree for later lookup */
  j_printerr("indexing...");
  ngram_make_lookup_tree(ndata);

  /* set unknown id */
  set_unknown_id(ndata);
  
  return TRUE;
}
