/**
 * @file   wav2mfcc.c
 * @author Akinobu Lee
 * @date   Sun Sep 18 19:40:34 2005
 * 
 * <JA>
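 * @brief  音声波形から特徴ベクトル系列へ変換する．
 *
 * 入力された音声波形から，特徴ベクトル系列を抽出します．
 * Julius/Julianで抽出できる特徴ベクトルは，
 * MFCC_{0|E}_D[_Z][_N] の {25|26} 次元のもののみです．
 * そのほか，窓長やフレームシフト，帯域カットなどのパラメータを指定できます．
 * 認識時には，音響モデルのヘッダとチェックが行われ，CMNの有無などが
 * 決定されます．
 * 
 * ここの関数は，バッファ上に蓄積された音声波形データを一度に
 * 特徴ベクトル系列に変換するもので，ファイル入力などに用いられます．
 * マイク入力などで，入力と平行に認識を行う場合は，ここの関数ではなく，
 * realtime-1stpass.c 内で行われます．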
 * </JA>
 * 
 * <EN>
 * @brief  Convert speech waveform to parameter vector sequence.
 *
 * Parameter vector sequence extraction of input speech is done
 * here.  Only parameter format of MFCC_{0|E}_D[_Z][_N] with {25|26}
 * dimensions can be extracted within Julius/Julian, so acoustic model
 * for recognition should be trained by one of these parameter types.
 * You can specify other parameters such as window size, frame shift,
 * high/low frequency cut-off via runtime options.  At startup, Julius
 * checks whether the parameter type of the acoustic model conforms to
 * this limitation, and determines whether additional processing such
 * as Cepstral Mean Normalization is needed.
 *
 * Functions below are used to convert fully buffered whole sentence
 * utterance, and typically used for audio file input.  When input
 * is concurrently processed with recognition process at 1st pass, 
 * in case of microphone input, the MFCC computation will be done
 * within functions in realtime-1stpass.c instead of these.
 * </EN>
 * 
 * $Revision:$
 * 
 */
/*
 * Copyright (c) 1991-2005 Kawahara Lab., Kyoto University
 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
 * Copyright (c) 2005      Julius project team, Nagoya Institute of Technology
 * All rights reserved
 */

#include <julius.h>

#include <sys/stat.h>

/* build an MFCC parameter sequence from speech[0..speechlen-1] and return
   it in a newly allocated param */
/** 
 * <EN>
 * Extract an MFCC parameter vector sequence from a fully buffered waveform.
 *
 * The analysis conditions are taken from the global configuration
 * (sampling period, window size, frame shift, delta window, band limits,
 * C0 / CMN requirement and spectral subtraction settings).  Each output
 * vector has (DEF_MFCCDIM + 1) * 2 elements, and the header of the
 * returned parameter is tagged as F_MFCC | F_DELTA, plus F_ZEROTH when
 * C0 is required (F_ENERGY otherwise) and F_CEPNORM when CMN is required.
 *
 * When a noise spectrum file is specified it is loaded once and kept in
 * the global @a ssbuf; when @a sscalc is set, the noise spectrum is
 * re-estimated from the head of every input.
 * 
 * @param speech [in] buffer of speech waveform
 * @param speechlen [in] length of @a speech in samples
 * 
 * @return pointer to newly allocated parameter structure data with extracted
 * MFCC vector sequence, or NULL if the input is shorter than one
 * analysis window.
 * </EN>
 */
HTK_Param *new_wav2mfcc(SP16 speech[], int speechlen)
{
  HTK_Param *param;
  Value para;			/* parameters for Wav2MFCC */
  int framenum;
  int i;
  int len;

  /* set analysis parameters from defaults and global configuration */
  para.smp_period = smpPeriod;
  para.framesize  = fsize;
  para.frameshift = fshift;
  para.preEmph    = DEF_PREENPH;
  para.mfcc_dim   = DEF_MFCCDIM;
  para.lifter     = DEF_CEPLIF;
  para.delWin     = delwin;
  para.silFloor   = DEF_SILFLOOR;
  para.hipass     = hipass;
  para.lopass     = lopass;
  para.c0         = c0_required;
  /* energy scaling is fixed to 1.0 here instead of DEF_ESCALE */
  para.escale     = 1.0;
  para.fbank_num  = DEF_FBANK;
  /* CMN follows the requirement determined elsewhere (cmn_required) */
  para.cmn        = cmn_required;
  para.enormal    = FALSE;
  para.raw_e      = FALSE;
  /* for spectral subtraction (SS) */
  para.ss_alpha   = ssalpha;
  para.ss_floor   = ssfloor;

  /* base coefficients plus one extra term, each with its delta */
  para.vec_num = (para.mfcc_dim + 1) * 2;

  if (ssload_filename && ssbuf == NULL) {
    /* load noise spectrum for spectral subtraction from file (once) */
    if ((ssbuf = new_SS_load_from_file(ssload_filename, &sslen)) == NULL) {
      j_error("Error: failed to read \"%s\"\n", ssload_filename);
    }
  }

  if (sscalc) {
    /* compute noise spectrum from head silence for each input */
    /* presumably sscalc_len is in msec and smpFreq in Hz -- confirm */
    len = sscalc_len * smpFreq / 1000;
    if (len > speechlen) len = speechlen;
#ifdef SSDEBUG
    printf("[%d]\n", len);
#endif
    /* NOTE(review): the previous ssbuf is overwritten without being
       released; if new_SS_calculate() allocates its result this leaks
       one buffer per input -- confirm against its implementation */
    ssbuf = new_SS_calculate(speech, len, para, &sslen);
  }
#ifdef SSDEBUG
  for(i=0;i<sslen;i++) {
    printf("%d: %f\n", i, ssbuf[i]);
  }
#endif
  
  /* calculate frame length from speech length, frame size and frame shift */
  /* Integer division truncates toward zero, so an input slightly shorter
     than one window would give (negative / frameshift) == 0 and a bogus
     frame count of 1.  Handle the short-input case explicitly so that the
     analysis never runs on a buffer smaller than one window. */
  if (speechlen < para.framesize) {
    framenum = 0;
  } else {
    framenum = (int)((speechlen - para.framesize) / para.frameshift) + 1;
  }
  if (framenum < 1) {
    j_printerr("input too short (%d samples), ignored\n", speechlen);
    return NULL;
  }
  
  /* allocate new param: one vector of vec_num elements per frame */
  param = new_param();
  param->parvec = (VECT **)mymalloc(sizeof(VECT *) * framenum);
  for(i=0;i<framenum;i++) {
    param->parvec[i] = (VECT *)mymalloc(sizeof(VECT) * para.vec_num);
  }

  /* make the MFCC vectors from speech data (with SS if ssbuf is set) */
  /* (bogus) needs conversion here if integerized */
  Wav2MFCC_E_D(speech, param->parvec, para, speechlen, ssbuf, sslen);

  /* set miscellaneous parameters */
  param->header.samplenum = framenum;
  param->header.wshift = para.smp_period * para.frameshift;
  param->header.sampsize = para.vec_num * sizeof(VECT); /* not compressed */
  param->header.samptype = F_MFCC | F_DELTA;
  if (para.c0) {
    param->header.samptype |= F_ZEROTH;
  } else {
    param->header.samptype |= F_ENERGY;
  }
  if (para.cmn) {
    param->header.samptype |= F_CEPNORM;
  }
  param->veclen = para.vec_num;
  param->samplenum = framenum;

  return param;
}

