
/************************************************************************/
/*    wav2mfcc.c   Convert Speech file to MFCC_E_D_(Z) file             */
/*----------------------------------------------------------------------*/
/*    Author    : Yuichiro Nakano                                       */
/*                                                                      */
/*    Copyright(C) Yuichiro Nakano 1996-1998                            */
/*----------------------------------------------------------------------*/
/************************************************************************/


/* $Id: wav2mfcc.c,v 1.3 2002/08/23 11:44:06 ri Exp $ */

#include <sent/stddefs.h>
#include <sent/mfcc.h>

/*
 *  Convert wave -> MFCC_E_D_(Z)
 *  do spectral subtraction if ssbuf != NULL
 *
 *  wave      : input samples, read as wave[0 .. nSamples-1]
 *  mfcc      : output, one row per frame.  Row t receives the cepstra in
 *              [0 .. mfcc_dim-1], the energy (or C0) term in [mfcc_dim],
 *              and the delta terms written by Delta() from [mfcc_dim+1] on.
 *              The caller must provide rows that are large enough.
 *  para      : analysis parameters
 *  nSamples  : number of samples in wave
 *  ssbuf     : noise spectrum for spectral subtraction, or NULL to skip it
 *  ssbuflen  : length of ssbuf; must equal the FFT size when ssbuf != NULL
 *
 *  Returns the number of frames written.  When wave is shorter than one
 *  frame nothing is written and 0 is returned.
 */
int Wav2MFCC_E_D(SP16 *wave, float **mfcc, Value para, int nSamples, float *ssbuf, int ssbuflen)
{
  float *bf;                        /* One frame of samples, used as bf[1..framesize] */
  double *fbank;                    /* Filterbank, used as fbank[1..fbank_num] */
  float *energy;                    /* Per-frame energy (or C0) term */
  SP16 *frame;                      /* First sample of the current frame */
  int i, t;
  int frame_num;                    /* Number of output frames */
  FBankInfo fb;

  /* Get filterbank information */
  fb = InitFBank(para);

  if((fbank = (double *)mymalloc((para.fbank_num+1)*sizeof(double))) == NULL){
    j_error("Error: Wav2MFCC_E_D: failed to malloc\n");
  }
  /* bf is addressed 1-based and holds framesize (<= fftN) samples,
     so it needs fftN + 1 elements, not fftN */
  if((bf = (float *)mymalloc((fb.fftN + 1) * sizeof(float))) == NULL){
    j_error("Error: Wav2MFCC_E_D: failed to malloc\n");
  }

  if (ssbuf != NULL) {
    /* check ssbuf length */
    if (fb.fftN != ssbuflen) {
      j_error("Error: Wav2MFCC_E_D: noise spectrum length not match\n");
    }
  }

  /* Integer division truncates toward zero, so a signal slightly shorter
     than one frame would otherwise be counted as one frame */
  if (nSamples < para.framesize) {
    frame_num = 0;
  } else {
    frame_num = (int)((nSamples - para.framesize) / para.frameshift) + 1;
  }
  if (frame_num <= 0) {
    FreeFBank(fb);
    free(fbank);
    free(bf);
    return(0);
  }

  if((energy = (float *)mymalloc(sizeof(float)*frame_num)) == NULL){
    j_error("Error: Wav2MFCC_E_D: failed to malloc\n");
  }

  for(t = 0; t < frame_num; t++){
    /* Frame t covers wave[t*frameshift .. t*frameshift + framesize - 1];
       copy exactly framesize samples so the last frame stays inside wave */
    frame = wave + t * para.frameshift;
    for(i = 1; i <= para.framesize; i++)
      bf[i] = (float)frame[i - 1];

    /* Calculate Log Raw Energy (before pre-emphasis and windowing) */
    if(para.raw_e)
      energy[t] = CalcLogRawE(bf, para.framesize);

    /* Pre-emphasise */
    PreEmphasise(bf, para);

    /* Hamming Window */
    Hamming(bf, para.framesize);

    /* Calculate Log Energy (after pre-emphasis and windowing) */
    if(!para.raw_e && !para.c0)
      energy[t] = CalcLogRawE(bf, para.framesize);

    /* Filterbank */
    MakeFBank(bf, fbank, fb, para, ssbuf);

    /* MFCC */
    MakeMFCC(fbank, mfcc[t], para);

    /* 0'th Cepstral parameter replaces the energy term when requested */
    if (para.c0) {
      energy[t] = CalcC0(fbank, para);
    }

    /* Weight Cepstrum */
    WeightCepstrum(mfcc[t], para);
  }

  /* Normalise Log Energy, or store it unchanged */
  if(para.enormal)
    NormaliseLogE(mfcc, energy, frame_num, para);
  else{
    for(t = 0; t < frame_num; t++)
      mfcc[t][para.mfcc_dim] = energy[t];
  }

  /* Delta */
  Delta(mfcc, frame_num, para);

  /* Cepstrum Mean Normalization */
  if(para.cmn)
    CMN(mfcc, frame_num, para.mfcc_dim);

  FreeFBank(fb);
  free(fbank);
  free(bf);
  free(energy);
  return(frame_num);
}


/* 
 *  Calculate Log Raw Energy 
 */
float CalcLogRawE(float *wave, int framesize)
{		   
  int i;
  double raw_E = 0.0;
  float energy;

  for(i = 1; i <= framesize; i++)
    raw_E += wave[i] * wave[i];
  energy = (float)log(raw_E);

  return(energy);
}


/*
 *  Apply hamming window
 *
 *  Scales wave[1..framesize] in place by
 *  0.54 - 0.46 * cos(2 * PI * n / (framesize - 1)), n = 0 .. framesize-1.
 *  The buffer is 1-based: wave[0] is not touched.
 */
void Hamming(float *wave, int framesize)
{
  float step;
  int n;

  step = 2 * PI / (framesize - 1);
  for (n = 0; n < framesize; n++) {
    wave[n + 1] *= 0.54 - 0.46 * cos(step * n);
  }
}


/*
 *  Apply pre-emphasis filter
 *
 *  In place on wave[1..framesize]: each sample has preEmph times its
 *  predecessor subtracted.  The walk goes from the last sample down so
 *  every step still sees the unfiltered predecessor.  The first sample
 *  has no predecessor and is scaled by (1 - preEmph) instead.
 */
void PreEmphasise (float *wave, Value para)
{
  int n = para.framesize;

  while (n > 1) {
    wave[n] -= wave[n - 1] * para.preEmph;
    n--;
  }
  wave[1] *= 1.0 - para.preEmph;
}


/*
 *  Re-scale cepstral coefficients
 *
 *  Multiplies mfcc[i], i = 0 .. mfcc_dim-1, in place by
 *  1 + (lifter / 2) * sin((i + 1) * PI / lifter).
 *
 *  A lifter of 0 leaves the coefficients untouched; evaluating the
 *  formula with it would divide by zero and turn every coefficient
 *  into NaN.
 */
void WeightCepstrum (float *mfcc, Value para)
{
  int i;
  float a, b, lift;

  if (para.lifter == 0) return;

  a = PI / para.lifter;
  b = para.lifter / 2.0;

  /* The weight is used once per coefficient, so no scratch array is needed */
  for(i = 0; i < para.mfcc_dim; i++){
    lift = 1.0 + b * sin((i + 1) * a);
    mfcc[i] *= lift;
  }
}


/* 
 *  Return mel-frequency
 */
float Mel(int k, float fres)
{
  return(1127 * log(1 + (k-1) * fres));
}


/*
 *  Get filterbank information
 *
 *  Builds the FFT size and the mel filterbank tables for the given
 *  analysis parameters:
 *    fftN, n   : smallest power of two >= framesize (minimum 2) and its log2
 *    fres      : step per FFT bin as used by Mel()
 *    klo, khi  : first and last FFT bin (1-based) that feed the filterbank
 *    cf[1..fbank_num+1]  : channel centre frequencies on the mel scale
 *    loChan[1..fftN/2]   : lower channel index for each bin, -1 if unused
 *    loWt[1..fftN/2]     : weight of each bin in its lower channel
 *    Re, Im    : FFT workspace, fftN + 1 floats each
 *
 *  The five arrays are heap allocated; release them with FreeFBank().
 */
FBankInfo InitFBank(Value para)
{
  FBankInfo fb;
  float mlo, mhi, ms, melk;
  int k, chan, maxChan, nv2;

  /* Calculate FFT size */
  fb.fftN = 2;  fb.n = 1;
  while(para.framesize > fb.fftN){
    fb.fftN *= 2; fb.n++;
  }

  nv2 = fb.fftN / 2;
  /* Bin spacing divided by 700, matching the 1 + f/700 form used below.
     smp_period is presumably in 100 ns units (see the 1.0e-7 factors) */
  fb.fres = 1.0E7 / (para.smp_period * fb.fftN * 700.0);
  maxChan = para.fbank_num + 1;
  fb.klo = 2;   fb.khi = nv2;
  mlo = 0;      mhi = Mel(nv2 + 1, fb.fres);

  /* lower band edge (para.lopass), applied when non-negative */
  if (para.lopass >= 0) {
    mlo = 1127*log(1+(float)para.lopass/700.0);
    fb.klo = ((float)para.lopass * para.smp_period * 1.0e-7 * fb.fftN) + 2.5;
    if (fb.klo<2) fb.klo = 2;
  }
  /* upper band edge (para.hipass), applied when non-negative */
  if (para.hipass >= 0) {
    mhi = 1127*log(1+(float)para.hipass/700.0);
    fb.khi = ((float)para.hipass * para.smp_period * 1.0e-7 * fb.fftN) + 0.5;
    if (fb.khi>nv2) fb.khi = nv2;
  }

  /* Create vector of fbank centre frequencies, evenly spaced in mel */
  if((fb.cf = (float *)mymalloc((maxChan + 1) * sizeof(float))) == NULL){
    j_error("InitFBank: failed to malloc\n");
  }
  ms = mhi - mlo;
  for (chan = 1; chan <= maxChan; chan++)
    fb.cf[chan] = ((float)chan / maxChan)*ms + mlo;

  /* Create loChan map, loChan[fftindex] -> lower channel index */
  if((fb.loChan = (short *)mymalloc((nv2 + 1) * sizeof(short))) == NULL){
    j_error("InitFBank: failed to malloc\n");
  }
  for(k = 1, chan = 1; k <= nv2; k++){
    if (k < fb.klo || k > fb.khi) fb.loChan[k] = -1;
    else {
      melk = Mel(k, fb.fres);
      /* Test the index before reading cf[chan]; cf has maxChan + 1
         elements, so cf[maxChan + 1] must never be touched */
      while (chan <= maxChan && fb.cf[chan] < melk) ++chan;
      fb.loChan[k] = chan - 1;
    }
  }

  /* Create vector of lower channel weights */
  if((fb.loWt = (float *)mymalloc((nv2 + 1) * sizeof(float))) == NULL){
    j_error("InitFBank: failed to malloc\n");
  }
  for(k = 1; k <= nv2; k++) {
    chan = fb.loChan[k];
    if (k < fb.klo || k > fb.khi) fb.loWt[k] = 0.0;
    else {
      if (chan > 0)
        fb.loWt[k] = (fb.cf[chan + 1] - Mel(k, fb.fres)) / (fb.cf[chan + 1] - fb.cf[chan]);
      else
        /* below the first centre: interpolate from the lower band edge */
        fb.loWt[k] = (fb.cf[1] - Mel(k, fb.fres)) / (fb.cf[1] - mlo);
    }
  }

  /* Create workspace for fft */
  if((fb.Re = (float *)mymalloc((fb.fftN + 1) * sizeof(float))) == NULL){
    j_error("InitFBank: failed to malloc\n");
  }
  if((fb.Im = (float *)mymalloc((fb.fftN + 1) * sizeof(float))) == NULL){
    j_error("InitFBank: failed to malloc\n");
  }
  return(fb);
}

/*
 * free FBankInfo
 *
 * Releases the five arrays held by fb (FFT workspace, lower channel
 * weights, lower channel map, centre frequencies).  fb is passed by
 * value, so the caller's copy still holds the old pointers afterwards.
 */
void
FreeFBank(FBankInfo fb)
{
  /* FFT workspace */
  free(fb.Im);
  free(fb.Re);

  /* filterbank tables */
  free(fb.loWt);
  free(fb.loChan);
  free(fb.cf);
}

/*
 *  Convert wave -> (spectral subtraction) -> mel-frequency filterbank
 *
 *  wave  : one frame, read as wave[1..framesize]
 *  fbank : output, fbank[1..fbank_num] receives the log channel outputs
 *          (each channel is floored at 1.0 before the log, so every
 *          value is >= 0)
 *  fb    : tables and FFT workspace from InitFBank(); fb.Re / fb.Im are
 *          overwritten
 *  para  : analysis parameters
 *  ssbuf : noise spectrum with fb.fftN entries, or NULL for no
 *          spectral subtraction
 */
void MakeFBank(float *wave, double *fbank, FBankInfo fb, Value para, float *ssbuf)
{
  int k, bin, i;
  double Re, Im, A, P, NP, H, temp, resid;

  for(k = 1; k <= para.framesize; k++){
    fb.Re[k - 1] = wave[k];  fb.Im[k - 1] = 0.0;  /* copy to workspace */
  }
  for(k = para.framesize + 1; k <= fb.fftN; k++){
    fb.Re[k - 1] = 0.0;      fb.Im[k - 1] = 0.0;  /* pad with zeroes */
  }

  /* Take FFT */
  FFT(fb.Re, fb.Im, fb.n);

  if (ssbuf != NULL) {
    /* Spectral Subtraction: scale each bin by H so that its power
       becomes P^2 - ss_alpha * NP^2, or apply ss_floor when that
       difference is negative */
    for(k = 1; k <= fb.fftN; k++){
      Re = fb.Re[k - 1];  Im = fb.Im[k - 1];
      P = sqrt(Re * Re + Im * Im);
      NP = ssbuf[k - 1];
      resid = P * P - para.ss_alpha * NP * NP;
      /* P == 0 would make the gain 0/0 = NaN and poison the whole
         frame; the bin is zero anyway, so any finite gain is correct */
      if(resid < 0 || P <= 0.0){
        H = para.ss_floor;
      }else{
        H = sqrt(resid) / P;
      }
      fb.Re[k - 1] = H * Re;
      fb.Im[k - 1] = H * Im;
    }
  }

  /* Fill filterbank channels */
  for(i = 1; i <= para.fbank_num; i++)
    fbank[i] = 0.0;

  for(k = fb.klo; k <= fb.khi; k++){
    Re = fb.Re[k-1]; Im = fb.Im[k-1];
    A = sqrt(Re * Re + Im * Im);
    bin = fb.loChan[k];
    Re = fb.loWt[k] * A;          /* share of this bin for the lower channel */
    if(bin > 0) fbank[bin] += Re;
    if(bin < para.fbank_num) fbank[bin + 1] += A - Re;   /* rest goes to the upper channel */
  }

  /* Take logs */
  for(bin = 1; bin <= para.fbank_num; bin++){
    temp = fbank[bin];
    if(temp < 1.0) temp = 1.0;
    fbank[bin] = log(temp);
  }
}


/*
 *  Apply the DCT to filterbank
 *
 *  fbank : log filterbank outputs, read as fbank[1..fbank_num]
 *  mfcc  : output, mfcc[0..mfcc_dim-1]; mfcc[q] holds cepstral
 *          coefficient q + 1
 *
 *  Each coefficient is sqrt(2 / fbank_num) times the sum over channels
 *  of fbank[ch] * cos((q + 1) * PI / fbank_num * (ch - 0.5)).
 */
void MakeMFCC(double *fbank, float *mfcc, Value para)
{
  float scale, step, angle;
  float *out;
  int q, ch;

  scale = sqrt(2.0 / para.fbank_num);
  step = PI / para.fbank_num;

  /* Take DCT */
  for (q = 0; q < para.mfcc_dim; q++) {
    out = &mfcc[q];
    angle = (q + 1) * step;

    *out = 0.0;
    for (ch = 1; ch <= para.fbank_num; ch++) {
      *out += fbank[ch] * cos(angle * (ch - 0.5));
    }
    *out *= scale;
  }
}


/*
 * Calculate 0'th cepstral coeff.
 *
 * Returns the sum of fbank[1..fbank_num] scaled by sqrt(2 / fbank_num).
 */
float CalcC0(double *fbank, Value para)
{
  float total = 0.0;
  float scale;
  int ch = 0;

  scale = sqrt(2.0 / para.fbank_num);
  while (++ch <= para.fbank_num) {
    total += fbank[ch];
  }

  return total * scale;
}


/*
 *  Normalise log energy
 *
 *  mfcc      : output rows; mfcc[t][mfcc_dim] receives the normalised value
 *  energy    : per-frame log energy, energy[0..frame_num-1]; values below
 *              the silence floor are raised to it in place
 *  frame_num : number of frames (energy[0] is read unconditionally)
 *
 *  The silence floor lies silFloor * ln(10) / 10 below the largest energy.
 *  The stored value is 1 - (max - energy) * escale, so the loudest frame
 *  maps to 1.0.
 */
void NormaliseLogE(float **mfcc, float *energy, int frame_num, Value para)
{
  float peak, silence_floor;
  int t;

  /* find max log energy */
  peak = energy[0];
  for (t = 1; t < frame_num; t++) {
    if (energy[t] > peak) {
      peak = energy[t];
    }
  }

  /* set the silence floor */
  silence_floor = peak - (para.silFloor * log(10.0)) / 10.0;

  /* clamp, then normalise */
  for (t = 0; t < frame_num; t++) {
    if (energy[t] < silence_floor) {
      energy[t] = silence_floor;
    }
    mfcc[t][para.mfcc_dim] = 1.0 - (peak - energy[t]) * para.escale;
  }
}


/*
 *  Calculate delta coefficients
 *
 *  c     : feature rows; columns [0 .. vec_num/2 - 1] are read and the
 *          deltas are written to columns [mfcc_dim + 1 .. mfcc_dim + vec_num/2]
 *  frame : number of rows in c
 *
 *  For each column the delta at frame t is
 *    sum_{w=1..delWin} w * (c[t+w] - c[t-w]) / (2 * sum_{w=1..delWin} w*w)
 *  where frames before the first or after the last are replaced by the
 *  first or last frame.
 */
void Delta(float **c, int frame, Value para)
{
  int w, t, d, prev, next, ncoef, norm = 0;
  float acc;

  for (w = para.delWin; w >= 1; w--) {
    norm += w * w;
  }

  ncoef = para.vec_num / 2;

  for (t = 0; t < frame; t++) {
    for (d = 0; d < ncoef; d++) {
      acc = 0;
      for (w = 1; w <= para.delWin; w++) {
        /* Replicate the first or last vector */
        /* at the beginning and end of speech */
        prev = (t - w < 0) ? 0 : t - w;
        next = (t + w >= frame) ? frame - 1 : t + w;
        acc += w * (c[next][d] - c[prev][d]);
      }
      c[t][d + 1 + para.mfcc_dim] = acc / (2 * norm);
    }
  }
}


/* 
 *  Apply FFT
 *
 *  In-place radix-2 FFT of n = 2^p complex points.
 *
 *  xRe, xIm : real and imaginary parts, 0-based, n elements each;
 *             overwritten with the transform
 *  p        : log2 of the transform length
 *
 *  The twiddle factor is cos - i*sin (negative exponent) and the result
 *  is not scaled.
 */
void FFT(float *xRe, float *xIm, int p)
{
  int i, ip, j, k, m, me, me1, n, nv2;
  double uRe, uIm, vRe, vIm, wRe, wIm, tRe, tIm;
  
  n = 1<<p;
  nv2 = n / 2;
  
  /* Reorder the input: j is advanced as a bit-reversed counter alongside i,
     and each pair is swapped once (only when j > i) */
  j = 0;
  for(i = 0; i < n-1; i++){
    if(j > i){
      tRe = xRe[j];      tIm = xIm[j];
      xRe[j] = xRe[i];   xIm[j] = xIm[i];
      xRe[i] = tRe;      xIm[i] = tIm;
    }
    /* add one to j from the top bit downwards, carrying into lower bits */
    k = nv2;
    while(j >= k){
      j -= k;      k /= 2;
    }
    j += k;
  }

  /* p butterfly stages; stage m combines blocks of me = 2^m points */
  for(m = 1; m <= p; m++){
    me = 1<<m;                me1 = me / 2;
    /* u is the current twiddle factor, w the per-step rotation exp(-i*PI/me1) */
    uRe = 1.0;                uIm = 0.0;
    wRe = cos(PI / me1);      wIm = -sin(PI / me1);
    for(j = 0; j < me1; j++){
      /* same twiddle u for position j of every block */
      for(i = j; i < n; i += me){
	ip = i + me1;
	/* t = x[ip] * u */
	tRe = xRe[ip] * uRe - xIm[ip] * uIm;
	tIm = xRe[ip] * uIm + xIm[ip] * uRe;
	/* butterfly: x[ip] = x[i] - t, x[i] = x[i] + t */
	xRe[ip] = xRe[i] - tRe;   xIm[ip] = xIm[i] - tIm;
	xRe[i] += tRe;            xIm[i] += tIm;
      }
      /* u = u * w, via v so both parts use the old u */
      vRe = uRe * wRe - uIm * wIm;   vIm = uRe * wIm + uIm * wRe;
      uRe = vRe;                     uIm = vIm;
    }
  }
}


/*
 * Cepstrum Mean Normalization
 *
 * Subtracts, for each of the first dim columns, the mean over all
 * frames from every frame.
 *
 *   mfcc      : feature rows, mfcc[0..frame_num-1][0..dim-1], modified in place
 *   frame_num : number of rows; nothing is done when it is <= 0
 *   dim       : number of leading columns to normalise; other columns
 *               are left untouched
 */
void CMN(float **mfcc, int frame_num, int dim)
{
  int i, t;
  float sum, ave;

  /* no frames: nothing to normalise, and the mean would divide by zero */
  if (frame_num <= 0) return;

  /* Each column is independent, so its mean can be applied as soon as
     it is known and no per-column scratch arrays are needed */
  for(i = 0; i < dim; i++){
    sum = 0.0;
    for(t = 0; t < frame_num; t++)
      sum += mfcc[t][i];
    ave = sum / frame_num;
    for(t = 0; t < frame_num; t++)
      mfcc[t][i] = mfcc[t][i] - ave;
  }
}
