/*============================================================================*/
/* Change history                                                             */
/*----------------------------------------------------------------------------*/
/* 1.0 : Adapted to the new API 2003/07/19                                    */
/*============================================================================*/

#include <musashi.h>
#include <xtkmeanHelp.h>
#include <stdlib.h>
#include <limits.h>
#include <float.h>

/* Help metadata for this command. The per-field notes are translations of    */
/* the original (mis-encoded) comments. The HELP* macros are not defined in   */
/* this file (presumably they come from xtkmeanHelp.h -- confirm).            */
struct mssComHelp comHelp={
  "xtkmean",      /* command name             */
  "1.0",          /* version                  */
  HELPT,          /* command title            */
  HELPS,          /* summary                  */
  HELPE,          /* usage examples           */
  HELPR,          /* related commands         */
  HELPA,          /* author information       */
  HELPB,          /* bug-report information   */
  HELPH           /* home page                */
};


/* Running accumulator for one numeric field of one cluster; it is filled by  */
/* setCluster()/setClusterSmp() and consumed by movCenter().                  */
struct CalNum {
  MssValue accum; /* sum of the values added so far (only .v.d is used) */
  int   cnt;      /* number of values added to accum */
};

/* Statistics of the input data. Every array holds one element per numeric    */
/* field (fnum->cnt elements, see getDatInfo()).                              */
struct DataInfo {
  int    cnt;    /* number of data rows read */
  MssValue *maxNum; /* per field: maximum value */
  MssValue *minNum; /* per field: minimum value */
  MssValue *rngNum; /* per field: maximum - minimum */
  MssValue *sumNum; /* per field: sum of the values */
  int   *cntNum; /* per field: number of values (NULL values are not counted) */
  MssValue *avgNum; /* per field: average value */
};

/* One cluster. Clusters are handled as arrays of kCnt elements allocated by  */
/* malCluster().                                                              */
struct Cluster {
  int cnt;               /* number of records that belong to this cluster */
  struct CalNum *calNum; /* per field: work area used to compute the centroid */
  MssValue      *cenNum; /* per field: centroid value */
  struct DataInfo *data; /* input statistics; the visible code sets and reads this only on element 0 of a cluster array */
};

/* A set of records held in memory (a sample of the input, or a set of        */
/* centroids treated as records).                                             */
struct Sample {
  struct SmpRec {
    MssValue        *num; /* numeric field values of one record (allocated with fnum->cnt elements) */
  } *rec;                 /* array of records */
  int recCnt;             /* number of records in rec */
  int numFldCnt;          /* number of numeric fields per record */
  int catFldCnt;          /* never assigned in the visible part of this file; presumably a category-field count -- confirm */
  struct DataInfo *data;  /* statistics of the input data this sample was taken from */
};

extern struct mssGlobalVariables mssGV; /* defined outside this file */

int totalFldCnt=0; /* not referenced in the part of the file reviewed here */
MssValue distance; /* shared result slot: nearestCluster(), nearestClusterSmp() and getCenInstance() leave the best distance they found here */
int kCnt;          /* number of clusters; getDatInfo() lowers it when the input has fewer rows */

/*----------------------------------------------------------------------------*/
/* Option -k : number of clusters                                             */
/*----------------------------------------------------------------------------*/
  MssOptINT optCNT={
    OINT,   /* option type                                                  */
    "k",    /* keyword (multi-character keywords are not allowed)           */
    1,      /* 0:optional, 1:required, 2:required only for XMLtable input (ignored for txt) */
    0,      /* default (given as a number)                                  */
    2,      /* minimum value                                                */
    50,     /* maximum value                                                */
    CNTT,   /* title of this option (shown in Help)                         */
    CNTC    /* comment for this option (shown in Help)                      */
  };

/*----------------------------------------------------------------------------*/
/* Option -n : numeric fields                                                 */
/*----------------------------------------------------------------------------*/
  MssOptFLD optNUM={
    OFLD,   /* option type                                                  */
    "n",    /* keyword (multi-character keywords are not allowed)           */
    1,      /* 0:optional, 1:required, 2:required only for XMLtable input (ignored for txt) */
    MssFieldMaxCnt, /* maximum number of fields that may be given          */
    "i",    /* keyword of the input data this option refers to (used by the GUI) */
    1,      /* whether regular expressions are allowed (0:no, 1:yes)        */
    0,      /* whether a new field name may be given (0:no, 1:yes)          */
    NULL,   /* characters accepted as field options (the part after %)      */
            /* ex) NULL when none are allowed; "nr" allows "-f name%rn"     */
    NUMT,   /* title of this option (shown in Help)                         */
    NUMC,   /* comment for this option (shown in Help)                      */
    NUMF    /* description of the flags (shown in Help); comma separated when several */
  };

/*----------------------------------------------------------------------------*/
/* Option -S : random number seed                                             */
/*----------------------------------------------------------------------------*/
  MssOptINT optSED={
    OINT,   /* option type                                                  */
    "S",    /* keyword (multi-character keywords are not allowed)           */
    0,      /* 0:optional, 1:required, 2:required only for XMLtable input (ignored for txt) */
    -1,     /* default (given as a number)                                  */
    -1,     /* minimum value                                                */
    INT_MAX,/* maximum value                                                */
    SEDT,   /* title of this option (shown in Help)                         */
    SEDC    /* comment for this option (shown in Help)                      */
  };

/*----------------------------------------------------------------------------*/
/* Option -a : name of the new field                                          */
/*----------------------------------------------------------------------------*/
  MssOptSLS optFNM={
    OSLS,   /* option type                                                  */
    "a",    /* keyword (multi-character keywords are not allowed)           */
    2,      /* 0:optional, 1:required, 2:required only for XMLtable input (ignored for txt) */
    NULL,   /* default (string)                                             */
    1,      /* maximum number of comma-separated elements                   */
    1,      /* minimum string length of each element                        */
    MssFieldNameMaxLen,/* maximum string length of each element            */
    0,      /* 1:elements may contain a colon, 0:not allowed  ex) aaaa:xxxxx */
    FNMT,   /* title of this option (shown in Help)                         */
    FNMC    /* comment for this option (shown in Help)                      */
  };

/*----------------------------------------------------------------------------*/
/* Option -d : how the initial seeds (cluster centers) are computed           */
/* NOTE(review): which method each of the values 0..2 selects is decided      */
/* outside the part of the file reviewed here.                                */
/*----------------------------------------------------------------------------*/
  MssOptINT optINI={
    OINT,   /* option type                                                  */
    "d",    /* keyword (multi-character keywords are not allowed)           */
    0,      /* 0:optional, 1:required, 2:required only for XMLtable input (ignored for txt) */
    2,      /* default (given as a number)                                  */
    0,      /* minimum value                                                */
    2,      /* maximum value                                                */
    INIT,   /* title of this option (shown in Help)                         */
    INIC    /* comment for this option (shown in Help)                      */
  };

/*----------------------------------------------------------------------------*/
/* Option -m : number of samples used by the sampling-based initialization    */
/* NOTE(review): the maximum accepted here is 100, while initClusterBF()      */
/* indexes its CM[10]/FM[10] arrays with values below optMUL.val -- confirm   */
/* the intended upper bound.                                                  */
/*----------------------------------------------------------------------------*/
  MssOptINT optMUL={
    OINT,   /* option type                                                  */
    "m",    /* keyword (multi-character keywords are not allowed)           */
    0,      /* 0:optional, 1:required, 2:required only for XMLtable input (ignored for txt) */
    10,     /* default (given as a number)                                  */
    0,      /* minimum value                                                */
    100,    /* maximum value                                                */
    MULT,   /* title of this option (shown in Help)                         */
    MULC    /* comment for this option (shown in Help)                      */
  };


/*----------------------------------------------------------------------------*/
/* Option -i : input file                                                     */
/*----------------------------------------------------------------------------*/
  MssOptINF optINF={
    OINF,   /* option type                                                  */
    "i",    /* keyword (multi-character keywords are not allowed)           */
    0,      /* 0:optional, 1:required                                       */
    1,      /* maximum number of files that may be given                    */
    0,      /* 1:do not terminate on a "file not found" error, 0:terminate  */
    INFT,   /* title of this option (shown in Help)                         */
    INFC    /* comment for this option (shown in Help)                      */
  };

/*----------------------------------------------------------------------------*/
/* Option -o : output file                                                    */
/*----------------------------------------------------------------------------*/
  MssOptOTF optOTF={
    OOTF,   /* option type                                                  */
    "o",    /* keyword (multi-character keywords are not allowed)           */
    0,      /* 0:optional, 1:required                                       */
    OTFT,   /* title of this option (shown in Help)                         */
    OTFC    /* comment for this option (shown in Help)                      */
  };

/*----------------------------------------------------------------------------*/
/* Option -z : compressed output                                              */
/*----------------------------------------------------------------------------*/
  MssOptFLG optZIP={
    OFLG,   /* option type                                                  */
    "z",    /* keyword (multi-character keywords are not allowed)           */
    0,      /* default (normally 0); set to 1 to have the flag always on    */
    ZIPT,   /* title of this option (shown in Help)                         */
    ZIPC    /* comment for this option (shown in Help)                      */
  };

/*----------------------------------------------------------------------------*/
/* Option -t : plain text                                                     */
/*----------------------------------------------------------------------------*/
  MssOptFLG optTXT={
    OFLG,   /* option type                                                  */
    "t",    /* keyword (multi-character keywords are not allowed)           */
    0,      /* default (normally 0); set to 1 to have the flag always on    */
    TXTT,   /* title of this option (shown in Help)                         */
    TXTC    /* comment for this option (shown in Help)                      */
  };

/* NULL-terminated table of every option descriptor defined above             */
/* (presumably handed to the library's option parser -- the call is not in    */
/* the part of the file reviewed here).                                       */
void *opt[]={&optCNT,&optNUM,&optSED,&optINI,&optMUL,&optFNM,
             &optINF,&optOTF,&optZIP,&optTXT,NULL};

struct mssFields *fnum; /* field structure; fnum->cnt is used throughout as the number of numeric fields (presumably built from -n; assigned outside this chunk) */


/*============================================================================*/
/* Functions                                                                  */
/*============================================================================*/
/*
 * Print one MssValue to stdout.
 * A NULL value prints as "NULL"; a DBL value prints with %g and an INT value
 * with %d. Any other value type prints nothing.
 */
static void valPrint(MssValue a){

  if(a.nul){
    printf("NULL");
  }else if(a.vType==DBL){
    printf("%g",a.v.d);
  }else if(a.vType==INT){
    printf("%d",a.v.i);
  }

}

/*----------------------------------------------------------------------------*/
/* Debug helper: dump an array of kCnt clusters to stdout.                    */
/* For every cluster one line is printed holding the member count followed,   */
/* for each numeric field, by "centroid(accum,cnt)".                          */
/*   cluster : first element of the cluster array                             */
/*----------------------------------------------------------------------------*/
void showCluster(struct Cluster *cluster){
  int i,j;

  printf("----------------- showCluster\n");
  for(i=0; i<kCnt; i++){
    struct Cluster *c=cluster+i;

    printf("c[%d] cnt=%d : ",i,c->cnt);
    for(j=0; j<fnum->cnt; j++){
      valPrint(c->cenNum[j]);
      printf("(");
      valPrint(c->calNum[j].accum);
      /* the format already closes the parenthesis opened above; a second */
      /* ")" used to be printed here and left the output unbalanced       */
      printf(",%d)",c->calNum[j].cnt);
    }
    printf("\n");
  }
}
 
/*----------------------------------------------------------------------------*/
/*Sample¤Τɽ                                                          */
/*----------------------------------------------------------------------------*/
void showSample(struct Sample *sample){
  int i,j;

  printf("---------------- showSample\n");
  for(i=0; i<sample->recCnt; i++){
    printf("%4d : ",i);
    for(j=0; j<sample->numFldCnt; j++){
      if( (*((sample->rec+i)->num+j)).nul ){
        printf( "* " );
      }else{
        printf( "%g ",(*((sample->rec+i)->num+j)).v.d );
      }
    }
    printf("\n");
  }
  printf("----------------\n");
}



/*----------------------------------------------------------------------------*/
/* Distance between two values of one numeric field.                          */
/* Returns a DBL value holding |x - y|; the result is NULL when either        */
/* argument is NULL. (A squared-difference variant existed in the original    */
/* only as commented-out code.)                                               */
/*----------------------------------------------------------------------------*/
MssValue disNum(MssValue x, MssValue y){
  MssValue rsl;
  double diff;

  mssVinit(&rsl,DBL);

  /* a NULL operand makes the distance undefined */
  if(x.nul || y.nul){
    rsl.nul=1;
    return(rsl);
  }

  diff=x.v.d - y.v.d;
  rsl.nul=0;
  rsl.v.d=(diff<0) ? -diff : diff;
  return(rsl);
}

/*----------------------------------------------------------------------------*/
/* Average of the per-field distances, skipping NULL entries.                 */
/*   disFld : array of per-field distances                                    */
/*   cnt    : number of elements in disFld                                    */
/* Returns a DBL value; it is NULL when every entry was NULL (or cnt is 0).   */
/*----------------------------------------------------------------------------*/
MssValue disAvg(MssValue *disFld, int cnt){
  MssValue rsl;
  double total=0;
  int used=0;   /* number of non-NULL entries */
  int i;

  mssVinit(&rsl,DBL);

  for(i=0; i<cnt; i++){
    if( disFld[i].nul ) continue;
    total+=disFld[i].v.d;
    used++;
  }

  if(used!=0){
    rsl.nul=0;
    rsl.v.d=total/(double)used;
  }else{
    rsl.nul=1;
  }
  return(rsl);
}

/* Normalize a value with the field's minimum and range: (num - min) / rng.   */
/* NULL and zero-range handling is whatever mssVsub()/mssVdiv() provide.      */
static MssValue norm(MssValue num, MssValue min, MssValue rng){
  MssValue shifted;

  shifted=mssVsub(num,min);
  return( mssVdiv(shifted,rng) );
}

/*----------------------------------------------------------------------------*/
/* Distance between the m-th and the n-th record of a sample.                 */
/* Each field is normalized with the minimum/range stored in sample->data,    */
/* the per-field distances come from disNum() and are averaged by disAvg().   */
/* Returns the averaged distance (NULL when disAvg() yields NULL).            */
/*----------------------------------------------------------------------------*/
MssValue calDistanceSmpSmp( struct Sample *sample, int m, int n ){

  MssValue *disFld;   /* per-field distances */
  MssValue  avg;
  MssValue  a,b;
  MssValue *minNum;
  MssValue *rngNum;
  int i;

  mssVinit(&avg,DBL);
  mssVinit(&a  ,DBL);
  mssVinit(&b  ,DBL);

  minNum=sample->data->minNum;
  rngNum=sample->data->rngNum;

  disFld=mssCalloc(sizeof(MssValue)*fnum->cnt,"near");
  for(i=0; i<fnum->cnt; i++) mssVinit(&disFld[i],DBL);

  /* numeric fields: one distance per field */
  for(i=0; i<fnum->cnt; i++){
    a=norm(sample->rec[m].num[i],minNum[i],rngNum[i]);
    b=norm(sample->rec[n].num[i],minNum[i],rngNum[i]);
    disFld[i]=disNum(a,b);
  }

  /* fold the per-field distances into one value */
  avg=disAvg(disFld,fnum->cnt);
  mssFree(disFld);

  return( avg );
}


/*----------------------------------------------------------------------------*/
/* Distance between the centroid of the k-th cluster and the n-th record of   */
/* a sample. Both sides are normalized with the minimum/range stored in       */
/* sample->data; per-field distances are averaged by disAvg().                */
/*----------------------------------------------------------------------------*/
MssValue calDistanceClsSmp(
  struct Cluster *cluster, int k,
  struct Sample  *sample,  int n){

  MssValue *disFld;   /* per-field distances */
  MssValue  avg;
  MssValue  a,b;
  MssValue *minNum;
  MssValue *rngNum;
  int i;

  mssVinit(&avg,DBL);
  mssVinit(&a  ,DBL);
  mssVinit(&b  ,DBL);

  minNum=sample->data->minNum;
  rngNum=sample->data->rngNum;

  disFld=mssCalloc(sizeof(MssValue)*fnum->cnt,"near");
  for(i=0; i<fnum->cnt; i++) mssVinit(&disFld[i],DBL);

  /* numeric fields: centroid value against record value */
  for(i=0; i<fnum->cnt; i++){
    a=norm(cluster[k].cenNum[i] ,minNum[i],rngNum[i]);
    b=norm(sample->rec[n].num[i],minNum[i],rngNum[i]);
    disFld[i]=disNum(a,b);
  }

  /* fold the per-field distances into one value */
  avg=disAvg(disFld,fnum->cnt);
  mssFree(disFld);

  return( avg );
}

/*----------------------------------------------------------------------------*/
/*Ϳ줿ԤǤᤤ饹ֹ֤                                    */
/*  饹NULLξ,⤷ϥǡܤNULLξ,⤷      */
/*  饹ιܤȥǡιܤޤNULLǤߤä-1֤Ȥ  */
/*  ʤ롣ʤᤤ饹ʬʤȤȤˤʤ                  */
/*----------------------------------------------------------------------------*/
int nearestCluster(struct Cluster *cluster, struct mssFldRec *fr){
  MssValue  distanceTmp;
  MssValue *disFld;
  int  c,j,i,k;
  char *s;
  MssValue a,b;
  MssValue *minNum;
  MssValue *rngNum;
  MssValue *avgNum;

  k=-1;

  mssVinit(&distanceTmp,DBL);
  mssVinit(&distance,DBL);
  distance.v.d=9999;
  mssVinit(&a  ,DBL);
  mssVinit(&b  ,DBL);

  minNum=cluster->data->minNum;
  rngNum=cluster->data->rngNum;
  avgNum=cluster->data->avgNum;

  disFld=mssCalloc(sizeof(MssValue)*fnum->cnt,"near");
  for(i=0; i<fnum->cnt; i++) mssVinit(disFld+i,DBL);

  for(c=0; c<kCnt; c++){
    j=0;

    /*͹*/
    for(i=0; i<fnum->cnt; i++){
      a=norm(*((cluster+c)->cenNum+i),*(minNum+i),*(rngNum+i));
      s=*(fr->pnt+MssFlds2num(fnum,i));
      if(*s=='*') { b=*(avgNum+i); }
      else        {b.nul=0;b.v.d=atof(s);}
      b=norm(b,*(minNum+i),*(rngNum+i));
      *(disFld+j++)=disNum(a,b);
    }

    /*ƥ*/

    /*Υη׻*/
    distanceTmp=disAvg(disFld,j);

    /*Υй*/
    if( mssVcmp(distanceTmp, OPE_LT, distance) ){
      distance=distanceTmp;
      k=c;
    }
  }

  mssFree(disFld);
  return(k);
}

/*----------------------------------------------------------------------------*/
/*Ϳ줿ԤǤᤤ饹ֹ֤(sample)                          */
/*  饹NULLξ,⤷ϥǡܤNULLξ,⤷      */
/*  饹ιܤȥǡιܤޤNULLǤߤä-1֤Ȥ  */
/*  ʤ롣ʤᤤ饹ʬʤȤȤˤʤ                  */
/*----------------------------------------------------------------------------*/
int nearestClusterSmp(struct Cluster *cluster, struct SmpRec *rec){
  MssValue  distanceTmp;
  MssValue *disFld;
  int  c,j,i,k;
  MssValue a,b;
  MssValue *minNum;
  MssValue *rngNum;

  mssVinit(&distanceTmp,DBL);
  mssVinit(&distance,DBL);
  mssVinit(&a  ,DBL);
  mssVinit(&b  ,DBL);

  k=-1;
  distance.v.d=9999;

  minNum=cluster->data->minNum;
  rngNum=cluster->data->rngNum;

  disFld=mssCalloc(sizeof(MssValue)*fnum->cnt,"near");
  for(i=0; i<fnum->cnt; i++) mssVinit(disFld+i,DBL);

  for(c=0; c<kCnt; c++){
    j=0;

    /*͹*/
    for(i=0; i<fnum->cnt; i++){
      a=norm(*((cluster+c)->cenNum+i) ,*(minNum+i),*(rngNum+i));
      b=norm(*(rec->num+i)            ,*(minNum+i),*(rngNum+i));
      *(disFld+j++)= disNum(a,b);
    }
    /*ƥ*/

    /*Υη׻*/
    distanceTmp=disAvg(disFld,j);

    /*Υй*/
    if( mssVcmp(distanceTmp, OPE_LT, distance) ){
      distance=distanceTmp;
      k=c;
    }
  }
  mssFree(disFld);
  return(k);
}

/*----------------------------------------------------------------------------*/
/*ǡƥ饹˿ʬſ׻Τͤ򹹿Ƥ            */
/*----------------------------------------------------------------------------*/
void setCluster(struct Cluster *cluster, struct mssHeader *hdi, struct mssFPR *fpr){
  struct mssFldRec *fr;  /*-ԥХåե¤*/
  int i,k;
  char *s;

  for(k=0; k<kCnt; k++){
    (cluster+k)->cnt=0; /*饹°쥳ɿ*/
    for(i=0; i<fnum->cnt; i++){
      mssVinit( &((cluster+k)->calNum+i)->accum, DBL );
      ((cluster+k)->calNum+i)->cnt=0;
    }
  }

  fr=mssInitFldRec(hdi->flds->cnt);
  mssSeekTopFPR(fpr);
  while( EOF != mssReadFldRec(fpr,fr) ){

    k=nearestCluster(cluster,fr);
    if(k!=-1){
      (cluster+k)->cnt++; /*饹°쥳ɿ*/

      /*͹*/
      for(i=0; i<fnum->cnt; i++){
        s=*(fr->pnt+MssFlds2num(fnum,i));
        if(*s=='*'){
          ((cluster+k)->calNum+i)->accum.v.d += (*(cluster->data->avgNum+i)).v.d;
        }else{
          ((cluster+k)->calNum+i)->accum.v.d += atof(s);
        }
        ((cluster+k)->calNum+i)->cnt++;
      }

    /*ƥ*/

    }
  }
  mssFreeFldRec(fr);
}

/*----------------------------------------------------------------------------*/
/* Assign every record of a sample to its nearest cluster and refresh the     */
/* values used to compute the centroids.                                      */
/*   cluster : array of kCnt clusters; cnt and calNum are reset, then filled  */
/*   sample  : records to distribute                                          */
/* NULL field values are not accumulated. Records for which                   */
/* nearestClusterSmp() returns -1 (no nearest cluster) are skipped, the same  */
/* way setCluster() skips such rows.                                          */
/*----------------------------------------------------------------------------*/
void setClusterSmp(struct Cluster *cluster, struct Sample *sample){
  int i,k;
  int smp;
  MssValue v;

  /* clear the member counts and the accumulators */
  for(k=0; k<kCnt; k++){
    (cluster+k)->cnt=0;
    for(i=0; i<fnum->cnt; i++){
      mssVinit( &((cluster+k)->calNum+i)->accum, DBL );
      ((cluster+k)->calNum+i)->cnt=0;
    }
  }

  for(smp=0; smp<sample->recCnt; smp++){
    k=nearestClusterSmp(cluster,sample->rec+smp);

    /* -1 means "no nearest cluster"; indexing with it would write in */
    /* front of the cluster array                                     */
    if(k==-1) continue;

    (cluster+k)->cnt++;

    /* numeric fields */
    for(i=0; i<fnum->cnt; i++){
      v=*((sample->rec+smp)->num+i);
      if(!v.nul){
        ((cluster+k)->calNum+i)->accum.v.d += v.v.d;
        ((cluster+k)->calNum+i)->cnt++;
      }
    }
  }
}

/*----------------------------------------------------------------------------*/
/* ƥ饹νſ׻ſѤ1Ѥʤ0֤             */
/*----------------------------------------------------------------------------*/
int movCenter(struct Cluster *cluster){
  int mov=0; /*ſưե饰*/
  MssValue newCen;
  int i,k;

  mssVinit(&newCen,DBL);

  for(k=0; k<kCnt; k++){
    /*͹*/
    for(i=0; i<fnum->cnt; i++){
      if(((cluster+k)->calNum+i)->cnt != 0){
        newCen.v.d =((cluster+k)->calNum+i)->accum.v.d /
            (double)((cluster+k)->calNum+i)->cnt;
        /*cnt==0λcenterưʤ */
        if( mssVcmp(newCen,OPE_NE,*((cluster+k)->cenNum+i)) ){
          *((cluster+k)->cenNum+i)=newCen;
          mov=1;
        }
      }
    }
  }
  return(mov);
}

/*----------------------------------------------------------------------------*/
/* Scan the whole input once and compute the per-field statistics.            */
/*   hdi : header of the input (gives the number of fields per row)           */
/*   fpr : input file, read from the top                                      */
/* Returns a newly allocated DataInfo (release it with freeDataInfo()).       */
/* Fields whose text starts with '*' are treated as NULL and not counted.     */
/* The program exits with an error when some field has no value at all.       */
/* Side effect: the global kCnt is lowered to the row count when the input    */
/* has fewer rows than clusters requested.                                    */
/*----------------------------------------------------------------------------*/
struct DataInfo *getDatInfo(
  struct mssHeader  *hdi,  /* header structure */
  struct mssFPR     *fpr){ /* input file pointer */

  struct DataInfo *data;   /* statistics being built */
  struct mssFldRec *fr;    /* row buffer */
  char  *str;
  MssValue  num;
  int i;

  mssVinit(&num,DBL);

  /* allocate the result and one slot per numeric field in each array */
  data        =mssMalloc(sizeof(struct DataInfo),"gdi");
  data->maxNum=mssMalloc(sizeof(MssValue)*fnum->cnt,"gdi");
  data->minNum=mssMalloc(sizeof(MssValue)*fnum->cnt,"gdi");
  data->rngNum=mssMalloc(sizeof(MssValue)*fnum->cnt,"gdi");
  data->sumNum=mssMalloc(sizeof(MssValue)*fnum->cnt,"gdi");
  data->cntNum=mssMalloc(sizeof(int)*fnum->cnt,"gdi");
  data->avgNum=mssMalloc(sizeof(MssValue)*fnum->cnt,"gdi");
  for(i=0; i<fnum->cnt; i++){
    mssVinit(&data->maxNum[i],DBL);
    mssVinit(&data->minNum[i],DBL);
    mssVinit(&data->rngNum[i],DBL);
    mssVinit(&data->sumNum[i],DBL);
    mssVinit(&data->avgNum[i],DBL);
    /* max/min start at the extremes so the first value replaces them */
    data->maxNum[i].v.d=-DBL_MAX;
    data->minNum[i].v.d=DBL_MAX;
    data->rngNum[i].v.d=0;
    data->sumNum[i].v.d=0;
    data->cntNum[i]    =0;
    data->avgNum[i].v.d=0;
  }
  data->cnt=0;

  fr=mssInitFldRec(hdi->flds->cnt);
  mssSeekTopFPR(fpr);

  while( EOF != mssReadFldRec(fpr,fr) ){

    /* numeric fields */
    for(i=0; i<fnum->cnt; i++){
      str=*(fr->pnt+MssFlds2num(fnum,i));
      if(*str=='*') continue;   /* NULL value */

      num.v.d=atof(str);
      data->cntNum[i]++;
      data->sumNum[i].v.d+=num.v.d;
      if( mssVcmp(num,OPE_GT,data->maxNum[i]) ){
        data->maxNum[i].v.d=num.v.d;
      }
      if( mssVcmp(num,OPE_LT,data->minNum[i]) ){
        data->minNum[i].v.d=num.v.d;
      }
    }

    data->cnt++;
  }
  mssFreeFldRec(fr);

  /* a field without a single value is an error */
  for(i=0; i<fnum->cnt; i++){
    if( data->cntNum[i]==0 ) {
      mssShowErrMsg("value not found on some fields");
      exit(mssErrorNoDefault);
    }
  }

  /* range (max - min) and average of every field */
  for(i=0; i<fnum->cnt; i++){
    data->rngNum[i].v.d=data->maxNum[i].v.d-data->minNum[i].v.d;
    data->avgNum[i].v.d=data->sumNum[i].v.d/(double)data->cntNum[i];
  }

  /* never ask for more clusters than there are rows */
  if(data->cnt<kCnt) kCnt=data->cnt;

  return(data);
}

/* Release a DataInfo built by getDatInfo(): the six per-field arrays first,  */
/* then the structure itself.                                                 */
void freeDataInfo(struct DataInfo *data){

  /* per-field arrays */
  mssFree(data->avgNum);
  mssFree(data->cntNum);
  mssFree(data->sumNum);
  mssFree(data->rngNum);
  mssFree(data->minNum);
  mssFree(data->maxNum);

  /* the container */
  mssFree(data);
}

/*----------------------------------------------------------------------------*/
/* Allocate an array of `cnt` clusters.                                       */
/* Every element gets a centroid array and an accumulator array with one      */
/* slot per numeric field (fnum->cnt); the centroid values are initialized    */
/* as DBL. All memory comes from mssCalloc(); `data` is left for the caller   */
/* to set. Release the array with freeCluster().                              */
/*----------------------------------------------------------------------------*/
struct Cluster *malCluster(int cnt){
  struct Cluster *cluster;
  struct Cluster *cur;
  int c,f;

  cluster=mssCalloc(sizeof(struct Cluster)*cnt,"malCluster");
  for(c=0; c<cnt; c++){
    cur=&cluster[c];
    cur->cenNum=mssCalloc(sizeof(MssValue     )*fnum->cnt,"mal");
    cur->calNum=mssCalloc(sizeof(struct CalNum)*fnum->cnt,"mal");
    for(f=0; f<fnum->cnt; f++){
      mssVinit(&cur->cenNum[f],DBL);
    }
  }

  return(cluster);
}

/* Release an array of `cnt` clusters allocated by malCluster(): the          */
/* centroid and accumulator arrays of each element, then the array itself.    */
/* The `data` member is not freed here.                                       */
void freeCluster(struct Cluster *cluster, int cnt){
  int c=0;

  while(c<cnt){
    mssFree(cluster[c].cenNum);
    mssFree(cluster[c].calNum);
    c++;
  }
  mssFree(cluster);
}

/* Release a Sample: the value array of each of its recCnt records, the       */
/* record array, then the structure itself. The `data` member is not freed.   */
void freeSample(struct Sample *sample){
  int r=0;

  while(r<sample->recCnt){
    mssFree( sample->rec[r].num );
    r++;
  }
  mssFree( sample->rec );
  mssFree( sample );
}

/*----------------------------------------------------------------------------*/
/* Draw recCnt records at random (without repetition, in source order) from   */
/* an in-memory sample.                                                       */
/*   orgSmp : source records                                                  */
/*   recCnt : number of records wanted; capped at orgSmp->recCnt              */
/* Returns a newly allocated Sample holding copies of the chosen values       */
/* (release it with freeSample()). Only rec, recCnt and numFldCnt are set on  */
/* the result.                                                                */
/*----------------------------------------------------------------------------*/
struct Sample *samplingSmp(
  struct Sample *orgSmp,  /* source data */
  int            recCnt){ /* number of records to draw */

  struct Sample *sample;  /* receives the drawn records */
  int need;               /* records still to be picked */
  int left;               /* source records not yet visited */
  int src,dst,f;

  /* cannot draw more than the source holds */
  if(recCnt > orgSmp->recCnt) recCnt=orgSmp->recCnt;

  /* allocate the result */
  sample     =mssMalloc(sizeof(struct Sample),       "sampling");
  sample->rec=mssMalloc(sizeof(struct SmpRec)*recCnt,"sampling");
  for(dst=0; dst<recCnt; dst++){
    sample->rec[dst].num=mssMalloc(sizeof(MssValue)*fnum->cnt,"sampling");
  }

  /* selection sampling: pick the current record with probability need/left */
  need=recCnt;
  left=orgSmp->recCnt;
  for(src=0; src<orgSmp->recCnt; src++,left--){
    if( (rand()%left) >= need ) continue;

    /* numeric fields: copy into the next free slot */
    dst=recCnt-need;
    for(f=0; f<fnum->cnt; f++){
      sample->rec[dst].num[f]=orgSmp->rec[src].num[f];
    }
    need--;
  }

  sample->recCnt=recCnt;
  sample->numFldCnt=fnum->cnt;

  return(sample);
}

/*----------------------------------------------------------------------------*/
/* Draw `mul` independent random samples from the input file and store them   */
/* in sample[0..mul-1].                                                       */
/*   sample : receives one newly allocated Sample per set                     */
/*   mul    : number of sample sets to build                                  */
/*   recCnt : records wanted per set; forced into 100..5000 and then capped   */
/*            at the number of input rows (data->cnt)                         */
/*   data   : input statistics; stored in every sample, and its averages      */
/*            replace NULL ('*') field values                                 */
/*   hdi    : header of the input (gives the number of fields per row)        */
/*   fpr    : input file, read once from the top                              */
/* The per-set counters are sized by `mul`; they used to be fixed arrays of   */
/* 10 elements, which overflowed for larger `mul`.                            */
/*----------------------------------------------------------------------------*/
void sampling(
  struct Sample   *sample[], /* receives the sampled data */
  int              mul,      /* number of sample sets */
  int              recCnt,   /* records per set */
  struct DataInfo *data,     /* input statistics */
  struct mssHeader   *hdi,      /* header structure */
  struct mssFPR      *fpr){     /* input file pointer */

  struct mssFldRec *fr;       /* row buffer */
  int *select;                /* per set: records still to be picked */
  int *remaining;             /* per set: input rows not yet visited */
  int i,j;
  char *s;
  MssValue val;

  mssVinit(&val,DBL);

  /* adjust the number of records per set */
  if(recCnt < 100      ) recCnt=100;
  if(recCnt > 5000     ) recCnt=5000;
  if(recCnt > data->cnt) recCnt=data->cnt;

  /* per-set counters; at least one element so that mul==0 never asks */
  /* for a zero-byte block                                            */
  select   =mssMalloc(sizeof(int)*(mul>0 ? mul : 1),"sampling");
  remaining=mssMalloc(sizeof(int)*(mul>0 ? mul : 1),"sampling");

  /* allocate the sample sets */
  for(i=0; i<mul; i++){
    sample[i]     =mssMalloc(sizeof(struct Sample),       "sampling");
    sample[i]->rec=mssMalloc(sizeof(struct SmpRec)*recCnt,"sampling");
    for(j=0; j<recCnt; j++){
      (sample[i]->rec+j)->num=mssMalloc(sizeof(MssValue)*fnum->cnt,"sampling");
    }
    sample[i]->data=data;
  }

  fr=mssInitFldRec(hdi->flds->cnt);
  mssSeekTopFPR(fpr);
  for(i=0; i<mul; i++){
    select[i]=recCnt;
    remaining[i]=data->cnt;
  }

  while( EOF != mssReadFldRec(fpr,fr) ){

    /* each set decides independently whether it takes this row: */
    /* probability select/remaining (selection sampling)         */
    for(i=0; i<mul; i++){
      if( (rand()%remaining[i]) < select[i] ){
        /* numeric fields */
        for(j=0; j<fnum->cnt; j++){
          s=*(fr->pnt+MssFlds2num(fnum,j));
          if(*s=='*'){ val=*(data->avgNum+j); }
          else       { val.nul=0; val.v.d=atof(s); }
          *((sample[i]->rec+recCnt-select[i])->num+j) = val;
        }

        select[i]--;
      }
      remaining[i]--;
    }
  }
  mssFreeFldRec(fr);

  for(i=0; i<mul; i++){
    sample[i]->recCnt=recCnt;
    sample[i]->numFldCnt=fnum->cnt;
  }

  mssFree(select);
  mssFree(remaining);
}

void setSmp2Cluster(
  struct Cluster *cluster,
  int k,
  struct Sample  *sample,
  int s){

  int i;
  /*͹*/
  for(i=0; i<fnum->cnt; i++){
    *((cluster+k)->cenNum+i)=*((sample->rec+s)->num+i);
  }
}

/*============================================================================*/
/* Creating the initial seeds (initial cluster centers)                       */
/*============================================================================*/
/*----------------------------------------------------------------------------*/
/* Random initialization.                                                     */
/* Draws kCnt records at random from `sample` and uses them as the initial    */
/* centroids. Returns a newly allocated array of kCnt clusters whose first    */
/* element refers to sample->data.                                            */
/*----------------------------------------------------------------------------*/
struct Cluster *initClusterRA( struct Sample *sample ){

  struct Sample  *picked=samplingSmp(sample,kCnt); /* kCnt random records */
  struct Cluster *cluster=malCluster(kCnt);        /* result */
  int idx;

  cluster->data=sample->data;

  /* the idx-th picked record becomes the idx-th centroid */
  for(idx=0; idx<picked->recCnt; idx++){
    setSmp2Cluster(cluster,idx,picked,idx);
  }

  freeSample(picked);
  return(cluster);
}

/*----------------------------------------------------------------------------*/
/* KAUFMAN APPROACH                                                           */
/* Reference :                                                                */
/* Lozano, "An empirical comparison of four initialization methods            */
/* for the K-Mean",p6.                                                        */
/*----------------------------------------------------------------------------*/
/* Return the index of the sample record closest to the centroid (per-field   */
/* mean of the non-NULL values) of the whole sample. Returns 0 when no record */
/* compares closer than the DBL_MAX starting distance. The global `distance`  */
/* is overwritten with the best distance found.                               */
static int getCenInstance(struct Sample *sample){

  struct Cluster *cluster;   /* one temporary cluster holding the overall centroid */
  MssValue disTmp;
  int i,j,k;
  MssValue val;

  cluster = malCluster(1);

  /* accumulate every non-NULL value per field */
  for(i=0; i<sample->recCnt; i++){
    for(j=0; j<sample->numFldCnt; j++){
      if(! (*((sample->rec+i)->num+j)).nul ){
        (cluster->calNum+j)->accum.v.d += (*((sample->rec+i)->num+j)).v.d;
        (cluster->calNum+j)->cnt++;
      }
    }
  }

  /* centroid = mean per field; NULL when the field had no value */
  for(j=0; j<sample->numFldCnt; j++){
    mssVinit(&val,DBL);
    if( (cluster->calNum+j)->cnt ==0 ){
      val.nul=1;
    }else{
      val.v.d= (cluster->calNum+j)->accum.v.d/(double)(cluster->calNum+j)->cnt;
    }
    *(cluster->cenNum+j)=val;
  }

  /* find the record nearest to that centroid */
  k=0;
  mssVinit(&distance,DBL);
  distance.v.d=DBL_MAX;
  for(i=0; i<sample->recCnt; i++){
    disTmp=calDistanceClsSmp(cluster,0,sample,i);
    if( mssVcmp(distance,OPE_GT,disTmp) ){
      distance=disTmp;
      k=i;
    }
  }

  /* the temporary cluster is no longer needed (it used to be leaked) */
  freeCluster(cluster,1);
  return(k);
}


/* Return the distance between records i and j of the sample                  */
/* (thin wrapper around calDistanceSmpSmp()).                                 */
static MssValue get_dji(struct Sample *sample, int i, int j){
  MssValue d;

  d=calDistanceSmpSmp(sample, i, j);
  return(d);
}

/* Return the shortest distance between sample record j and the clusters      */
/* registered so far (indices 0..k-1). The result stays at DBL_MAX when k is  */
/* 0 or when no distance compares smaller than DBL_MAX.                       */
static MssValue get_Dj(
  struct Cluster *cluster,
  int k,                  /* number of clusters registered so far */
  struct Sample *sample, int j){
  MssValue dis;
  MssValue disMin;
  int s;

  mssVinit(&disMin,DBL);
  disMin.v.d=DBL_MAX;
  for(s=0; s<k; s++){
    dis=calDistanceClsSmp(cluster,s,sample,j);
    if( mssVcmp(disMin, OPE_GT, dis) ){
      disMin=dis;
    }
  }
  return(disMin);
}

/* Return 1 when s occurs among the first k entries of sList, 0 otherwise.    */
static int isInCluster(int s,int *sList, int k){
  int found=0;
  int idx=0;

  /* stop at the first match or after k entries */
  while(!found && idx<k){
    found=( sList[idx]==s );
    idx++;
  }
  return(found);
}

/* Main routine of the Kaufman approach (KA).                                 */
/* The first centroid is the record closest to the centre of the sample       */
/* (getCenInstance()). Each further centroid is the not-yet-chosen record i   */
/* with the largest sum over the other not-yet-chosen records j of            */
/* max(Dj - dji, 0), where Dj is the distance from j to its nearest chosen    */
/* centroid and dji the distance between i and j.                             */
/* Returns a newly allocated array of kCnt clusters whose first element       */
/* refers to sample->data.                                                    */
/* The selection loop runs only while fewer than kCnt centroids exist, so     */
/* kCnt==1 (possible after getDatInfo() lowers kCnt) no longer writes past    */
/* the end of the arrays.                                                     */
static struct Cluster *initClusterKA( struct Sample *sample ){

  struct Cluster *cluster; /* result */
  int k=0;                 /* number of centroids registered so far */
  MssValue    Cji,Cji_max;
  MssValue    val0;        /* constant 0 used as the lower bound in max() */
  int    i,j,i_max=0;
  int     s;
  int    *sList; /* sample indices already chosen as centroids */

  mssVinit(&Cji,DBL);
  mssVinit(&Cji_max,DBL);
  mssVinit(&val0,DBL);

  /* allocate the result */
  cluster=malCluster(kCnt);
  cluster->data=sample->data;

  /* list of the chosen sample indices */
  sList=mssCalloc(sizeof(int)*kCnt,"initCluster");

  /* first centroid: the most central record */
  k=0;
  s=getCenInstance(sample);
  *(sList+k)=s;
  setSmp2Cluster(cluster,k++,sample,s);

  /* remaining centroids */
  while(k<kCnt){
    Cji_max.v.d=-1;
    for(i=0; i<sample->recCnt; i++){
      if(isInCluster(i,sList,k)) continue;
      mssVinit(&Cji,DBL);
      for(j=0; j<sample->recCnt; j++){
        if(isInCluster(j,sList,k)) continue;
        if(i==j) continue;
        Cji = mssVadd(Cji,mssVmax(mssVsub(get_Dj(cluster,k,sample,j),get_dji(sample,i,j)),val0));
      }
      if( mssVcmp(Cji, OPE_GT, Cji_max) ){
        Cji_max=Cji;
        i_max=i;
      }
    }

    /* NOTE: when no candidate is left, i_max keeps its previous value */
    *(sList+k)=i_max;
    setSmp2Cluster(cluster,k++,sample,i_max);
  }

  mssFree(sList);
  return(cluster);
}

/*----------------------------------------------------------------------------*/
/* BRADLEY & FAYYAD APPROACH                                                  */
/* Reference :                                                                */
/* Bradley, Fayyad, "Refining Initial Points for K-Means Clustering"          */
/* CMi : CM[i] */
/* CM  : smpCM */
/* FMi : FM[i] */
/*----------------------------------------------------------------------------*/
/* Replace the centroid of cluster k by the sample record that lies farthest  */
/* from it.                                                                   */
/*   cluster : array of kCnt clusters; consumed by this call when a record    */
/*             is found (the array is freed and its buffers are handed over)  */
/*   k       : index of the cluster to re-seed                                */
/*   sample  : candidate records                                              */
/* Returns a newly allocated cluster array; callers must use the returned     */
/* pointer instead of `cluster`. When no record yields a distance greater     */
/* than -1 (e.g. an empty sample) the original array is returned unchanged    */
/* instead of indexing the sample with -1.                                    */
static struct Cluster *farthest(
  struct Cluster *cluster,
  int k,
  struct Sample *sample){

  struct Cluster *cls; /* replacement array */
  MssValue dis;
  MssValue dis_max;
  int i_max;
  int i;

  mssVinit(&dis,DBL);
  mssVinit(&dis_max,DBL);

  /* find the record farthest from centroid k */
  dis_max.v.d=-1;
  i_max=-1;
  for(i=0; i<sample->recCnt; i++){
    dis=calDistanceClsSmp(cluster, k, sample,i);
    if( mssVcmp(dis, OPE_GT, dis_max) ){
      dis_max=dis;
      i_max=i;
    }
  }

  /* nothing to re-seed from: keep the clusters as they are */
  if(i_max<0) return(cluster);

  cls=malCluster(kCnt);
  cls->cnt=cluster->cnt;
  for(i=0; i<kCnt;i++){
    if(i!=k){
      /* drop the buffers malCluster() just created for this slot before */
      /* adopting the existing ones (they used to be leaked)             */
      mssFree((cls+i)->cenNum);
      mssFree((cls+i)->calNum);
      (cls+i)->cnt   =(cluster+i)->cnt;
      (cls+i)->cenNum=(cluster+i)->cenNum;
      (cls+i)->calNum=(cluster+i)->calNum;
    }else{
      /* slot k keeps its fresh buffers; the old ones are released */
      mssFree((cluster+k)->cenNum);
      mssFree((cluster+k)->calNum);
    }
  }
  cls->data=cluster->data;
  mssFree(cluster);
  setSmp2Cluster(cls,k,sample,i_max);
  return(cls);
}

/*
 * Build initial cluster centers with the Bradley & Fayyad refinement.
 *
 * sample : array of optMUL.val sub-samples.
 *
 * Steps:
 *   1. For each sub-sample s, start from initClusterRA() and iterate
 *      setClusterSmp()/movCenter() until movCenter() reports no change,
 *      giving CM[s].  A cluster whose cnt is 0 is re-seeded once with
 *      farthest() and the iteration is repeated.
 *   2. Collect the centers of every CM[s] into one sample set, smpCM.
 *   3. For each s, iterate on smpCM starting from the centers of CM[s],
 *      giving FM[s].
 *   4. Return the FM[s] whose summed calNum accum values (used here as the
 *      distortion against smpCM) is smallest.
 *
 * Returns the selected cluster array (the caller releases it with
 * freeCluster), or NULL when a cluster is still empty after the second
 * attempt; in that case every cluster array built so far is released.
 *
 * NOTE(review): CM and FM hold 10 entries, so optMUL.val is assumed to be
 * at most 10 - confirm against the option definition.
 */
static struct Cluster *initClusterBF( struct Sample *sample[] ){

  struct Sample  *smpCM;  /* sample set made of all CM centers */
  struct Cluster *CM[10]; /* clustering result per sub-sample */
  struct Cluster *FM[10]; /* clustering result on smpCM per start point */
  int i,j,s,t,flg;
  int minFMi=0;
  MssValue minFM;
  MssValue tmpFM;


  /* step 1: cluster every sub-sample */
  for(s=0; s<optMUL.val; s++){
    /* initClusterRA returns its own allocation, so nothing is allocated
       here beforehand (a prior malCluster result used to be leaked) */
    /*CM[s]=initClusterKA(sample[s]);*/
    CM[s]=initClusterRA(sample[s]);
    CM[s]->data=sample[s]->data;

    /*showCluster(CM[s]);*/

    /* CONVERGENCE (kmean-mod): at most two attempts */
    for(i=0;i<2;i++){
      /* k-mean */
      while(1){
        setClusterSmp(CM[s],sample[s]);
        if(!movCenter(CM[s]))break;
      }

      /* k-mean mod: look for an empty cluster and re-seed the first one */
      flg=0;
      for(j=0; j<kCnt; j++){
        if((CM[s]+j)->cnt == 0){
          CM[s]=farthest(CM[s],j,sample[s]);
          flg=1; /* an empty cluster was found */
          break;
        }
      }
      if(!flg) break;
      /* still empty on the second attempt: give up and report NULL,
         releasing everything built so far */
      if(i==1){
        for(t=0; t<=s; t++){
          freeCluster(CM[t],kCnt);
        }
        return(NULL);
      }
    }
  }

  /* step 2: register every cluster center as one record of smpCM */
  /* NOTE(review): smpCM->data and smpCM->catFldCnt are left unset here;
     verify that setClusterSmp does not read them */
  smpCM     =mssMalloc(sizeof(struct Sample),                "sampling");
  smpCM->rec=mssMalloc(sizeof(struct SmpRec)*optMUL.val*kCnt,"sampling");
  for(j=0; j<optMUL.val*kCnt; j++){
    (smpCM->rec+j)->num=mssMalloc(sizeof(MssValue)*fnum->cnt,"sampling");
  }
  smpCM->recCnt=0;
  smpCM->numFldCnt=fnum->cnt;
  for(s=0; s<optMUL.val; s++){
    for(i=0; i<kCnt; i++){
      for(j=0; j<fnum->cnt; j++){
        *((smpCM->rec+smpCM->recCnt)->num+j) = *((CM[s]+i)->cenNum+j);
      }
      smpCM->recCnt++;
    }
  }

  /*showSample(smpCM);*/

  /* step 3: compute FM[s] */
  for(s=0; s<optMUL.val; s++){
    FM[s]=malCluster(kCnt);
    FM[s]->data=sample[s]->data;

    /* the centers of CM[s] are the starting centers of FM[s] */
    for(i=0; i<kCnt; i++){
      for(j=0; j<fnum->cnt; j++){
        *((FM[s]+i)->cenNum+j) = *((CM[s]+i)->cenNum+j);
      }
    }
 
    /* CONVERGENCE */
    while(1){
      setClusterSmp(FM[s],smpCM);
      if(!movCenter(FM[s]))break;
    }
  }

  /* step 4: DISTORTION(FM[s],smpCM) - keep the FM[s] with the smallest sum */
  mssVinit(&minFM,DBL);
  minFM.v.d=DBL_MAX;
  for(s=0; s<optMUL.val; s++){
    setClusterSmp(FM[s], smpCM);
    mssVinit(&tmpFM,DBL);
    for(i=0; i<kCnt; i++){
      for(j=0; j<fnum->cnt; j++){
        tmpFM = mssVadd( tmpFM, ((FM[s]+i)->calNum+j)->accum);
      }
    }
    if( mssVcmp(minFM, OPE_GT, tmpFM) ){
      minFM =tmpFM;
      minFMi=s;
    }
  }

  /*showCluster(FM[minFMi]);*/

  /* release every CM and every FM except the one being returned */
  for(s=0; s<optMUL.val; s++){
    freeCluster(CM[s],kCnt);
    if(s!=minFMi) freeCluster(FM[s],kCnt);
  }
  for(j=0; j<optMUL.val*kCnt; j++){
    mssFree( (smpCM->rec+j)->num);
  }
  mssFree( smpCM->rec );
  mssFree( smpCM );
  FM[minFMi]->data=sample[0]->data;
  return(FM[minFMi]);
}
 
/*============================================================================*/
/* Main                                                                       */
/*============================================================================*/
/*
 * Entry point of xtkmean.
 *
 * Flow: initialise the library and options, read the input header, write the
 * output header (input fields plus the names given in optFNM), gather data
 * information, build the initial clusters according to optINI.val, iterate
 * setCluster()/movCenter() until movCenter() reports no change, then re-read
 * the input and write each record followed by its 1-based cluster number
 * (or "*" when nearestCluster() returns -1).
 */
int main(int argc, char *argv[]){

  struct mssHeader *hdi; /* header of the input file */
  struct mssHeader *hdo; /* header of the output file */
  struct mssFPR    *fpr; /* input file handle */
  struct mssFPW    *fpw; /* output file handle */
  struct mssFldRec *fr;  /* record buffer used while re-reading the input */

  struct Cluster  *cluster=NULL;
  struct DataInfo *data;
  struct Sample   *sample[10]; /* sub-samples (several only for the BF method) */
  MssValue *disAccum;  /* per-cluster sum of `distance`; not output */
  MssValue  totalDis;  /* overall sum of `distance`; not output */
  int nearest;
  int idx;
  int convCnt=0;       /* number of convergence passes; not output */

  /*--------------------------------------------------------------------------*/
  /* Start-up: library, help, options, input file and header                  */
  /*--------------------------------------------------------------------------*/
  mssInit(argc,argv,&comHelp);
  mssHelpDoc(opt,&comHelp,argc,argv);
  mssSetOption(opt,argc,argv);
  fpr=mssOpenFPR(optINF.str,4);
  hdi=mssReadHeader(fpr);
  mssSetOptFld(&optNUM, hdi);  /* bind the optNUM fields to the input header */
  fnum=optNUM.flds;

  totalFldCnt=fnum->cnt;
  kCnt = optCNT.val;           /* requested number of clusters */

  /*--------------------------------------------------------------------------*/
  /* Output header: copy of the input fields plus the new field name(s)       */
  /*--------------------------------------------------------------------------*/
  hdo=mssInitCpyHeader(hdi);
  mssAddFieldsByFields(hdo->flds,hdi->flds);
  mssAddFieldsByStrList(hdo->flds,optFNM.strList,optFNM.cnt);

  fpw=mssOpenFPW(optOTF.str,optZIP.set,0);
  mssWriteHeader(hdo, fpw);

  /*--------------------------------------------------------------------------*/
  /* Main routine                                                             */
  /*--------------------------------------------------------------------------*/
  data=getDatInfo(hdi,fpr);
  mssInitRand(optSED.val);
  mssVinit(&distance,DBL);

  /* draw the sample(s), then derive the kCnt initial centers from them */
  switch(optINI.val){
  case 0: /* initClusterRA on a single sample */
    sampling(sample,1,100,data,hdi,fpr);
    cluster = initClusterRA(sample[0]);
    freeSample(sample[0]);
    break;

  case 1: /* Kaufman Approach on a single sample */
    sampling(sample,1,100, data,hdi,fpr);
    cluster = initClusterKA(sample[0]);
    freeSample(sample[0]);
    break;

  case 2: /* Bradley & Fayyad Approach on optMUL.val samples */
    sampling(sample,optMUL.val,100, data,hdi,fpr);
    /* a NULL result means a cluster stayed empty: retry with one fewer */
    for(;;){
      cluster = initClusterBF(sample);
      if(cluster!=NULL) break;
      kCnt--;
    }
    for(idx=0; idx<optMUL.val; idx++){
      freeSample(sample[idx]);
    }
    break;
  }

  /* MAIN CONVERGENCE: repeat while movCenter() returns non-zero */
  do{
    convCnt++;
    setCluster(cluster,hdi,fpr);
  }while(movCenter(cluster));

  /* write every input record together with its cluster number */
  mssVinit(&totalDis,DBL);
  disAccum=mssCalloc(sizeof(MssValue)*kCnt,"xtkmean");
  for(idx=0; idx<kCnt; idx++){
    mssVinit(&disAccum[idx],DBL);
  }
  fr=mssInitFldRec(hdi->flds->cnt);
  mssSeekTopFPR(fpr);
  while( mssReadFldRec(fpr,fr) != EOF ){
    mssGV.inCnt++;

    nearest=nearestCluster(cluster,fr);
    if(nearest!=-1){
      /* accumulate the global `distance` for this record */
      disAccum[nearest] = mssVadd(disAccum[nearest],distance);
      totalDis = mssVadd(totalDis,distance);
    }

    mssWriteFld(fr->pnt, fr->fldCnt, " ", fpw);
    if(nearest==-1){
      mssWriteStr("*\n",fpw);
    }else{
      mssWriteInt(nearest+1,fpw);
      mssWriteRet(fpw);
    }
    mssGV.outCnt++;
  }

  mssFree(disAccum);
  freeDataInfo(data); 
  freeCluster(cluster,kCnt);

  /*--------------------------------------------------------------------------*/
  /* Footer and shutdown                                                      */
  /*--------------------------------------------------------------------------*/
  mssWriteFooter(fpw);
  mssCloseFPR(fpr);
  mssCloseFPW(fpw);
  mssFreeHeader(hdi);
  mssFreeHeader(hdo);
  mssFreeOption(opt);
  mssShowEndMsg();
  mssEnd(mssExitSuccess);
  return(0);              /* to avoid warning message */
}
