#!/usr/bin/env bash

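#*****************************************************************************
# xtnfold version 1.1
# Creates n-fold training/test data sets.
# (The original Japanese comments in this file were damaged by a character
#  encoding error; the English text below is a best-effort reconstruction.)
# T.Nakahara
# 2002/01/12
# Y.Hamuro
# 2002/01/29 Added -t; change to the handling of a -d directory name that
#            ends with "/" (exact wording of the original note is lost)
# 2003/02/03 Adapted to the current commands (1.0.1)
# 2004/01/21 Changed the first line from /bin/bash to /usr/bin/env bash
#            (following a report; the reporter's name is lost)
# 2004/01/21 Help now shows the version (1.0.2)
# 2004/11/29 Added the signal trap (1.1), removed -t
#*****************************************************************************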
# Usage / error message.
# Prints the usage text to stderr and exits with status 1. It is called for
# -h and for any option getopts does not recognise.
# NOTE(review): the original text was Japanese but is stored mis-encoded and
# unreadable; it is rewritten in English here. It also advertised "[-t]",
# which is no longer accepted, and quoted default file names that differ from
# the script's real defaults ("##-train.xt" / "##-test.xt").
function help {
cat >&2 <<'EOF'
------------------------
xtnfold.sh version 1.1
------------------------
Summary) Create n-fold training/test data sets.
Usage)   xtnfold.sh -i input_file -d output_dir -n folds -c class_field
                    [-o training_file_name] [-O test_file_name]
                    [-S random_seed] [-V]
         -V : show the completion messages of the commands.
         -h : show this help.
Note)    The rows are divided into folds per value of the field given by -c.
         If -o and -O are omitted, the files are named
         "##-train.xt" and "##-test.xt" ("##" becomes the fold number).
Example) xtnfold.sh -i dat -d fold -n 5 -c Class -o fold##.train -O fold##.test
EOF
exit 1
}

# Keep the invocation (script name plus all arguments) so that the status
# messages printed later can quote it.
cmdLine="${0} ${*}"

# mssQuiet starts as 1 and is exported to child processes; the -V option
# resets it to 0. Presumably the xt* commands read it to decide whether to
# print their own messages - confirm against their documentation.
mssQuiet=1
export mssQuiet

# Termination by signal.
# Removes this run's work files, reports the interruption on stderr and exits
# with status 1.
# Globals: wf      (read) - prefix shared by all work files ("$wf-...")
#          cmdLine (read) - saved command line, quoted in the message
function endByInt {
  # The work files are created as "$wf-*"; the former "$TD-xx*" pattern used
  # an undefined variable and therefore never removed them.
  # Skip the removal while the prefix is still empty so the pattern can never
  # degrade to a bare "-*".
  if [ -n "$wf" ]; then
    rm -f -- "$wf"-*
  fi
  # Quoted arguments: the command line is printed verbatim, without word
  # splitting or pathname expansion.
  printf '#ERROR# %s "%s" "%s"\n' "$$" "$cmdLine" "end by signal(ctr^C)" >&2
  exit 1
}

# ---------------------------------------------------------------------------
# Main flow: parse and validate the options, tag every input row with a fold
# number, then write one training/test file pair per fold into the -d
# directory.
# ---------------------------------------------------------------------------

# Print an error in the script's '#ERROR# pid "script" "text"' format on
# stderr and exit with status 1.
# Arguments: $1 - message text
function errorEnd {
  printf '#ERROR# %s "%s" "%s"\n' "$$" "$0" "$1" >&2
  exit 1
}

# Set the parameters.
# Default output name templates; the first "##" is replaced by the fold number.
rName='##-train.xt'
sName='##-test.xt'
# "T:" is listed so that the -T (work directory) arm is reachable.
# -k is still accepted with an argument and ignored, as before.
# ":" (an option is missing its argument) now shows the usage instead of
# being silently skipped.
while getopts ":i:d:n:c:S:k:o:O:T:Vh" opt; do
  case $opt in
    i  ) input=$OPTARG ;;
    d  ) path=$OPTARG ;;
    n  ) fold=$OPTARG ;;
    c  ) cls=$OPTARG ;;
    S  ) sed=$OPTARG ;;
    o  ) rName=$OPTARG ;;
    O  ) sName=$OPTARG ;;
    T  ) tmpD=$OPTARG ;;
    V  ) mssQuiet=0 ;;
    h  ) help ;;
    :  ) help ;;
    \? ) help ;;
  esac
done
shift $((OPTIND - 1))

# Mandatory options. The -c check now stops the script like the others do;
# it used to print the error and carry on.
[ -n "$input" ] || errorEnd "option -i is mandatory"
[ -n "$path" ]  || errorEnd "option -d is mandatory"
[ -n "$fold" ]  || errorEnd "option -n is mandatory"
[ -n "$cls" ]   || errorEnd "option -c is mandatory"

# -n is a divisor in the xtcal expression and the loop bound below, so it
# must be a positive integer. 10# keeps a value such as "08" from being
# parsed as octal.
case $fold in
  *[!0-9]*) errorEnd "option -n must be a positive integer : $fold" ;;
esac
fold=$((10#$fold))
if [ "$fold" -lt 1 ]; then
  errorEnd "option -n must be a positive integer : $fold"
fi

# Optional values: random seed (-1 when -S is not given) and work directory.
[ -n "$sed" ]  || sed=-1
[ -n "$tmpD" ] || tmpD=/tmp

# The training and test name templates must differ.
if [ "$rName" = "$sName" ]; then
  errorEnd "file names for training and test must be different"
fi

# The input file must exist.
if [ ! -f "$input" ]; then
  errorEnd "file not found : $input"
fi

# Strip the trailing "/" characters from -d; a path made only of slashes
# stands for the root directory.
case $path in
  *[!/]*)
    while [ "$path" != "${path%/}" ]; do
      path=${path%/}
    done
    ;;
  *) path=/ ;;
esac
# Directory part used to build output names ("" for the root directory, so
# that the result is "/name" rather than "//name").
outDir=${path%/}

# Prefix shared by all work files of this run.
wf=$tmpD/mss-xtnfold-$$

# Arm the signal trap.
trap endByInt INT QUIT TERM HUP

# Create the output directory when it does not exist yet.
if [ ! -d "$path" ]; then
  mkdir -p -- "$path" || errorEnd "cannot create directory : $path"
fi

# Tag every row with a fold number in "##val##" (0 .. fold-1), computed per
# value of the -c field from the row's position after ordering by the
# "##rand##" field. Presumably xtrand adds that random field and xtcal adds
# the computed one - confirm against the xt* command documentation.
xtrand -a "##rand##" -S "$sed" -i "$input" |
xtcal  -k"$cls" -s "##rand##%n" -c 'down(keyLine()/(keyCnt()/'"$fold"'+0.00001),1)' -a "##val##" >"$wf-03"

# For fold number k (1-based): rows whose ##val## equals k-1 go to the test
# work file (-o), all other rows to the training work file (-u).
for ((loop = 1; loop <= fold; loop++)); do
  xtsel -c'$##val## -eq '"$((loop - 1))" -i"$wf-03" \
        -u"$wf-trn-$loop" -o "$wf-tst-$loop"
done

# Write the final files: substitute the fold number for the first "##" of
# each name template and drop the two helper fields.
for ((loop = 1; loop <= fold; loop++)); do
  trnName=$(printf '%s\n' "$rName" | sed "s/##/$loop/")
  tstName=$(printf '%s\n' "$sName" | sed "s/##/$loop/")

  xtcut -rf "##val##,##rand##" -i"$wf-trn-$loop" -o"$outDir/$trnName"
  xtcut -rf "##val##,##rand##" -i"$wf-tst-$loop" -o"$outDir/$tstName"
done

# Remove the work files.
rm -f -- "$wf"-*

# Print the completion message.
printf '#END# %s "%s"\n' "$$" "$cmdLine" >&2
exit 0
# ==============================================================================
