/*
 * Copyright (c) 2001-2003 NAIST
 * All rights reserved
 */

/* 
 * adintool --- AD-in tool to record/split/send/receive speech data
 *
 * $Id: adintool.c,v 1.12 2003/10/04 12:09:33 ri Exp $
 * 
 */

#include <sent/stddefs.h>
#include <sent/speech.h>
#include <sent/adin.h>
#include <sent/tcpip.h>

#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>

/* ---- recording state shared between main() and the adin callbacks ---- */
/* sum of speechlen over all segments; only maintained in continuous segmentation mode */
static int total_speechlen;
static int speechlen;		/* number of samples output so far in the current segment */
static int fd;			/* file descriptor for RAW output; also used for the adinnet connection and for stdout (1) */
static FILE *fp;		/* file pointer for WAV output */

/* ---- input settings (changed by opt_parse) ---- */
/* default input: microphone when built with USE_MIC, otherwise speech file */
#ifdef USE_MIC
static int speech_input = SP_MIC;
#else
static int speech_input = SP_RAWFILE;
#endif
#ifdef USE_NETAUDIO
/* NetAudio (DatLink) server specification given with -NA */
static char *netaudio_devname = NULL;
#endif
/* TRUE: strip zero samples (cleared by -nostrip) */
static boolean strip_zero_sample = TRUE;
/* TRUE: segment the input speech (cleared by -nosegment) */
static boolean do_segment = TRUE;
/* TRUE: keep recording segment after segment; FALSE (-oneshot): first segment only */
static boolean continuous_segment = TRUE;
/* sampling frequency in Hz (-freq) */
static int sfreq = 16000;
/* level threshold, 0-32767 (-lv) */
static int level_thres = 2000;
/* zero-cross number threshold per second (-zc) */
static int zero_cross_num = 60;
/* head margin length in msec (-headmargin) */
static int head_margin = 400;
/* tail margin length in msec (-tailmargin) */
static int tail_margin = 400;
static char *infile = NULL;	/* input file name (not referenced by the code visible in this part of the file) */

/* ---- output settings (changed by opt_parse) ---- */
/* output destinations selectable with -out */
enum{SPOUT_FILE, SPOUT_STDOUT, SPOUT_ADINNET};
static int speech_output = SPOUT_FILE;
static char *filename = NULL;	/* output file name (base name when files are numbered per segment) */
/* id number of the first output file in continuous mode (-startid) */
static int startid = 0;
/* adinnet port number (-port), for both adinnet input and output */
static int adinnet_port = ADINNET_PORT;
/* adinnet server host name for output (-server) */
static char *adinnet_serv = NULL;

static boolean use_raw = FALSE;	/* output in RAW format (set by -raw, and forced on for stdout output) */

/*
 * Print the command line help to stderr and terminate the program with
 * exit status 1.  This function never returns.
 *
 * The values shown in parentheses are the current values of the
 * corresponding settings, i.e. the built-in defaults unless an earlier
 * option on the command line has already changed them.
 */
void
usage()
{
  fprintf(stderr, "adintool --- AD-in tool to record/split/send/receive speech data\n");
  fprintf(stderr, "Usage: adintool [options] -in inputdev -out outputdev\n");

  /* selectable input devices (some depend on build-time configuration) */
  fprintf(stderr, "inputdev: read speech data from:\n");
#ifdef USE_MIC
  fprintf(stderr, "    mic         microphone (default)\n");
#endif
#ifdef USE_NETAUDIO
  fprintf(stderr, "    netaudio    DatLink (NetAudio) server\n");
#endif
  fprintf(stderr, "    file        speech file (filename given from prompt)\n");
  fprintf(stderr, "    adinnet     from adinnet client (I'm server)\n");
  fprintf(stderr, "    stdin       standard tty input\n");

  /* selectable output devices */
  fprintf(stderr, "outputdev: output data to:\n");
  /* the closing parenthesis was missing from this line */
  fprintf(stderr, "    file        speech file (\"foo.0000.wav\" - \"foo.N.wav\")\n");
  fprintf(stderr, "    adinnet     to adinnet server (I'm client)\n");
  fprintf(stderr, "    stdout      standard tty output\n");

  fprintf(stderr, "I/O options:\n");
#ifdef USE_NETAUDIO
  fprintf(stderr, "    -NA             (netaudio) NetAudio server host:unit\n");
#endif
  fprintf(stderr, "    -server host    (adinnet-out) server hostname\n");
  fprintf(stderr, "    -port number    (adinnet-in/out) port number (%d)\n", adinnet_port);
  fprintf(stderr, "    -filename foo   (file-out) filename to record\n");
  fprintf(stderr, "    -startid id     (file-out) recording start id (%04d)\n", startid);

  fprintf(stderr, "Recording and Pause segmentation options:\n");
  fprintf(stderr, "  [-nosegment]          not segment input speech\n");
  fprintf(stderr, "  [-oneshot]            record only the first segment\n");
  fprintf(stderr, "  [-freq frequency]     sampling frequency in Hz              (%d)\n", sfreq);
  fprintf(stderr, "  [-lv unsignedshort]   level threshold (0-32767)             (%d)\n", level_thres);
  fprintf(stderr, "  [-zc zerocrossnum]    zerocross num threshold (per sec.)    (%d)\n", zero_cross_num);
  fprintf(stderr, "  [-headmargin msec]    header margin length in msec.         (%d)\n", head_margin);
  fprintf(stderr, "  [-tailmargin msec]    tail margin length in msec.           (%d)\n", tail_margin);
  fprintf(stderr, "  [-nostrip]            do not strip zero samples\n");
  fprintf(stderr, "  [-raw]                output in RAW format\n");
  exit(1);
}

/*
 * Print a summary of the current settings to stderr: the input source,
 * the segmentation parameters, the zero-sample handling and the
 * recording destination.
 *
 * Side effect: when the output device is stdout, use_raw is set to TRUE.
 */
void
put_status()
{
  const char *zero_frames_line;

  fprintf(stderr,"----\n");

  /* input source; an unknown value prints nothing after the label */
  fprintf(stderr,"Input-Source: ");
  if (speech_input == SP_RAWFILE) {
    fprintf(stderr,"Wave File (filename from stdin)\n");
#ifdef USE_MIC
  } else if (speech_input == SP_MIC) {
    fprintf(stderr,"Microphone\n");
#endif
#ifdef USE_NETAUDIO
  } else if (speech_input == SP_NETAUDIO) {
    fprintf(stderr,"NetAudio(DatLink) server on %s\n", netaudio_devname);
#endif
  } else if (speech_input == SP_STDIN) {
    fprintf(stderr,"Standard Input\n");
  } else if (speech_input == SP_ADINNET) {
    fprintf(stderr,"adinnet client (port %d)\n", adinnet_port);
  }

  /* segmentation mode and, when enabled, its parameters */
  fprintf(stderr,"Segmentation: ");
  if (!do_segment) {
    fprintf(stderr,"OFF\n");
  } else {
    if (!continuous_segment) {
      fprintf(stderr,"on, only one snapshot\n");
    } else {
      fprintf(stderr,"on, continuous\n");
    }
    fprintf(stderr,"  SampleRate: %d Hz\n", sfreq);
    fprintf(stderr,"       Level: %d / 32767\n", level_thres);
    fprintf(stderr,"   ZeroCross: %d per sec.\n", zero_cross_num);
    fprintf(stderr,"  HeadMargin: %d msec.\n", head_margin);
    fprintf(stderr,"  TailMargin: %d msec.\n", tail_margin);
  }

  /* zero sample handling */
  zero_frames_line = strip_zero_sample ? "  ZeroFrames: drop\n" : "  ZeroFrames: keep\n";
  fprintf(stderr, "%s", zero_frames_line);

  /* recording destination */
  fprintf(stderr,"Recording: ");
  if (speech_output == SPOUT_FILE) {
    if (!do_segment) {
      /* one unbounded recording into a single file */
      fprintf(stderr,"%s (warning: be care of disk space!)\n", filename);
    } else if (!continuous_segment) {
      /* single segment: the file name is used as given */
      fprintf(stderr,"%s\n", filename);
    } else if (use_raw) {
      /* numbered files, one per segment */
      fprintf(stderr,"%s.%04d.raw, %s.%04d.raw, ...\n", filename, startid, filename, startid + 1);
    } else {
      fprintf(stderr,"%s.%04d.wav, %s.%04d.wav, ...\n", filename, startid, filename, startid + 1);
    }
  } else if (speech_output == SPOUT_STDOUT) {
    fprintf(stderr,"STDOUT\n");
    use_raw = TRUE;		/* stdout output is switched to RAW format here */
  } else if (speech_output == SPOUT_ADINNET) {
    fprintf(stderr,"(adinnet server [%s %d])\n", adinnet_serv, adinnet_port);
  }

  fprintf(stderr,"----\n");
}

/*
 * Parse the command line and store the result into the file-scope
 * setting variables.
 *
 * argc, argv: the arguments as passed to main().
 *
 * Any error (no arguments at all, unknown option, missing or '-'-prefixed
 * option value, unsupported device) ends in usage(), which prints the
 * help and exits; so this function only returns when parsing succeeded.
 *
 * Notes:
 *  - Input and output devices are selected by the FIRST CHARACTER of the
 *    device name only.
 *  - Options are evaluated left to right: "-server" is accepted only when
 *    "-out adinnet" appeared before it, and "-NA" only takes effect when
 *    "-in netaudio" appeared before it.
 */
void
opt_parse(int argc, char *argv[])
{
  int i;
  /* option parsing */
  if (argc <= 1) usage();
  for (i=1;i<argc;i++) {
    if (!strcmp(argv[i], "-in")) {
      if (++i >= argc) usage();
      switch(argv[i][0]) {
      case 'm':
#ifdef USE_MIC
        speech_input = SP_MIC;
#else
        fprintf(stderr,"Error: mic input not available\n");
        usage();
#endif
        break;
      case 'f':
        speech_input = SP_RAWFILE;
        break;
      case 's':
        speech_input = SP_STDIN;
        break;
      case 'a':
        speech_input = SP_ADINNET;
        break;
      case 'n':
#ifdef USE_NETAUDIO
        speech_input = SP_NETAUDIO;
#else
        fprintf(stderr,"Error: netaudio input not available\n");
        usage();
#endif
        break;
      default:
        fprintf(stderr,"Error: no such input device: %s\n", argv[i]);
        usage();
      }
    } else if (!strcmp(argv[i], "-out")) {
      if (++i >= argc) usage();
      switch(argv[i][0]) {
      case 'f':
        speech_output = SPOUT_FILE;
        break;
      case 's':
        speech_output = SPOUT_STDOUT;
        break;
      case 'a':
        speech_output = SPOUT_ADINNET;
        break;
      default:
        fprintf(stderr,"Error: no such output device: %s\n", argv[i]);
        usage();
      }
    } else if (!strcmp(argv[i], "-server")) {
      i++; if (i >= argc || argv[i][0] == '-') usage();
      if (speech_output == SPOUT_ADINNET) {
        adinnet_serv = argv[i];
      } else {
        fprintf(stderr, "Warning: server [%s] ignored\n", argv[i]);
        usage();
      }
    } else if (!strcmp(argv[i], "-NA")) {
#ifdef USE_NETAUDIO
      /* Step to the option value first.  Without this, the option name
         "-NA" itself was stored as the device name and the real value
         was then rejected as an unknown option. */
      i++; if (i >= argc || argv[i][0] == '-') usage();
      if (speech_input == SP_NETAUDIO) {
        netaudio_devname = argv[i];
      } else {
        fprintf(stderr, "Warning: use \"-NA\" with \"-in netaudio\"\n");
      }
#else  /* ~USE_NETAUDIO */
      fprintf(stderr, "Error: NetAudio(DatLink) not supported\n");
      usage();
#endif
    } else if (!strcmp(argv[i], "-port")) {
      i++; if (i >= argc || argv[i][0] == '-') usage();
      adinnet_port = atoi(argv[i]);
    } else if (!strcmp(argv[i], "-filename")) {
      i++; if (i >= argc || argv[i][0] == '-') usage();
      filename = argv[i];
    } else if (!strcmp(argv[i], "-startid")) {
      i++; if (i >= argc || argv[i][0] == '-') usage();
      startid = atoi(argv[i]);
    } else if (!strcmp(argv[i], "-freq")) {
      i++; if (i >= argc || argv[i][0] == '-') usage();
      sfreq = atoi(argv[i]);
    } else if (!strcmp(argv[i], "-lv")) {
      i++; if (i >= argc || argv[i][0] == '-') usage();
      level_thres = atoi(argv[i]);
    } else if (!strcmp(argv[i], "-zc")) {
      i++; if (i >= argc || argv[i][0] == '-') usage();
      zero_cross_num = atoi(argv[i]);
    } else if (!strcmp(argv[i], "-headmargin")) {
      i++; if (i >= argc || argv[i][0] == '-') usage();
      head_margin = atoi(argv[i]);
    } else if (!strcmp(argv[i], "-tailmargin")) {
      i++; if (i >= argc || argv[i][0] == '-') usage();
      tail_margin = atoi(argv[i]);
    } else if (!strcmp(argv[i], "-nostrip")) {
      strip_zero_sample = FALSE;
    } else if (!strcmp(argv[i], "-nosegment")) {
      do_segment = FALSE;
    } else if (!strcmp(argv[i], "-oneshot")) {
      continuous_segment = FALSE;
    } else if (!strcmp(argv[i], "-raw")) {
      use_raw = TRUE;
    } else if (!strcmp(argv[i], "-h")) {
      usage();
    } else if (!strcmp(argv[i], "-help")) {
      usage();
    } else if (!strcmp(argv[i], "--help")) {
      usage();
    } else {
      fprintf(stderr,"Unknown option: \"%s\"\n", argv[i]);
      usage();
    }
  }
}


/* callbacks for recorded sample fragments */
/* return value:
   -1 ... error -> exit, terminate 
   0  ... continue
   1  ... segmented -> exit, resume (not drop buffer)
*/
/*
 * Callback for file / stdout output: append one fragment of recorded
 * samples to the current output.  When use_raw is set the samples go to
 * the descriptor `fd` through wrsamp(), otherwise to the WAV stream `fp`
 * through wrwav_data().
 *
 *   now: the fragment of samples
 *   len: number of samples in the fragment
 *
 * Returns 0 to continue recording, or -1 when the fragment could not be
 * written completely, so that recording terminates instead of silently
 * dropping data (e.g. on a full disk).
 *
 * NOTE: no upper limit is placed on the total recorded length, so a long
 * recording can use up the disk.
 */
static int
adin_callback_file(SP16 *now, int len)
{
  int count;

  if (use_raw) {
    count = wrsamp(fd, now, len);
    if (count < 0) {
      perror("adintool: cannot write");
      return(-1);
    }
    /* wrsamp()'s result is compared as a byte count here; the casts make
       the comparison and the %d arguments well defined */
    if ((size_t)count < len * sizeof(SP16)) {
      fprintf(stderr, "adintool: cannot write more %d bytes\ncurrent length = %d\n", count, (int)(speechlen * sizeof(SP16)));
      return(-1);
    }
  } else {
    if (wrwav_data(fp, now, len) == FALSE) {
      perror("adintool: cannot write");
      return(-1);
    }
  }

  /* count only samples that have actually been written */
  speechlen += len;

  fprintf(stderr, ".");		/* output progress bar in dots */
  return(0);
}
/*
 * Callback for adinnet output: send one fragment of recorded samples to
 * the adinnet server through the connection `fd` using wt().
 *
 *   now: the fragment of samples
 *   len: number of samples in the fragment
 *
 * Returns 0 to continue recording, or -1 when wt() reported a failure, so
 * that recording terminates instead of going on with a broken connection.
 */
static int
adin_callback_adinnet(SP16 *now, int len)
{
  int count;

  count = wt(fd, (char *)now, len * sizeof(SP16));
  if (count < 0) {
    perror("adintool: cannot write");
    return(-1);
  }

  /* count only samples that have actually been sent */
  speechlen += len;

  fprintf(stderr, ".");		/* output progress bar in dots */
  return(0);
}
/*
 * Tell the adinnet server that the current segment has ended, by sending
 * a zero-length message through wt() on the connection `fd`.
 * A failure is reported on stderr only.
 */
static void
adin_send_end_of_segment()
{
  /* wt() is asked to send 0 bytes, but it still has to be given a valid
     pointer: passing an uninitialized pointer variable is an
     indeterminate-value read */
  char dummy = 0;

  if (wt(fd, &dummy, 0) < 0) perror("adintool: cannot write");
}

int
main(int argc, char *argv[])
{
  size_t count,size;
  int sid;
  int ret;
  boolean outloop;
  char *outpath;

  /* parse option */
  opt_parse(argc, argv);

  /* check options */
  if (speech_output == SPOUT_FILE && filename == NULL) {
    fprintf(stderr, "Error: output filename not specified\n");
    exit(1);
  }
  if (speech_output == SPOUT_ADINNET && adinnet_serv == NULL) {
    fprintf(stderr, "Error: adinnet server name for output not specified\n");
    exit(1);
  }
#ifdef USE_NETAUDIO
  if (speech_input == SP_NETAUDIO && netaudio_devname == NULL) {
    fprintf(stderr, "Error: NetAudio server name not specified\n");
    exit(1);
  }
#endif

  /* input device initialization */
  {
    char *arg;
    
    /* select input */
    if (adin_select(speech_input) == FALSE) {
      j_printerr("Error: invalid input device\n");
      exit(1);
    }
    /* set device-dependent param (this part is device dependent) */
    if (speech_input == SP_ADINNET) {
      arg = mymalloc(100);
      sprintf(arg, "%d", adinnet_port);
#ifdef USE_NETAUDIO
    } else if (speech_input == SP_NETAUDIO) {
      arg = mymalloc(strlen(netaudio_devname + 1));
      strcpy(arg, netaudio_devname);
#endif
    } else {
      arg = NULL;
    }
    if (adin_standby(sfreq, arg) == FALSE) {
      j_printerr("Error: failed to standby input\n");
      exit(1);
    }
    /* set device-independent param */
    adin_setup_param(do_segment ? 1 : 0, strip_zero_sample, level_thres, zero_cross_num, head_margin, tail_margin, sfreq);
    if (query_segment_on() != do_segment) { /* check result */
      j_printerr("Error: cannot set segmentation status\n");
      j_printerr("Error: invalid segmentation setting for the device\n");
      exit(1);
    }
  }
  
  /* disable separation when no segmentation */
  if (!do_segment) continuous_segment = FALSE;

  /* print status */
  put_status();

  /* output setup */
  if (speech_output == SPOUT_FILE) {
    if (continuous_segment) {
      outpath = (char *)mymalloc(strlen(filename) + 10);
    } else {
      outpath = filename;
    }
  } else if (speech_output == SPOUT_ADINNET) {
    fprintf(stderr, "connecting to %s:%d...", adinnet_serv, adinnet_port);
    fd = make_connection(adinnet_serv, adinnet_port);
    if (fd < 0) return 1;
    fprintf(stderr, "connected\n");
  } else if (speech_output == SPOUT_STDOUT) {
    fd = 1;
    fprintf(stderr,"[STDOUT]");
  }
  if (continuous_segment) {
    total_speechlen = 0;
    sid = startid;
  }

  /* begin recording */
  fprintf(stderr,"[start recording]\n");
  
  /* init input device */
  if (!adin_begin()) return(1);
  
  /* begin continuous segmentation loop */
  do {
    /* open output filename */
    if (speech_output == SPOUT_FILE) {
      if (continuous_segment) {
	if (use_raw) {
	  sprintf(outpath, "%s.%04d.raw", filename, sid);
	} else {
	  sprintf(outpath, "%s.%04d.wav", filename, sid);
	}
      }
      fprintf(stderr,"[%s]", outpath);
      if (access(outpath, F_OK) == 0) {
	if (access(outpath, W_OK) == 0) {
	  fprintf(stderr, "(override)", filename);
	} else {
	  perror("adintool");
	  return(1);
	}
      }
      if (use_raw) {
	if ((fd = creat(outpath, 0644)) == -1) {
	  perror("adintool");
	  return 1;
	}
      } else {
	if ((fp = wrwav_open(outpath, sfreq)) == NULL) {
	  perror("adintool");
	  return 1;
	}
      }
    }
    /* do write one segment with segmentation */
    /* for incoming speech input, speech detection and segmentation are
       performed and, adin_callback_* is called for speech output for each segment block.
     */
    speechlen = 0;
    if (speech_output == SPOUT_ADINNET) {
      ret = adin_go(adin_callback_adinnet, NULL);
    } else {
      ret = adin_go(adin_callback_file, NULL);
    }
    j_printf("[%d]\n", ret);
    /* return value of adin_go:
       -1: input device read error or callback process error
       0:  paused by input stream (end of file, etc..)
       >0: detected end of speech segment:
             by adin-cut, or by callback process
       (or return value of ad_check (<0) (== not used in this program))
    */
    j_printerr("\n");

    if (ret < 0) {
      /* error in input device or callback, terminate program */
      return 1;
    }
    
    if (speech_output == SPOUT_ADINNET && ret > 0) {
    }

    if (speech_output == SPOUT_FILE) {
      /* close output file */
      if (use_raw) {
	if (close(fd) < 0) {perror("adintool"); return 1;}
      } else {
	if (wrwav_close(fp) != TRUE) {perror("adintool"); return 1;}
      }
      /* output info */
      j_printf("%s: %d samples (%.2f sec.)\n", outpath, speechlen, (float)speechlen / (float)sfreq);
    } else if (speech_output == SPOUT_ADINNET) {
      if (ret > 0) {		/* segmented by adin-cut */
	/* send end-of-segment ack to cient */
	adin_send_end_of_segment();
      }
      /* output info */
      j_printf("send: %d samples (%.2f sec.)\n", speechlen, (float)speechlen / (float)sfreq);
    }
    if (continuous_segment) {
      total_speechlen += speechlen;
      sid++;
    }
  } while (continuous_segment && ret > 0); /* end of loop*/
    
  if (speech_output == SPOUT_FILE) {
    if (continuous_segment) {
      j_printf("total %d samples (%.2f sec.) segmented to %s.%04d - %s.%04d files\n", total_speechlen, (float)total_speechlen / (float)sfreq, filename, 0, filename, sid-1);
    }
  }

  /* end device */
  adin_end();

  return 0;
}

