/* 
 * Copyright (c) 2003-2005 RIKEN Japan, All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY RIKEN AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL RIKEN OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

/* $Id: LexicalAnalizer.cpp,v 1.5 2005/01/25 12:29:17 orrisroot Exp $ */
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif

#include "SL_header.h"
#include <cctype>
#include <cstring>
#include <string>

#include <libsatellite.h>

using namespace std;

#define  __IMPORTSYMBOL__
#include "libsatellite.h"
#include "SL_exception.h"
#include "history.h"
#include "module.h"
#include "tty_console.h"
#include "SL_Index.h"
#include "Base_Buffer.h"
#include "Series_Buffer.h"
#include "Snapshot_Buffer.h"
#include "String_Buffer.h"
#include "Scalar_Buffer.h"
#include "SL_Tool.h"
#include "SL_Object.h"
#include "SymbolList.h"
#include "SystemCommon.h"
#undef   __IMPORTSYMBOL__
#include "parse.h"
#include "Datum.h"
#include "Program.h"
#include "Inline.h"
#include "CommandAlias.h"
#include "Builtin.h"
#include "LexicalAnalizer.h"
#include "pipe.h"
#include "vmstat.h"
#define   __EXPORTSYMBOL__
#include "StackMachine.h"
#undef    __EXPORTSYMBOL__

// Sentinel returned by number() when the text at the cursor is not a number.
#define NaN (-1)
// Clears the line-top flag and returns `type` from the enclosing method.
#define Return(type) {top=false; return type;}

// Builds a lexer bound to the stack machine `s` and the console `con`.
// The read cursor starts at the beginning of the internal stream buffer,
// the line-top flag is set and no comment is open.  The rc script is marked
// as still pending (rc_finished=false) while the setup script is marked as
// already done (setup_finished=true); no clean-up script is requested.
LexicalAnalizer::LexicalAnalizer(StackMachine *s, tty_console *con) : 
  stackmachine(s), console(con), in_comment(0), top(true), streamptr(stream), 
  contflag(true), keep_alive(true),
  rc_finished(false), setup_finished(true), clean_finished(false), 
  clean_need(false){
}

// Destroys every object still held on the inline stack, popping each entry
// after it has been deleted.
LexicalAnalizer::~LexicalAnalizer(){
  for(; !inline_stack.empty(); inline_stack.pop()){
    delete inline_stack.top();
  }
}

// Tells the main loop whether another round of input should be read.
//   yyparse_flag : result flag handed in by the caller; a zero value together
//                  with a pending clean-up request triggers the clean-up pass.
// Returns true while start-up scripts or queued input files are pending,
// while the session is still marked as continuing, or once to let the
// clean-up script run; false otherwise.
bool LexicalAnalizer::moreinput(int yyparse_flag){
  // rc / setup script or queued input files not consumed yet
  const bool startup_pending =
    !rc_finished || !setup_finished || !infiles.empty();
  if(startup_pending){
    contflag = true;
    return true;
  }

  if(!keep_alive){
    contflag = false;
  }
  // the clean-up pass has already been started: nothing more to read
  if(clean_finished){
    return false;
  }
  if(contflag && yyparse_flag){
    return true;
  }
  if(!clean_need){
    return false;
  }
  // reset the program and grant one more round for the clean-up script
  stackmachine->Program_Init();
  clean_finished = true;
  return true;
}

// Public Method
int LexicalAnalizer::yylex(){
  char *ln;
  space_skip();
  // number
  if(c=='.' || isdigit(c)){
    if(number()!=NaN){
      Return(NUMBER);
    }
  }

  // token 
  if(isalpha(c)){
    int type=token();
    Return(type);
  }

  if(top && (c=='/' || c=='.')){ // cf. /usr/local/bin/tcsh.
    char buf[ONELINE];
    ln=getline("",c);
    if(is_executable_command(ln, buf, ONELINE)){
      free(ln);
      ln = strdup(buf);
    }
    yylval.strval = ln;
    Return(EXT_COM);
  }

  top=false;
  // symbols
  try{
    switch (c){
    case EOF:
      return 0;	// end of file
    case '\\':
      ln=getline("",'\0');
      yylval.strval=ln;
      return EXT_COM;
    case '"':
      return quoted_string('"');
    case '\'':
      return quoted_string('\'');
    case '`':
      quoted_string('`');
      return SYSTEM_COM;
      // relative operator
    case '>':
      return follow('>',1,0) ? RPIPE : follow('=',GE,GT); 
    case '<':
      return follow('<',1,0) ? LPIPE : follow('=',LE,LT); 
    case '=':
      return follow('=',EQ,'=');
    case '!':
      return follow('=',NE,NOT);
    case '|':
      return follow('|',OR,'|');
    case '&':
      return follow('&',AND,'&');
      
      // assginment operator and '++' ,'--' */
    case '+':
      yylval.intval = MATH_ADD;
      return follow('=',1,0) ? ASGN_OP : follow('+',INCDEC,'+');
    case '-':
      yylval.intval = MATH_SUB;
      return follow('=',1,0) ? ASGN_OP : follow('-',INCDEC,'-');
    case '*':
      yylval.intval = MATH_MUL;
      return follow('=',ASGN_OP,'*');
    case '/':
      yylval.intval = MATH_DIV;
      return follow('=',ASGN_OP,'/');
    case '%':
      yylval.intval = MATH_MOD;
      return follow('=',ASGN_OP,'%');
    case '^':
      yylval.intval = MATH_POW;
      return follow('=',ASGN_OP,'^');
    case '\r':
      return follow('\n','\n','\n');
    case '\n':
      return '\n';
    case '\0':
      return '\n';
    default:
    return c;
    }
  }catch(execerr_exception){ return '\n'; }
}

// Loads the text `st` into the stream buffer, runs the preprocessor
// (prepro) on it and rewinds the read cursor to its beginning.
// Returns false, leaving the buffer untouched, when `st` is null, empty or
// too long to fit in the ONELINE-byte buffer together with its terminator;
// true otherwise.
bool LexicalAnalizer::InitStream(const char *st){
  if(st==0) return false;
  const size_t len=strlen(st);
  if(len==0) return false;
  // the original copied unconditionally and could overrun `stream`
  if(len>=static_cast<size_t>(ONELINE)) return false;
  strcpy(stream,st);
  prepro(stream,ONELINE);
  streamptr=stream;
  return true;
}

// Writes the statement  inline("<path>");  into buf (capacity bsize bytes).
// Returns false, leaving buf empty, when the statement would not fit.
static bool sl_make_inline_command(char *buf, size_t bsize, const char *path){
  static const char head[]="inline(\"";
  static const char tail[]="\");";
  // fixed part of the statement plus the terminating NUL
  const size_t fixed=(sizeof(head)-1)+(sizeof(tail)-1)+1;
  const size_t plen=strlen(path);
  if(bsize<fixed || plen>bsize-fixed){
    if(bsize>0) buf[0]='\0';
    return false;
  }
  strcpy(buf,head);
  strcat(buf,path);
  strcat(buf,tail);
  return true;
}

// Fetches the next line of input into the stream buffer.
//   pn : passed through to console->readline().
// While the console is not reading from a file, pending scripts are served
// first, one per call, as an  inline("<file>");  statement: the rc file, the
// setup file, each queued input file and finally the clean-up file.
// Otherwise one line is read from the console, comments are stripped and
// the preprocessor is applied.
// Returns 1 when a line is available, 0 when nothing was read; contflag is
// updated to tell whether reading should go on.
int LexicalAnalizer::getstream(int pn){
  streamptr=stream;           // beginning of stream
  top=true;                   // set line-top flag
  // system setup
  if(!console->is_file()){
    const char *script=0;
    bool from_queue=false;
    if(!rc_finished){
      script=rc_file.c_str();
      rc_finished=true;
    }else if(!setup_finished){
      script=setup_file.c_str();
      setup_finished=true;
    }else if(!infiles.empty()){
      script=(*(infiles.begin())).c_str();
      from_queue=true;
    }else if(clean_need && clean_finished){
      script=clean_file.c_str();
    }
    if(script!=0){
      // a name that does not fit yields an empty line instead of
      // overrunning the stream buffer
      if(!sl_make_inline_command(stream,ONELINE,script)){
        console->warning("inline file name too long, ","skipped.\n");
      }
      // drop the queue entry only after its name has been copied
      if(from_queue) infiles.pop_front();
      return 1;
    }
  }
  stream[0]='\0';
  if(console->readline(stream, ONELINE, pn) == 0){
    // end of input: an unfinished block comment is reported and closed
    break_comment(1);
    if(console->is_file()){
      stackmachine->end_inline();
      contflag=true;
    }else{
      contflag=false;
    }
    return 0;
  }
  if(stream[0]=='\0'){
    contflag=false;
    return 0;
  }

  // skipping comment
  comment_proc();

  prepro(stream,ONELINE);

  space_skip();
  streamptr=stream;
  contflag=true;
  return 1;
}


//------------ Private Method --------------------------------

// parts of yylex()
// number()
// Scans a numeric literal that starts at the character last read.
// The character is pushed back, getnum() finds the end of the literal, and
// the text is converted with atof().  On success the value is stored in
// yylval.fltval, the cursor moves behind the literal and NUMBER is returned.
// If no literal starts here the pushed-back character is consumed again and
// NaN is returned.
int LexicalAnalizer::number(){
  stream_ungetc();
  char *next=getnum((unsigned char*)streamptr); // (*next) is not a number
  const size_t len=(size_t)(next-streamptr);
  if(len==0){
    stream_getc();
    return NaN;
  }
  // A std::string holds the digits so that a literal of any length is safe;
  // the former fixed 100-byte buffer was written without a bounds check.
  const std::string text(streamptr,len);
  yylval.fltval = atof(text.c_str());
  streamptr=next;              // set next pointer
  return NUMBER;
}

int LexicalAnalizer::token(){
  char            sbuf[ONELINE];
  symbol_t       *s;

  stream_ungetc();
  streamptr=gettoken(streamptr,sbuf,sizeof(sbuf),0);

  // --> temporary ... for flex 
  // I think these keywords ware not necessary to symbol list.
  if(!strcmp(sbuf,"exit")){       return 0; }
  if(!strcmp(sbuf,"Module_Dll")){ return MODULE_DLL; }
  if(!strcmp(sbuf,"Module_Ini")){ return MODULE_INI; }
  if(!strcmp(sbuf,"module")){     return MODULE; }
  if(!strcmp(sbuf,"define")){     return DEFINE; }
  if(!strcmp(sbuf,"inline")){     return INLINE; }
  if(!strcmp(sbuf,"set")){        return SET; }
  if(!strcmp(sbuf,"const")){      return CONST_T; }
  if(!strcmp(sbuf,"proc")){       return SLPROC; }
  if(!strcmp(sbuf,"func")){       return FUNC; }
  if(!strcmp(sbuf,"external")){   return EXTERNAL; }
  if(!strcmp(sbuf,"return")){     return RETURN; }
  if(!strcmp(sbuf,"break")){      return BREAK; }
  if(!strcmp(sbuf,"continue")){   return CONTINUE; }
  if(!strcmp(sbuf,"if")){         return IF; }
  if(!strcmp(sbuf,"else")){       return ELSE; }
  if(!strcmp(sbuf,"while")){      return WHILE; }
  if(!strcmp(sbuf,"do")){         return DO; }
  if(!strcmp(sbuf,"for")){        return FOR; }
  if(!strcmp(sbuf,"read")){       return READ; }
  if(!strcmp(sbuf,"undef")){      return UNDEF_VAR; }
  if(!strcmp(sbuf,"undefall")){   return UNDEFALL; }
  if(!strcmp(sbuf,"isdef")){      return ISDEF_VAR; }
  if(!strcmp(sbuf,"system")){     return SYSTEM; }

  // <-- ...
  // look up symbol table
  s=symbol_table_lookup(syscom->gl_symtab,sbuf);
  if(s==0)
    s=symbol_table_lookup(syscom->cur_symtab,sbuf);

  if(top){
    if(s==0 || symbol_get_type(s) == SYMBOL_TYPE_UNDEF){
      char full[ONELINE];
      char *tmp;
      char sepa=stream_lookup();
      tmp = streamptr;
      space_skip(); /* set next char to 'c' */
      streamptr = tmp;
      if(is_internal_cmd(sbuf)){
        char *ln;
        sepa=(sepa!=c) ? sepa : '\0';
        ln=getline(sbuf,sepa);
        yylval.strval=ln;
        return INT_COM;
      }
      if(is_external_cmd(sbuf,full,ONELINE)){
        char *ln;
        sepa=(sepa!=c) ? sepa : '\0';
        ln=getline(full,sepa);
        yylval.strval=ln;
        return EXT_COM;
      }
    }
  }

  // VAR
  if(s==0){ // undefined variable
    s=symbol_new(sbuf, NULL, SYMBOL_TYPE_UNDEF);
    if(s == NULL){
      printf("fatal error : cound not create new symbol (%s)\n",sbuf);
      exit(1);
    }
    symbol_table_install(syscom->cur_symtab,s);
  }
  yylval.sym=s;  // for parser
  switch(symbol_get_type(s)){
  case SYMBOL_TYPE_UNDEF:  return VAR;
  case SYMBOL_TYPE_VAR:    return VAR;
  case SYMBOL_TYPE_CONST:  return CONSTANT;
  case SYMBOL_TYPE_PROC:   return PROCEDURE;
  case SYMBOL_TYPE_FUNC:   return FUNCTION;
  case SYMBOL_TYPE_MODULE: return MODULE_NAME;
  case SYMBOL_TYPE_CLASS:  return CLASS_T;
  case SYMBOL_TYPE_BLTIN:  return BLTIN;
  case SYMBOL_TYPE_OPCODE: return OPCODE;
  case SYMBOL_TYPE_SATCOM: return SAT_COM;
  default:
    printf("unknown type [%d]\n", symbol_get_type(s));
    return 0;
  }
  return 0;
}

// Reports whether `sbuf` names one of the commands handled inside the
// shell: "ls", "dir", "chdir" or "cd".  A null or empty name is never a
// command.
bool LexicalAnalizer::is_internal_cmd(const char *sbuf){
  static const char *const names[]={ "ls", "dir", "chdir", "cd" };
  const size_t count=sizeof(names)/sizeof(names[0]);

  if(sbuf==0 || sbuf[0]=='\0'){ //for fail-safe
    return false;
  }
  for(size_t k=0; k<count; ++k){
    if(!strcmp(sbuf, names[k])){
      return true;
    }
  }
  return false;
}
// Reports whether the word `sbuf` at the top of a line names an executable
// external command.  On success the command as resolved by
// is_executable_command() is written to `full` (capacity `len`).
// The decision depends on `c`, the first non-blank character after the word:
//   '=' or '['  : an assignment or an index, never a command;
//   '/' or ':'  : only the path prefixes ".", ".." (and a drive letter on
//                 WIN32) are accepted; the rest of the path is then read from
//                 the stream and the cursor moves behind it on success;
//   otherwise   : `sbuf` itself is checked.
bool LexicalAnalizer::is_external_cmd(const char *sbuf, char *full, size_t len){
  switch (c){
  case '=':
  case '[':
    return false;
  case '/':
  case ':':
    { /* quick hack for command path with drive letter */
      char buf[ONELINE], *tmp, *next;
      size_t sbuflen = strlen(sbuf);
      if((sbuflen == 1 && *sbuf == '.') ||     /* path name of ./hoge */
#ifdef WIN32
         (sbuflen == 1 && isalpha((unsigned char)*sbuf)) ||   /* C:\\hoge */
#endif
         (sbuflen == 2 && !strcmp(sbuf,".."))){ /* path name of ../home */
        /* save streamptr */
        tmp = streamptr;
        strcpy(buf,sbuf);
        /* only the space left behind the prefix may be filled; the former
           fixed size ONELINE-1 overran buf by one byte for ".." */
        next   = gettoken(streamptr,&buf[sbuflen],(int)(ONELINE-sbuflen),1);
        if(is_executable_command(buf, full, len) == 0){
          /* not executable command */
          streamptr = tmp;
          return false;
        }else{
          /* it is executable command */
          streamptr = next;
          return true;
        }
      }
    }
    return false;
  default:
    if(is_executable_command(sbuf,full,len) == 0) return false;
    return true;
  }
}

// quoted string
// Reads a string literal up to the closing `quote` character (the opening
// one has already been consumed).  Backslash escapes are translated by
// backslash().  A heap copy of the text is stored in yylval.strval and
// STRING is returned.
// console->execerror() is called for a missing closing quote, for a string
// that does not fit in ONELINE-1 characters and when the copy cannot be
// allocated.
int LexicalAnalizer::quoted_string(int quote){
  static char sbuf[ONELINE];
  char *p;
  for(p=sbuf; (c=stream_getc())!=quote; p++){
    // '\0' ends the stream buffer just like a newline ends a line; without
    // this test an unterminated quote was scanned past the end of the text
    if(c=='\n' || c=='\0' || c==EOF){
      console->execerror(0,"missing quote");
    }
    if(p>=sbuf+sizeof(sbuf)-1){
      console->execerror(0,"string too long");
    }
    *p=backslash(c);
  }
  *p='\0';
  yylval.strval = strdup(sbuf);
  if(yylval.strval == NULL)
    console->execerror(0,"out of memory");
  return STRING;
}

// getline()
// Reads the rest of the current line (up to, not including, '\n' or '\0')
// and returns a malloc()-ed string made of `str`, the separator character
// `sepa` (omitted when it is '\0') and the text read.  A null `str` is
// treated as an empty prefix.  The line end itself is pushed back so the
// caller still sees it.
// The returned buffer belongs to the caller, who has to free() it.
// console->execerror() is called when the line does not fit in ONELINE-1
// characters or when the result cannot be allocated.
char  *LexicalAnalizer::getline(char *str,int sepa){
  // command line
  char  sbuf[ONELINE],*p;
  for (p=sbuf; (c=stream_getc())!='\n' && c!='\0';p++){
    if(p>=sbuf+sizeof(sbuf)-1){
      string err;
      *p='\0';
      err="string too long ";
      err+=sbuf;
      console->execerror(0,err.c_str());
    }
    *p=c; // backslash(c); TODO CHECKME
  }
  stream_ungetc(); // for newline
  *p='\0';

  // resolve a null prefix before its length is taken
  const char *prefix = (str!=0) ? str : "";

  /* size=str+sepa+sbuf+'\0' */
  char *sym = (char*)malloc(sizeof(char)*(strlen(prefix)+strlen(sbuf)+2));
  if(sym == NULL){
    console->execerror(0,"out of memory");
    return NULL;   // only reached if execerror() comes back
  }
  if(sepa!='\0')
    sprintf(sym,"%s%c%s",prefix,sepa,sbuf);
  else
    sprintf(sym,"%s%s",prefix,sbuf);
  return sym;
}

// follow()
// One-character look-ahead used for two-character operators such as ">=".
// Consumes the next character and returns `ifyes` when it equals `expect`;
// otherwise the character is pushed back and `ifno` is returned.
int LexicalAnalizer::follow(int expect,int ifyes,int ifno){
  const int next=stream_getc();
  if(next!=expect){
    stream_ungetc();
    return ifno;
  }
  return ifyes;
}

// backslash()
// Interprets a backslash escape.  Any character other than '\\' is returned
// unchanged.  After a backslash the next character is read from the stream:
// b, f, n, r and t yield the matching control character, every other
// character stands for itself.
int LexicalAnalizer::backslash(int ch){
  // pairs of <escape letter><control character>
  static char     escapes[]="b\bf\fn\nr\rt\t";
  if(ch=='\\'){
    ch=stream_getc();
    if(islower(ch)){
      const char *hit=strchr(escapes,ch);
      if(hit!=0){
        return hit[1];
      }
    }
  }
  return ch;
}

// Reads characters until one is neither a blank nor a tab and leaves that
// character in `c`.
void LexicalAnalizer::space_skip(){
  do{
    c=stream_getc();
  }while(c==' ' || c=='\t');
}


// Discards the rest of the current statement: characters are read until
// `c` holds ';', a line end ('\n' or the '\0' that terminates the stream
// buffer) or EOF.
void LexicalAnalizer::flush_stream(){
  // '\0' is handled as a line end elsewhere in this lexer (yylex, getline);
  // stopping on it keeps the scan inside the text of the buffer
  while (c != ';' && c != '\n' && c != '\0' && c != EOF)
    c = stream_getc(); /* flush rest of input line */
}

// ------------------------------------------------------
// -------------- parts of getstream() ------------------
// break_comment()
// Closes any block comment that is still open.
//   echo : when non-zero, a warning is printed if a comment was open.
// Returns the comment depth that was in effect before the reset.
int LexicalAnalizer::break_comment(int echo){
  const int was_open=in_comment;
  in_comment=0;
  if(echo && was_open){
    console->warning("Program ended ","befor \"*/\" was complete.\n");
  }
  return was_open;
}

// comment_proc()
// Removes comments from the line held in the stream buffer.
// For file input, C-style block comments are stripped first.  After that the
// line is cut at the first '#' found outside double quotes.  The read cursor
// is rewound to the beginning of the buffer.
void LexicalAnalizer::comment_proc(){
  // C like comment
  if(console->is_file()){
    c_like_comment();
  }
  // walk over the ';' delimiters up to the first comment marker '#'
  for(char *p=nextarg(stream,";#"); p!=0; p=nextarg(p,";#")){
    if(p[-1]=='#'){
      p[-1]='\0';   // comment out the rest of the line
      break;        // nothing behind the terminator is part of the line
    }
    // p[-1] is ';' here: statement delimiters are left as they are
  }
  // top of stream buffer
  streamptr=stream;
}

// c_like_comment()
// Strips C-style "/* ... */" comments from the current line.
// The line is read through stream_getc(); everything outside a comment is
// copied into a local buffer which finally replaces the content of `stream`.
// in_comment is a depth counter stored in the object: it is raised on every
// "/*" and lowered on every "*/" met inside a comment, and it keeps its value
// between calls, so a comment may span several lines.
// Text between double quotes is copied unchanged, comment markers included.
// Returns 0 without modifying the line when its first character is a
// backslash, 1 otherwise.
int LexicalAnalizer::c_like_comment(){
  char            str[ONELINE];
  register char   *p=str;
  bool         in_wquote=false;    /* toggle switch */
  c=stream_getc();
  // a line starting with a backslash is left untouched
  if(c=='\\') return 0;
  while(c!='\0'){
    // copy mode: inside a quoted string, or outside any comment and not at
    // the start of one
    if(in_wquote ||(!in_comment && (c!='/' || stream_lookup()!='*'))){
      if(c=='"')
        in_wquote=(in_wquote)?false:true;
      *p++=c;
      c=stream_getc();
      continue; /* normal statements */
    }
    // skip mode: runs until the depth is back to zero or the line ends
    do {                /* in comments */
      if(c=='/' && stream_lookup()=='*'){
        c=stream_getc();     /* remove '*' */
        in_comment++;
      } else if(c=='*' && stream_lookup()=='/'){
        c=stream_getc();     /* remove '/' */
        in_comment--;
      }
    } while((c=stream_getc())!='\0' && in_comment);
  }
  *p='\0';
  // the filtered text is never longer than the line it was taken from
  strcpy(stream,str);
  return 1;
}

// nextarg()
// Searches `sbuf` for the first character that occurs in `separator`,
// ignoring everything between double quotes.
// Returns a pointer to the character right after the separator found, or 0
// when the string holds no separator outside quotes.
char *LexicalAnalizer::nextarg(char *sbuf,const char *separator){
  for (char *p=sbuf; *p!='\0'; p++){
    for (int i=0; separator[i]!=0; i++){
      if(*p==separator[i]){
        return p+1;
      }
    }
    if(*p=='"'){
      while(*++p!='"' && *p!='\0')
        /* quoted_string skip */ ;
      // unterminated quote: stop on the terminator, otherwise the p++ of
      // the outer loop would step over it and scan beyond the string
      if(*p=='\0') break;
    }
  }
  return 0;
}

// Preprocesses one input line in place.
//   oneline : the line; nothing is done when it is null or empty.
//   bsize   : capacity of the buffer holding the line, passed to alias_proc.
// Every 0x07 (BEL) character is turned into a blank, then the alias
// replacement is applied.
void LexicalAnalizer::prepro(char *oneline, size_t bsize){
  if(oneline == 0 || oneline[0] == '\0'){
    return;
  }
  for(size_t k=0; oneline[k]!='\0'; ++k){
    if(oneline[k]==0x07){
      oneline[k]=' ';
    }
  }
  /* do alias */
  alias_proc(oneline, bsize);
}

// Replaces an alias at the beginning of `oneline` by its definition.
//   oneline : the line, edited in place.
//   bsize   : capacity of the buffer that holds the line.
// The leading word is looked up in the alias table; when it is an alias the
// word is replaced by the real name and the search goes on right behind the
// inserted text.  The function returns as soon as the word at the current
// position is not an alias.  console->execerror() is called when the
// expanded line would not fit in the buffer.
void LexicalAnalizer::alias_proc(char *oneline, size_t bsize){
  char nickname[100];
  while(1){
    gettoken(oneline,nickname,(int)sizeof(nickname),0);
    const char *realname = alias.LookUp(nickname);
    if(realname == 0) return;
    const size_t real_len = strlen(realname);
    const size_t nick_len = strlen(nickname);
    // length of the text that follows the nickname
    const size_t tail_len = strlen(oneline) - nick_len;
    if(tail_len + real_len + 1 > bsize){
      console->execerror(0,"alias too long");
      return;   // only reached if execerror() comes back
    }
    // Move the tail (with its terminator) to its final place and put the
    // real name in front of it.  memmove copes with the overlap and copies
    // exactly tail_len+1 bytes; the former hand-written shift moved one byte
    // more and could write one byte past the end of the buffer.
    memmove(oneline+real_len, oneline+nick_len, tail_len+1);
    memcpy(oneline, realname, real_len);
    oneline += real_len;
    bsize -= real_len;
  }
}


//------------------------------------------------------------
// Finds the end of the numeric literal that starts at `st`: digits, an
// optional '.' followed by digits, and an optional exponent.
// Returns a pointer to the first character behind the literal, or `st`
// itself when the text is a lone '.' (called by number()).
char *LexicalAnalizer::getnum(unsigned char *st){
  unsigned char *cur=st;
  // integer part
  for(; isdigit(*cur); ++cur)
    ;
  // decimal point and fraction
  if(*cur=='.'){
    ++cur;
    for(; isdigit(*cur); ++cur)
      ;
  }
  // a '.' with no digit on either side is not a number
  if(st[0]=='.' && cur==st+1){
    return (char*)st;
  }
  return (char*)exponent(cur);
}

// Scans an exponent of the form  e|E [+|-] digit+  starting at `st`.
// Returns a pointer behind the exponent when one is present, otherwise
// `st` unchanged.
unsigned char *LexicalAnalizer::exponent(unsigned char *st)
{
  unsigned char *cur=st;
  // e,E
  if(*cur!='e' && *cur!='E'){
    return st;
  }
  ++cur;
  // optional sign
  if(*cur=='+' || *cur=='-'){
    ++cur;
  }
  // at least one digit is required
  if(!isdigit(*cur)){
    return st;
  }
  while(isdigit(*cur)){
    ++cur;
  }
  return cur;
}

// call by token()
// Copies the word at the start of `oneline` into `token` (capacity `siz`,
// terminator included) and returns a pointer to the first character that
// was not copied.
//   mode 0 : a word consists of letters, digits, '_' and '.';
//   other  : '/' and ':' belong to the word as well (path names).
// Copying also stops when `token` is full.
char *LexicalAnalizer::gettoken(char *oneline,char *token,int siz, int mode){
  unsigned char *src=(unsigned char*)oneline;
  unsigned char *dst=(unsigned char*)token;
  for(; *src!='\0'; ++src, ++dst){
    bool accepted = isalnum(*src) || *src=='_' || *src=='.';
    if(!accepted && mode!=0){
      accepted = (*src=='/' || *src==':');
    }
    if(!accepted){
      break;
    }
    // keep one byte for the terminator
    if((char*)dst>=token+siz-1){
      break;
    }
    *dst=*src;
  }
  *dst='\0';
  return (char*)src;                  // return next pointer
}
