#import "common.h"
#import "CMLexer.h"
#import "CMToken.h"
#import "CMContext.h"

@implementation CMLexer

// Convenience constructor: allocates a lexer, initializes it with the given
// source string and context, and hands back an autoreleased instance.
+ (id)lexerWithString:(NSString *)aString context:(CMContext *)aContext
{
  id lexer = [[self class] alloc];

  lexer = [lexer initWithString:aString context:aContext];
  return [lexer autorelease];
}

// Base initializer: creates the (initially empty) mutable token list.
// Returns nil if the superclass initializer fails.
- (id)init
{
  // -init may return a different object or nil, so its result must be
  // assigned back to self and checked before any ivar is touched.
  self = [super init];
  if (self != nil) {
    tokens = [[NSMutableArray alloc] initWithCapacity:1];
  }
  return self;
}

// Initializes the lexer with its own copy of `aString`, retains `aContext`,
// and tokenizes the string immediately by calling -parse.
// Returns nil if -init fails.
- (id)initWithString:(NSString *)aString context:(CMContext *)aContext
{
  // Use the object returned by -init; writing ivars through a stale or nil
  // self is undefined.
  self = [self init];
  if (self != nil) {
    string = [[NSString alloc] initWithString:aString];
    context = [aContext retain];
    [self parse];
  }
  return self;
}

// Gives up ownership of the three ivars this object retains or allocates
// (the context, the token list and the source string), then lets the
// superclass finish deallocation.
- (void)dealloc
{
  [context release];
  [tokens release];
  [string release];

  [super dealloc];
}

// Returns the token list built by -parse. This is the lexer's own backing
// array (no copy is made).
- (NSArray *)tokens
{
  return tokens;
}
// Returns the context this lexer was initialized with.
- (CMContext *)context
{
  return context;
}

// Tokenizes `string` and appends the resulting CMToken objects to `tokens`.
//
// Text that does not become a token of its own (comments, whitespace,
// escaped or suppressed newlines) is accumulated in `prefix` and attached to
// the next emitted token through -setPrefixContent:forToken:. A ";" token is
// always appended after the last token.
//
// The current line number is tracked in `line`, pushed to the context at the
// top of every iteration, and stamped on the previously produced token.
// If no rule matches at the current scan location a syntax exception is
// raised through the context.
- (void)parse
{
  NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init];
  NSScanner *scanner = [NSScanner scannerWithString:string];
  // Whitespace and newlines are handled explicitly below, so the scanner
  // must not skip anything on its own.
  [scanner setCharactersToBeSkipped:nil];

  NSString *matched, *value = nil, *escapeString;
  NSString *commentString1 = @"//";
  NSString *commentString2 = @"#!";
  id setE, strE, str, set, token = nil, preToken;
  NSCharacterSet *escapeSet = [CMToken escapeCharacterSet];
  NSCharacterSet *newLineSet = [CMToken newLineCharacterSet];
  NSArray *sets = [CMToken allTokenCharacterSets];
  NSArray *strs = [CMToken allTokenStrings];
  NSMutableString *prefix;
  double doubleValue;
  unsigned int intValue;
  NSDecimal decimalValue;
  int line = 1;

  [context clearCurrentLineNumber];
  prefix = [NSMutableString stringWithCapacity:1];
  while (![scanner isAtEnd]) {
    //LOG([preToken description]);
    // `token` still holds whatever the previous iteration produced (or nil).
    preToken = token;
    [preToken setLineNumber:line];
    [context setCurrentLineNumber:line];

    // comment: runs up to (not including) the next newline character
    matched = nil;
    if ([scanner scanString:commentString1 intoString:&matched] ||
        [scanner scanString:commentString2 intoString:&matched]) {
      [prefix appendString:matched];
      matched = nil;
      [scanner scanUpToCharactersFromSet:newLineSet
               intoString:&matched];
      if (matched)
        [prefix appendString:matched];
      continue;
    }

    // whitespace
    matched = nil;
    if ([scanner scanCharactersFromSet:[NSCharacterSet whitespaceCharacterSet]
                 intoString:&matched]) {
      [prefix appendString:matched];
      continue;
    }

    // escape new line: escape characters directly followed by newline
    // characters only extend the prefix; otherwise the escape characters
    // become a token themselves.
    matched = nil;
    if ([scanner scanCharactersFromSet:escapeSet intoString:&matched]) {
      escapeString = [NSString stringWithString:matched];
      matched = nil;
      if ([scanner scanCharactersFromSet:newLineSet
                   intoString:&matched]) {
        [prefix appendString:matched];
        line = line + [matched length];
      } else {
        token = [CMToken tokenWithString:escapeString];
        [self setPrefixContent:prefix forToken:token];
        [tokens addObject:token];
      }
      continue;
    }

    // new line: only emitted as a token when the previous token is not one
    // of the kinds tested below; otherwise it is folded into the prefix.
    matched = nil;
    if ([scanner scanCharactersFromSet:newLineSet intoString:&matched]) {
      line = line + [matched length];
      if (![preToken isNewLine] && ![preToken isDot] && ![preToken isColon] &&
          ![preToken isComma] && ![preToken isOpen] &&
          ![preToken isAssociationLiteral]) {
        token = [CMToken tokenWithString:matched];
        [self setPrefixContent:prefix forToken:token];
        [tokens addObject:token];
      } else {
        [prefix appendString:matched];
      }
      // Either way the next iteration sees a newline token as preToken.
      // This instance is a fresh one and is not added to `tokens`.
      token = [CMToken tokenWithString:matched];
      continue;
    }

    // Literal parsers, tried in order: double quoted string, single quoted
    // string, digit, regexp, selector. Each returns nil without consuming
    // input when it does not apply.
    token = [self parseQuatedStringWithScanner:scanner isSingleQuotation:NO];
    if (token == nil)
      token = [self parseQuatedStringWithScanner:scanner
                    isSingleQuotation:YES];
    if (token == nil)
      token = [self parseDigitWithScanner:scanner];
    if (token == nil)
      token = [self parseRegularExpressionWithScanner:scanner
                    preToken:preToken];
    if (token == nil)
      token = [self parseSelectorWithScanner:scanner preToken:preToken];
    if (token != nil) {
      [self setPrefixContent:prefix forToken:token];
      [tokens addObject:token];
      continue;
    }

    // fixed token strings
    strE = [strs objectEnumerator];
    while ((str = [strE nextObject])) {
      matched = nil;
      if ([scanner scanString:str intoString:&matched]) {
        token = [CMToken tokenWithString:matched];
        [self setPrefixContent:prefix forToken:token];
        [tokens addObject:token];
        break;
      }
    }
    if (matched) continue;

    // token character sets
    setE = [sets objectEnumerator];
    while ((set = [setE nextObject])) {
      matched = nil;
      if ([scanner scanCharactersFromSet:set intoString:&matched]) {
        token = [CMToken tokenWithString:matched];
        [self setPrefixContent:prefix forToken:token];
        [tokens addObject:token];
        // NOTE(review): `line` is not advanced here, and the context's line
        // number is reset from `line` at the top of the next iteration, so
        // this increment is short-lived -- confirm that is intended.
        NSRange range = [matched rangeOfCharacterFromSet:newLineSet];
        if (range.length != 0)
          [context incrementCurrentLineNumber];
        break;
      }
    }
    if (matched) continue;


    // double
    if ([scanner scanDouble:&doubleValue]) {
      // Integral values are written without a fractional part. The
      // non-integral result used to be discarded, leaving `value` stale.
      if (doubleValue == (long)doubleValue)
        value = [NSString stringWithFormat:@"%ld", (long)doubleValue];
      else
        value = [NSString stringWithFormat:@"%f", doubleValue];
      token = [CMToken tokenWithString:value];
      [self setPrefixContent:prefix forToken:token];
      [tokens addObject:token];
      LOG(@"* double: %@", [token description]);
      continue;
    }

    // hex int (stored as its unsigned decimal representation)
    if ([scanner scanHexInt:&intValue]) {
      token = [CMToken tokenWithString:[NSString stringWithFormat:@"%u", intValue]];
      [self setPrefixContent:prefix forToken:token];
      LOG(@"* hex int: %@", [token description]);
      [tokens addObject:token];
      continue;
    }

    // decimal
    if ([scanner scanDecimal:&decimalValue]) {
      value = [NSString stringWithString:
                          [[NSDecimalNumber decimalNumberWithDecimal:decimalValue]
                            description]];
      token = [CMToken tokenWithString:value];
      [self setPrefixContent:prefix forToken:token];
      [tokens addObject:token];
      LOG(@"* decimal: %@", [token description]);
      continue;
    }

    // Nothing above consumed any input, so another iteration would see the
    // exact same state and loop forever. Report the problem and stop.
    [context raiseSyntaxException];
    break;
  }

  // add terminate to end of string
  [tokens addObject:[CMToken tokenWithString:@";"]];
  [pool release];
}

// Hands an immutable snapshot of the accumulated prefix text to `aToken`,
// then empties the mutable buffer so it can collect the next token's prefix.
- (void)setPrefixContent:(NSMutableString *)aString forToken:(CMToken *)aToken
{
  NSString *snapshot = [NSString stringWithString:aString];

  [aToken setPrefixContent:snapshot];
  [aString setString:@""];
}

// Scans a quoted string literal starting at the scanner's current location.
//
// flag == YES scans a single-quoted literal, flag == NO a double-quoted one.
// Returns nil (consuming nothing) when the input does not start with the
// requested quotation mark. Otherwise returns a token whose string includes
// both quotation marks and leaves the scanner just past the closing one.
//
// Escape handling (escape characters come from +[CMToken escapeCharacterSet]):
//   escape + quotation mark  -> the quotation mark alone
//   escape + escape          -> both characters kept
//   escape + other character -> single-quoted: escape doubled, then the
//                               character; double-quoted: both kept as is
//
// Raises a syntax exception through the context when the input ends before
// the closing quotation mark.
- (CMToken *)parseQuatedStringWithScanner:(NSScanner *)scanner
                        isSingleQuotation:(BOOL)flag
{
  NSString *quotation, *matched, *source, *str, *preStr = nil;
  NSMutableString *buffer;
  NSCharacterSet *charSet, *escapeSet = [CMToken escapeCharacterSet];
  unsigned int i, length;
  BOOL isPreCharEscape = NO, isClosed = NO;
  unichar uchar;

  if (flag)
    quotation = @"'";
  else
    quotation = @"\"";
  charSet = [NSCharacterSet characterSetWithCharactersInString:quotation];

  matched = nil;
  if (![scanner scanString:quotation intoString:&matched])
    return nil;

  buffer = [NSMutableString stringWithString:matched];
  source = [scanner string];
  length = [source length];

  for (i = [scanner scanLocation]; i < length; i++) {
    uchar = [source characterAtIndex:i];
    str = [NSString stringWithCharacters:&uchar length:1];

    if (isPreCharEscape) {
      // preStr is the escape character seen in the previous iteration.
      if ([charSet characterIsMember:uchar]) {
        // escaped quotation mark: drop the escape character

      } else if ([escapeSet characterIsMember:uchar]) {
        [buffer appendString:preStr];

      } else if (flag) {
        [buffer appendString:preStr];
        [buffer appendString:preStr];

      } else {
        [buffer appendString:preStr];
      }

      [buffer appendString:str];
      isPreCharEscape = NO;

    } else if ([escapeSet characterIsMember:uchar]) {
      isPreCharEscape = YES;

    } else if ([charSet characterIsMember:uchar]) {
      // closing quotation mark: consume it and stop
      [buffer appendString:str];
      i++;
      isClosed = YES;
      break;

    } else {
      [buffer appendString:str];
    }
    preStr = str;
  }

  [scanner setScanLocation:i];
  // The previous per-iteration -isAtEnd test could never succeed because the
  // scan location was always set to an index below the string length, so an
  // unterminated literal went unnoticed. Check explicitly instead.
  if (!isClosed)
    [context raiseSyntaxException];

  return [CMToken tokenWithString:buffer];
}


// File-scope state for -parseDigitWithScanner:. The character sets are
// created on that method's first call and kept for later calls;
// `initedDigitSets` records whether that has happened yet.
static BOOL initedDigitSets = NO;
static NSCharacterSet *digitSet;
static NSCharacterSet *zeroSet;
static NSCharacterSet *dotSet;
static NSCharacterSet *preBinarySet;
static NSCharacterSet *preOctSet;
static NSCharacterSet *preHexSet;
static NSCharacterSet *exponentSet;
static NSCharacterSet *pnSet;
static NSCharacterSet *hexSet;
static NSCharacterSet *binarySet;
static NSCharacterSet *octSet;
static NSCharacterSet *baseSet;
static NSMutableCharacterSet *digitTermSet;

// Scans a numeric literal starting at the scanner's current location.
//
// Returns nil (consuming nothing) when the current character is not a
// decimal digit. Otherwise returns a token flagged with -setIsDigitLiteral:
// and leaves the scanner at the first character after the literal.
//
// Two forms are handled:
//   * "0" followed by one of "b", "o", "x": binary, octal or hexadecimal
//     digits. ("0e" also enters this path but matches no base and raises.)
//   * plain decimal digits, optionally containing "." and, once a "." has
//     been seen, an "e" exponent with an optional "+" or "-" sign.
// A literal ends at the end of input or at a character from `digitTermSet`;
// any other unexpected character raises a syntax exception via the context.
- (CMToken *)parseDigitWithScanner:(NSScanner *)scanner
{
  BOOL isBinary = NO, isOct = NO, isHex = NO, isExponent = NO, isFloat = NO,
    specifiesPrecision = NO;
  unsigned int location, length;
  NSString *str;
  NSMutableString *buffer;
  unichar digitChar, baseChar = 0;  // 0 is in none of the sets below
  CMToken *token;

  if (!initedDigitSets) {
    initedDigitSets = YES;
    digitSet = [[NSCharacterSet decimalDigitCharacterSet] retain];
    zeroSet = [[NSCharacterSet characterSetWithCharactersInString:@"0"] retain];
    baseSet = [[NSCharacterSet characterSetWithCharactersInString:@"boxe"] retain];
    dotSet = [[NSCharacterSet characterSetWithCharactersInString:@"."] retain];
    preBinarySet = [[NSCharacterSet characterSetWithCharactersInString:@"b"] retain];
    preOctSet = [[NSCharacterSet characterSetWithCharactersInString:@"o"] retain];
    preHexSet = [[NSCharacterSet characterSetWithCharactersInString:@"x"] retain];
    exponentSet = [[NSCharacterSet characterSetWithCharactersInString:@"e"] retain];
    pnSet = [[NSCharacterSet characterSetWithCharactersInString:@"+-"] retain];
    hexSet = [[NSCharacterSet
                characterSetWithCharactersInString:@"0123456789abcdefABCDEF"] retain];
    binarySet = [[NSCharacterSet characterSetWithCharactersInString:@"01"] retain];
    octSet = [[NSCharacterSet characterSetWithCharactersInString:@"01234567"] retain];
    digitTermSet = [[NSCharacterSet whitespaceAndNewlineCharacterSet] mutableCopy];
    [digitTermSet addCharactersInString:@"()[]{},:@#+-*/%^<>=|&"];
  }

  buffer = [NSMutableString stringWithCapacity:1];
  location = [scanner scanLocation];
  str = [scanner string];
  length = [str length];

  // Nothing to read at the end of input; avoids an out-of-range access.
  if (location >= length)
    return nil;

  digitChar = [str characterAtIndex:location];
  if (length > location + 1)
    baseChar = [str characterAtIndex:location+1];
  [buffer appendFormat:@"%C", digitChar];
  location++;
  if ([zeroSet characterIsMember:digitChar] &&
      [baseSet characterIsMember:baseChar])
    specifiesPrecision = YES;
  else if ([digitSet characterIsMember:digitChar])
    specifiesPrecision = NO;
  else
    return nil;

  if (specifiesPrecision) {
    // check precision: this is the base character already peeked at above,
    // so the index is known to be in range.
    digitChar = [str characterAtIndex:location];
    [buffer appendFormat:@"%C", digitChar];
    location++;
    if ([preBinarySet characterIsMember:digitChar])
      isBinary = YES;
    else if ([preOctSet characterIsMember:digitChar] ||
             [digitSet characterIsMember:digitChar])
      isOct = YES;
    else if ([preHexSet characterIsMember:digitChar])
      isHex = YES;
    else
      [context raiseSyntaxException];

    for (; location < length; location++) {
      digitChar = [str characterAtIndex:location];
      if ((isBinary && [binarySet characterIsMember:digitChar]) ||
          (isOct && [octSet characterIsMember:digitChar]) ||
          (isHex && [hexSet characterIsMember:digitChar])) {
        [buffer appendFormat:@"%C", digitChar];
      } else if ([digitTermSet characterIsMember:digitChar]) {
        break;
      } else {
        [context raiseSyntaxException];
      }
    }

  } else {
    for (; location < length; location++) {
      digitChar = [str characterAtIndex:location];
      if ([digitSet characterIsMember:digitChar]) {
        // A digit ends the "sign expected" state. Without this, an unsigned
        // exponent such as 1.5e10 followed by a terminator was rejected
        // while the same literal at the end of input was accepted.
        isExponent = NO;
        [buffer appendFormat:@"%C", digitChar];

      } else if ([dotSet characterIsMember:digitChar]) {
        isFloat = YES;
        [buffer appendFormat:@"%C", digitChar];

      } else if (isFloat && [exponentSet characterIsMember:digitChar]) {
        isExponent = YES;
        [buffer appendFormat:@"%C", digitChar];

      } else if (isExponent) {
        // Directly after the exponent marker only a sign is acceptable here.
        isExponent = NO;
        if (![pnSet characterIsMember:digitChar])
          [context raiseSyntaxException];
        [buffer appendFormat:@"%C", digitChar];

      } else if ([digitTermSet characterIsMember:digitChar]) {
        break;
      } else {
        [context raiseSyntaxException];
      }
    }
  }

  [scanner setScanLocation:location];
  token = [CMToken tokenWithString:buffer];
  [token setIsDigitLiteral:YES];
  return token;
}


// Scans a regular expression literal of the form /body/options.
//
// Returns nil (consuming nothing) when `preToken` is a string literal, a
// digit literal, a closing token or a single message, or when the input does
// not start with "/". Otherwise returns a token whose string is the body
// including both slashes; an escape character and the character following it
// are copied through unchanged. The option characters that directly follow
// the closing slash (members of
// +[CMToken regularExpressionOptionCharacterSet]) are stored on the token
// via -setRegularExpressionOptions:, one single-character string each.
//
// Raises a syntax exception through the context when the input ends before
// the closing slash.
- (CMToken *)parseRegularExpressionWithScanner:(NSScanner *)scanner
                                      preToken:(CMToken *)preToken
{
  CMToken *token;
  NSString *matched, *source, *str, *preStr = nil;
  NSMutableString *buffer;
  NSMutableArray *options;
  NSCharacterSet *charSet, *escapeSet = [CMToken escapeCharacterSet],
    *optionSet = [CMToken regularExpressionOptionCharacterSet];
  unsigned int i, length;
  BOOL isPreCharEscape = NO, isOption = NO;
  unichar uchar;

  if ([preToken isStringLiteral] || [preToken isDigitLiteral] ||
      [preToken isClose] || [preToken isSingleMessage])
    return nil;

  matched = nil;
  charSet = [NSCharacterSet characterSetWithCharactersInString:@"/"];
  if (![scanner scanString:@"/" intoString:&matched])
    return nil;

  buffer = [NSMutableString stringWithString:matched];
  options = [NSMutableArray arrayWithCapacity:1];
  source = [scanner string];
  length = [source length];

  for (i = [scanner scanLocation]; i < length; i++) {
    uchar = [source characterAtIndex:i];
    str = [NSString stringWithCharacters:&uchar length:1];

    if (isOption) {
      // After the closing slash: collect option characters, stop at the
      // first character that is not one (it is left for the caller).
      if ([optionSet characterIsMember:uchar]) {
        [options addObject:str];

      } else {
        break;
      }

    } else if (isPreCharEscape) {
      // preStr is the escape character seen in the previous iteration.
      [buffer appendString:preStr];
      [buffer appendString:str];
      isPreCharEscape = NO;

    } else if ([escapeSet characterIsMember:uchar]) {
      isPreCharEscape = YES;

    } else if ([charSet characterIsMember:uchar]) {
      // closing slash
      [buffer appendString:str];
      isOption = YES;

    } else {
      [buffer appendString:str];
    }

    preStr = str;
  }

  [scanner setScanLocation:i];
  // isOption only becomes YES once the closing slash has been seen. The
  // previous per-iteration -isAtEnd test could never succeed (the scan
  // location was always below the string length), so an unterminated
  // literal was never reported.
  if (!isOption)
    [context raiseSyntaxException];

  token = [CMToken tokenWithString:buffer];
  [token setRegularExpressionOptions:options];
  return token;
}


// Scans a selector literal of the form <...>.
//
// Returns nil (consuming nothing) when `preToken` is a string literal, a
// digit literal, a closing token or a single message, or when the input does
// not start with "<". Otherwise returns a token whose string runs from "<"
// through the first ">" inclusive, and leaves the scanner just past it.
//
// Raises a syntax exception through the context when the input ends before
// a ">" is found.
- (CMToken *)parseSelectorWithScanner:(NSScanner *)scanner
                             preToken:(CMToken *)preToken
{
  NSString *matched, *source, *str;
  NSMutableString *buffer;
  NSCharacterSet *endCharSet;
  unsigned int i, length;
  BOOL isClosed = NO;
  unichar uchar;

  if ([preToken isStringLiteral] || [preToken isDigitLiteral] ||
      [preToken isClose] || [preToken isSingleMessage])
    return nil;

  matched = nil;
  endCharSet = [NSCharacterSet characterSetWithCharactersInString:@">"];
  if (![scanner scanString:@"<" intoString:&matched])
    return nil;

  buffer = [NSMutableString stringWithString:matched];
  source = [scanner string];
  length = [source length];

  for (i = [scanner scanLocation]; i < length; i++) {
    uchar = [source characterAtIndex:i];
    str = [NSString stringWithCharacters:&uchar length:1];
    [buffer appendString:str];

    if ([endCharSet characterIsMember:uchar]) {
      // closing ">": consume it and stop
      i++;
      isClosed = YES;
      break;
    }
  }

  [scanner setScanLocation:i];
  // The previous per-iteration -isAtEnd test could never succeed because the
  // scan location was always set to an index below the string length, so a
  // missing ">" went unnoticed. Check explicitly instead.
  if (!isClosed)
    [context raiseSyntaxException];

  return [CMToken tokenWithString:buffer];
}

@end
