#include "validator.h"
#include "../util/string_funcs.h"

using namespace aka2;

/**
 * Return a copy of str in which every tab (0x09), line feed (0x0a) and
 * carriage return (0x0d) is replaced by a single space (0x20).
 * All other characters are kept as they are; the length never changes.
 */
ustring xs::whiteSpace_replace(const ustring &str) {
  ustring result(str);

  for (ustring::iterator it = result.begin(); it != result.end(); ++it) {
    const bool is_blank = (*it == 0x09) || (*it == 0x0a) || (*it == 0x0d);
    if (is_blank) {
      *it = uchar_t(0x20);
    }
  }
  return result;
}



/**
 * Collapse whitespace in str.
 *
 * The text is first passed through whiteSpace_replace(), so tab, line feed
 * and carriage return are turned into 0x20.  Then leading 0x20's are
 * dropped, every run of contiguous 0x20's is reduced to a single 0x20, and
 * a trailing 0x20 is removed.
 *
 * @param str text to normalize.
 * @return the collapsed text; empty when str is empty or only whitespace.
 */
ustring xs::whiteSpace_collapse(const ustring &str) {
  const ustring replaced = whiteSpace_replace(str);
  const ustring::size_type length = replaced.size();

  ustring collapsed;
  collapsed.reserve(length);

  /* skip leading 0x20's; the bound is tested first so that an empty or
     all-space input never indexes past the end. */
  ustring::size_type pos = 0;
  while ((pos < length) && (replaced[pos] == 0x20)) {
    ++pos;
  }

  /* copy the rest, keeping only the first 0x20 of each run. */
  while (pos < length) {
    const uchar_t uch = replaced[pos];
    ++pos;
    collapsed += uch;
    if (uch == 0x20) {
      while ((pos < length) && (replaced[pos] == 0x20)) {
        ++pos;
      }
    }
  }

  /* at most one trailing 0x20 can remain; inspect the LAST character
     (index length() - 1), not the position one past the end. */
  if (!collapsed.empty() && (collapsed[collapsed.length() - 1] == 0x20)) {
    collapsed.resize(collapsed.length() - 1);
  }

  return collapsed;
}



/**
 * Collapse the whitespace of src and validate the result with is_NCName().
 *
 * @param src  text to convert.
 * @param type name placed in the error message ("... for <type>.").
 * @return the whitespace-collapsed text when it is an NCName.
 * @exception error thrown when the collapsed text is not an NCName; the
 *            message quotes the original (uncollapsed) src.
 */
aka2::ustring xs::to_NCName(const ustring &src, const char *type) {
  ustring ncname = xs::whiteSpace_collapse(src);

  if (!is_NCName(ncname)) {
    std::string message = "Wrong xs:NCName text, " + 
      quote(to_lcp(aka2::ucs2_to_pivot(src))) + " for " + type + ".";
    throw error(message, __FILE__, __LINE__);
  }

  return ncname;
}


/**
 * Collapse the whitespace of src and validate the result with is_Name().
 *
 * @param src  text to convert.
 * @param type name placed in the error message ("... for <type>.").
 * @return the whitespace-collapsed text when it is a Name.
 * @exception error thrown when the collapsed text is not a Name; the
 *            message quotes the original (uncollapsed) src.
 */
ustring xs::to_Name(const ustring &src, const char *type) {
  ustring name = xs::whiteSpace_collapse(src);

  if (!is_Name(name)) {
    std::string message = "Wrong xs:Name text, " + 
      quote(to_lcp(ucs2_to_pivot(src))) + " for " + type + ".";
    throw error(message, __FILE__, __LINE__);
  }

  return name;
}

/**
 * Collapse the whitespace of src and validate the result with is_NMTOKEN().
 *
 * @param src  text to convert.
 * @param type name placed in the error message ("... for <type>.").
 * @return the whitespace-collapsed text when it is an NMTOKEN.
 * @exception error thrown when the collapsed text is not an NMTOKEN; the
 *            message quotes the original (uncollapsed) src.
 */
ustring xs::to_NMTOKEN(const ustring &src, const char *type) {
  ustring collapsed = xs::whiteSpace_collapse(src);
  if (is_NMTOKEN(collapsed)) {
    return collapsed;
  }
  /* report the datatype that was actually checked (xs:NMTOKEN). */
  std::string message = "Wrong xs:NMTOKEN text, " + 
    quote(to_lcp(ucs2_to_pivot(src))) + " for " + type + ".";

  throw error(message, __FILE__, __LINE__);
}

/**
 * Convert src to a token: the result is simply the whitespace-collapsed
 * form of src; no further validation is performed.
 */
ustring xs::to_token(const ustring &src) {
  ustring token = xs::whiteSpace_collapse(src);
  return token;
}



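/**
 * pstring overloads: each converts src with pivot_to_ucs2(), applies the
 * corresponding ustring function above, and passes the result through
 * ucs2_to_pivot() and ecomps.to_lcp() to build the returned std::string.
 */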

std::string xs::to_NCName(const pstring &src, const char *type,
			  entity_complements &ecomps) {
  ustring ncname = to_NCName(pivot_to_ucs2(src), type);
  return ecomps.to_lcp(ucs2_to_pivot(ncname));
}





std::string xs::to_Name(const pstring &src, const char *type,
		    entity_complements &ecomps) {
  ustring name = xs::to_Name(pivot_to_ucs2(src), type);
  return ecomps.to_lcp(ucs2_to_pivot(name));
}

std::string xs::to_NMTOKEN(const pstring &src, const char *type,
		       entity_complements &ecomps) {
  ustring nmtoken = to_NMTOKEN(pivot_to_ucs2(src), type);
  return ecomps.to_lcp(ucs2_to_pivot(nmtoken));
}

std::string xs::to_token(const pstring &src, 
			 entity_complements &ecomps) {
  aka2::ustring collapsed = xs::whiteSpace_collapse(aka2::pivot_to_ucs2(src));
  return ecomps.to_lcp(aka2::ucs2_to_pivot(collapsed));
}


/**
 * Names and Tokens 
 * [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | 
 *  [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | 
 *  [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
 *  [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
 * [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | 
 *  [#x0300-#x036F] | [#x203F-#x2040]
 * [5] Name	   ::= NameStartChar (NameChar)*
 * [6] Names	   ::= Name (#x20 Name)*
 * [7] Nmtoken	   ::= (NameChar)+
 * [8] Nmtokens	   ::= Nmtoken (#x20 Nmtoken)* 
 */

/**
 * Test whether uch matches production [4] NameStartChar.
 *
 * The supplementary range [#x10000-#xEFFFF] of the production is not
 * checked here (UCS-4 only).
 */
bool xs::is_NameStartChar(uchar_t uch) {
  /* inclusive [first, last] ranges; single characters use first == last. */
  struct code_range {
    int first;
    int last;
  };
  static const code_range start_ranges[] = {
    { ':', ':' },
    { 'A', 'Z' },
    { '_', '_' },
    { 'a', 'z' },
    { 0xc0, 0xd6 },
    { 0xd8, 0xf6 },
    { 0xf8, 0x2ff },
    { 0x370, 0x37d },
    { 0x37f, 0x1fff },
    { 0x200c, 0x200d },
    { 0x2070, 0x218f },
    { 0x2c00, 0x2fef },
    { 0x3001, 0xd7ff },
    { 0xf900, 0xfdcf },
    { 0xfdf0, 0xfffd }
  };
  const unsigned int range_count =
    sizeof(start_ranges) / sizeof(start_ranges[0]);

  for (unsigned int index = 0; index < range_count; ++index) {
    const code_range &range = start_ranges[index];
    if ((range.first <= uch) && (uch <= range.last)) {
      return true;
    }
  }
  return false;
}

/**
 * Test whether uch matches production [4a] NameChar: any NameStartChar,
 * or one of '-', '.', [0-9], #xB7, [#x0300-#x036F], [#x203F-#x2040].
 */
bool xs::is_NameChar(uchar_t uch) {
  /* single characters that are NameChar but not NameStartChar. */
  if ((uch == '-') || (uch == '.') || (uch == 0xb7)) {
    return true;
  }
  /* ranges that are NameChar but not NameStartChar. */
  if (('0' <= uch) && (uch <= '9')) {
    return true;
  }
  if ((0x300 <= uch) && (uch <= 0x36f)) {
    return true;
  }
  if ((0x203f <= uch) && (uch <= 0x2040)) {
    return true;
  }
  return is_NameStartChar(uch);
}

bool xs::is_Name(const ustring &unistr) {
  
  if (unistr.length() != 0) {
    if (is_NameStartChar(unistr[0])) {
      for (ustring::size_type index = 1; index < unistr.length(); ++index) {
	if (is_NameChar(unistr[index]))
	  continue;
	else
	  goto not_a_name;
      }
      return true;
    }
  }
 not_a_name:
  return false;
}


bool xs::is_NCName(const ustring &unistr) {
  if (is_Name(unistr)) {
    for (ustring::size_type pos = 0; pos < unistr.size(); ++pos) {
      if (unistr[pos] != ':')
	continue;
      else
	return false;
    }
    return true;
  }
  return false;
}


bool xs::is_NMTOKEN(const ustring &unistr) {
  for (ustring::size_type index = 0; index < unistr.length(); ++index) {
    if (is_NameChar(unistr[index]))
      continue;
    else
      return false;
  }
  return true;
}


bool xs::is_QName(const ustring &unistr) {
  ustring::size_type pos = unistr.find_first_of(':');
  if (pos == ustring::npos) {
    return is_NCName(unistr);
  }
  else {
    ustring prefix = unistr.substr(0, pos);
    ustring localname = unistr.substr(pos + 1);
    return is_NCName(prefix) && (is_NCName(localname));
  }
}
  
  
