#include "Codec.h"
#include "String.h"
#include "Codec_Japanese.h"
#include "Codec_ISO8859.h"
#include "OAL.h"

namespace AScript {

//-----------------------------------------------------------------------------
// Codec
//-----------------------------------------------------------------------------
// Hands back the next pending output byte, if there is one.
// Pending bytes are taken from _buffOut starting at the highest used index,
// so the byte stored last comes out first. Returns false, leaving chConv
// untouched, when nothing is pending.
bool Codec::FollowChar(char &chConv)
{
	if (_idxBuff > 0) {
		_idxBuff--;
		chConv = _buffOut[_idxBuff];
		return true;
	}
	return false;
}

// Runs len bytes of src through this codec and appends the output to dst.
// A negative len means src is NUL-terminated; its length is then measured
// with strlen(). Returns false as soon as FeedChar() reports RESULT_Error;
// whatever was appended to dst before that point stays there. Once all input
// has been consumed, Flush() gets one chance to emit trailing output.
bool Codec::Convert(String &dst, const char *src, int len)
{
	if (len < 0) len = static_cast<int>(::strlen(src));
	char chOut;
	const char *pSrc = src;
	for (int nRest = len; nRest > 0; nRest--, pSrc++) {
		const Codec::Result rtn = FeedChar(*pSrc, chOut);
		if (rtn == Codec::RESULT_Error) return false;
		if (rtn != Codec::RESULT_Complete) continue;
		// The first output byte arrives in chOut; the rest are drained
		// through FollowChar().
		do {
			dst.push_back(chOut);
		} while (FollowChar(chOut));
	}
	if (Flush(chOut) == Codec::RESULT_Complete) {
		do {
			dst.push_back(chOut);
		} while (FollowChar(chOut));
	}
	return true;
}

// Base implementation of the end-of-input step: there is no buffered state
// to emit, so chConv is not written and RESULT_None is returned.
Codec::Result Codec::Flush(char &chConv)
{
	(void)chConv;	// intentionally unused
	return RESULT_None;
}

const char *Codec::EncodingFromLANG()
{
	const char *encodingDefault = "us-ascii";
	struct AssocInfo {
		const char *key;
		const char *value;
	};
	const char *str = OAL::getenv("LANG");
	if (str == NULL) return encodingDefault;
	const char *p = ::strchr(str, '.');
	String langLeft, langRight;
	if (p == NULL) {
		langLeft = str;
	} else {
		langLeft = String(str, p - str);
		langRight = p + 1;
	}
	if (langRight.empty()) {
		static const AssocInfo assocInfoTbl[] = {
			{ "C",		"us-ascii" },
			{ "en_US",	"us-ascii" },
			{ "ja",	 	"euc-jp" },
			{ "ja_JP",	"euc-jp" },
		};
		for (int i = 0; i < NUMBEROF(assocInfoTbl); i++) {
			if (::strcasecmp(langLeft.c_str(), assocInfoTbl[i].key) == 0) {
				return assocInfoTbl[i].value;
			}
		}
	} else {
		static const AssocInfo assocInfoTbl[] = {
			{ "eucJP",	"euc-jp" },
			{ "ujis",	"euc-jp" },
			{ "UTF-8",	"utf-8" },
			{ "SJIS",	"shift_jis" },
		};
		for (int i = 0; i < NUMBEROF(assocInfoTbl); i++) {
			if (::strcasecmp(langRight.c_str(), assocInfoTbl[i].key) == 0) {
				return assocInfoTbl[i].value;
			}
		}
	}
	return encodingDefault;
}

//-----------------------------------------------------------------------------
// CodecFactory
//-----------------------------------------------------------------------------
// List of all constructed CodecFactory instances. It starts out NULL; the
// CodecFactory constructor allocates it when the first factory is created.
CodecFactory::List *CodecFactory::_pList = NULL;

// Stores the factory's name and appends the new factory to the shared list,
// allocating that list first if this is the first factory constructed.
// Nothing in the code visible here frees the list.
CodecFactory::CodecFactory(const char *name) : _name(name)
{
	if (_pList == NULL) _pList = new List();
	_pList->push_back(this);
}

// Finds the registered factory whose name equals `name`, ignoring case.
// Returns the first match in registration order, or NULL when `name` is NULL,
// no factory has been constructed yet, or nothing matches.
CodecFactory *CodecFactory::Lookup(const char *name)
{
	if (name == NULL) return NULL;
	if (_pList == NULL) return NULL;
	foreach (List, ppFactory, *_pList) {
		if (::strcasecmp((*ppFactory)->GetName(), name) == 0) return *ppFactory;
	}
	return NULL;
}

// Codec registrations.
// Each AScript_RegisterCodecFactory(...) entry below pairs an encoding name
// with an encoder class and a decoder class. The macro is defined outside
// this file; presumably it creates a CodecFactory under the given name so
// that CodecFactory::Lookup() can find it -- confirm against its definition.
//
// Encoding names are specified in http://www.iana.org/assignments/character-sets

AScript_RegisterCodecFactory(Base64, "base64",
		Codec_Encoder_Base64, Codec_Decoder_Base64)

AScript_RegisterCodecFactory(USASCII, "us-ascii",
		Codec_Encoder_USASCII, Codec_Decoder_USASCII)

// "iso-8859-1" is mapped to the pass-through codec, which copies bytes
// unchanged apart from optional end-of-line handling.
// NOTE(review): Codec_Encoder::FeedChar treats its input as UTF-8, so a plain
// byte copy may not be a real Latin-1 conversion -- confirm this is intended.
// There is no entry for "iso-8859-12".
AScript_RegisterCodecFactory(ISO8859_1, "iso-8859-1",
		Codec_Encoder_Through, Codec_Decoder_Through)
AScript_RegisterCodecFactory(ISO8859_2, "iso-8859-2",
		Codec_Encoder_ISO8859_2, Codec_Decoder_ISO8859_2)
AScript_RegisterCodecFactory(ISO8859_3, "iso-8859-3",
		Codec_Encoder_ISO8859_3, Codec_Decoder_ISO8859_3)
AScript_RegisterCodecFactory(ISO8859_4, "iso-8859-4",
		Codec_Encoder_ISO8859_4, Codec_Decoder_ISO8859_4)
AScript_RegisterCodecFactory(ISO8859_5, "iso-8859-5",
		Codec_Encoder_ISO8859_5, Codec_Decoder_ISO8859_5)
AScript_RegisterCodecFactory(ISO8859_6, "iso-8859-6",
		Codec_Encoder_ISO8859_6, Codec_Decoder_ISO8859_6)
AScript_RegisterCodecFactory(ISO8859_7, "iso-8859-7",
		Codec_Encoder_ISO8859_7, Codec_Decoder_ISO8859_7)
AScript_RegisterCodecFactory(ISO8859_8, "iso-8859-8",
		Codec_Encoder_ISO8859_8, Codec_Decoder_ISO8859_8)
AScript_RegisterCodecFactory(ISO8859_9, "iso-8859-9",
		Codec_Encoder_ISO8859_9, Codec_Decoder_ISO8859_9)
AScript_RegisterCodecFactory(ISO8859_10, "iso-8859-10",
		Codec_Encoder_ISO8859_10, Codec_Decoder_ISO8859_10)
AScript_RegisterCodecFactory(ISO8859_11, "iso-8859-11",
		Codec_Encoder_ISO8859_11, Codec_Decoder_ISO8859_11)
AScript_RegisterCodecFactory(ISO8859_13, "iso-8859-13",
		Codec_Encoder_ISO8859_13, Codec_Decoder_ISO8859_13)
AScript_RegisterCodecFactory(ISO8859_14, "iso-8859-14",
		Codec_Encoder_ISO8859_14, Codec_Decoder_ISO8859_14)
AScript_RegisterCodecFactory(ISO8859_15, "iso-8859-15",
		Codec_Encoder_ISO8859_15, Codec_Decoder_ISO8859_15)
AScript_RegisterCodecFactory(ISO8859_16, "iso-8859-16",
		Codec_Encoder_ISO8859_16, Codec_Decoder_ISO8859_16)

AScript_RegisterCodecFactory(EUCJP, "euc-jp",
		Codec_Encoder_EUCJP, Codec_Decoder_EUCJP)

// "cp932", "shift_jis" and "ms_kanji" all use the CP932 encoder/decoder.
AScript_RegisterCodecFactory(CP932, "cp932",
		Codec_Encoder_CP932, Codec_Decoder_CP932)
AScript_RegisterCodecFactory(Shift_JIS, "shift_jis",
		Codec_Encoder_CP932, Codec_Decoder_CP932)
AScript_RegisterCodecFactory(MS_Kanji, "ms_kanji",
		Codec_Encoder_CP932, Codec_Decoder_CP932)

// "utf-8" uses the pass-through codec; "utf-16" uses the UTF-16LE classes.
AScript_RegisterCodecFactory(UTF8, "utf-8",
		Codec_Encoder_Through, Codec_Decoder_Through)
AScript_RegisterCodecFactory(UTF16, "utf-16",
		Codec_Encoder_UTF16LE, Codec_Decoder_UTF16LE)

//-----------------------------------------------------------------------------
// Codec_Encoder
//-----------------------------------------------------------------------------
// Consumes one byte of UTF-8 input and, whenever a code point is complete,
// passes it to FeedUTF32() and returns that call's result. Returns
// RESULT_None while a multi-byte sequence is still being collected.
//
// _cntChars holds the number of continuation bytes still expected and
// _codeUTF32 the bits gathered so far. Lead bytes announcing up to five
// continuation bytes are accepted. No validation is done beyond the bit
// patterns tested here: a continuation byte that arrives when none is
// expected is dropped, and an ASCII or lead byte arriving mid-sequence
// abandons the sequence in progress.
Codec::Result Codec_Encoder::FeedChar(char ch, char &chConv)
{
	const unsigned char byte = static_cast<unsigned char>(ch);
	if (byte < 0x80) {
		// 0xxxxxxx: the byte is the code point.
		const Result rtn = FeedUTF32(byte, chConv);
		_cntChars = 0;
		return rtn;
	}
	if (byte < 0xc0) {
		// 10xxxxxx: continuation byte.
		if (_cntChars <= 0) {
			// Not inside a sequence; discard it.
			_codeUTF32 = 0x00000000;
			return RESULT_None;
		}
		_codeUTF32 = (_codeUTF32 << 6) | (byte & 0x3f);
		if (_cntChars > 1) {
			_cntChars--;
			return RESULT_None;
		}
		// That was the last expected byte: emit the code point and reset.
		const Result rtn = FeedUTF32(_codeUTF32, chConv);
		_codeUTF32 = 0x00000000;
		_cntChars = 0;
		return rtn;
	}
	// Lead byte: keep its payload bits and note how many bytes must follow.
	unsigned char payloadMask;
	int nFollow;
	if (byte < 0xe0) {			// 110xxxxx
		payloadMask = 0x1f; nFollow = 1;
	} else if (byte < 0xf0) {	// 1110xxxx
		payloadMask = 0x0f; nFollow = 2;
	} else if (byte < 0xf8) {	// 11110xxx
		payloadMask = 0x07; nFollow = 3;
	} else if (byte < 0xfc) {	// 111110xx
		payloadMask = 0x03; nFollow = 4;
	} else {					// 0xfc..0xff
		payloadMask = 0x01; nFollow = 5;
	}
	_codeUTF32 = static_cast<unsigned char>(byte & payloadMask);
	_cntChars = nFollow;
	return RESULT_None;
}

//-----------------------------------------------------------------------------
// Codec_Decoder
//-----------------------------------------------------------------------------
// Encodes one code point as UTF-8 (one to four bytes).
// The lead byte is returned in chConv. Continuation bytes are pushed with
// StoreChar() from the lowest 6-bit group upwards, the reverse of output
// order, because FollowChar() hands back the most recently stored byte first.
// Any previously pending output is discarded at the start. Values that need
// more than four bytes yield RESULT_Error with chConv set to '\0' and nothing
// pending.
Codec::Result Codec_Decoder::FeedUTF32(unsigned long codeUTF32, char &chConv)
{
	// Lead-byte marker and the largest lead payload for 2-, 3- and 4-byte forms.
	static const unsigned char leadTbl[] = { 0xc0, 0xe0, 0xf0 };
	static const unsigned long payloadMaxTbl[] = { 0x1f, 0x0f, 0x07 };
	_idxBuff = 0;
	if (codeUTF32 <= 0x7f) {
		chConv = static_cast<char>(codeUTF32);
		return RESULT_Complete;
	}
	for (int iForm = 0; iForm < 3; iForm++) {
		StoreChar(0x80 | static_cast<char>(codeUTF32 & 0x3f));
		codeUTF32 >>= 6;
		if (codeUTF32 <= payloadMaxTbl[iForm]) {
			chConv = leadTbl[iForm] | static_cast<char>(codeUTF32);
			return RESULT_Complete;
		}
	}
	// Too large for a four-byte sequence: drop what was stored.
	_idxBuff = 0;
	chConv = '\0';
	return RESULT_Error;
}

//-----------------------------------------------------------------------------
// Codec_None
//-----------------------------------------------------------------------------
// Identity conversion: every input byte is returned unchanged and is always
// reported as a complete output byte.
Codec::Result Codec_None::FeedChar(char ch, char &chConv)
{
	chConv = ch;	// no translation of any kind
	return RESULT_Complete;
}

//-----------------------------------------------------------------------------
// Codec_Encoder_USASCII
//-----------------------------------------------------------------------------
// Passes 7-bit bytes through and rejects any byte with the high bit set
// (RESULT_Error, chConv untouched). When end-of-line processing is enabled,
// '\n' is expanded to "\r\n": '\r' is returned in chConv and '\n' is left
// pending for FollowChar().
Codec::Result Codec_Encoder_USASCII::FeedChar(char ch, char &chConv)
{
	if ((ch & 0x80) != 0) return RESULT_Error;
	if (!(IsProcessEOL() && ch == '\n')) {
		chConv = ch;
		return RESULT_Complete;
	}
	chConv = '\r';
	StoreChar('\n');
	return RESULT_Complete;
}

//-----------------------------------------------------------------------------
// Codec_Decoder_USASCII
//-----------------------------------------------------------------------------
// Passes 7-bit bytes through and rejects any byte with the high bit set
// (RESULT_Error). When end-of-line processing is enabled, every '\r' is
// dropped (RESULT_None), whether or not a '\n' follows it.
Codec::Result Codec_Decoder_USASCII::FeedChar(char ch, char &chConv)
{
	const bool isSevenBit = ((ch & 0x80) == 0);
	if (!isSevenBit) return RESULT_Error;
	const bool dropCR = (IsProcessEOL() && ch == '\r');
	if (dropCR) return RESULT_None;
	chConv = ch;
	return RESULT_Complete;
}

//-----------------------------------------------------------------------------
// Codec_Encoder_Through
//-----------------------------------------------------------------------------
// Copies bytes through unchanged. When end-of-line processing is enabled,
// '\n' is expanded to "\r\n": '\r' is returned in chConv and '\n' is left
// pending for FollowChar().
Codec::Result Codec_Encoder_Through::FeedChar(char ch, char &chConv)
{
	if (!(IsProcessEOL() && ch == '\n')) {
		chConv = ch;
		return RESULT_Complete;
	}
	chConv = '\r';
	StoreChar('\n');
	return RESULT_Complete;
}

//-----------------------------------------------------------------------------
// Codec_Decoder_Through
//-----------------------------------------------------------------------------
// Copies bytes through unchanged. When end-of-line processing is enabled,
// every '\r' is dropped (RESULT_None), whether or not a '\n' follows it.
Codec::Result Codec_Decoder_Through::FeedChar(char ch, char &chConv)
{
	const bool dropCR = (IsProcessEOL() && ch == '\r');
	if (dropCR) return RESULT_None;
	chConv = ch;
	return RESULT_Complete;
}

//-----------------------------------------------------------------------------
// Codec_Encoder_Base64
//-----------------------------------------------------------------------------
// Base64 alphabet, indexed by 6-bit value: 'A'-'Z', 'a'-'z', '0'-'9', '+', '/'.
const char Codec_Encoder_Base64::_chars[] =
		"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

// Collects input bytes in _buff and, on every third byte, emits the four
// Base64 characters for that 24-bit group. The first character is returned
// in chConv; the other three are pushed with StoreChar() in reverse output
// order, since FollowChar() hands back the most recently stored byte first.
// Returns RESULT_None until a group is complete.
Codec::Result Codec_Encoder_Base64::FeedChar(char ch, char &chConv)
{
	_buff.push_back(ch);
	if (_buff.size() < 3) return RESULT_None;
	// Pack the three bytes, first byte in the most significant position.
	unsigned long bits = 0;
	for (int iByte = 0; iByte < 3; iByte++) {
		bits = (bits << 8) | static_cast<unsigned char >(_buff[iByte]);
	}
	// Sextets from least to most significant; the top one goes to chConv.
	for (int iSextet = 0; iSextet < 3; iSextet++) {
		StoreChar(_chars[bits & 0x3f]);
		bits >>= 6;
	}
	chConv = _chars[bits & 0x3f];
	_buff.clear();
	return RESULT_Complete;
}

// Emits the final, padded Base64 group for the one or two input bytes still
// held in _buff. Missing bytes count as zero; one leftover byte yields two
// '=' characters and two leftover bytes yield one. Returns RESULT_None when
// nothing is buffered. The first output character is returned in chConv and
// the remaining three are left pending for FollowChar().
Codec::Result Codec_Encoder_Base64::Flush(char &chConv)
{
	const size_t nPending = _buff.size();
	if (nPending == 0) return RESULT_None;
	// Pack the 24-bit group, treating absent bytes as 0x00.
	unsigned long bits = 0;
	for (size_t iByte = 0; iByte < 3; iByte++) {
		bits <<= 8;
		if (iByte < nPending) bits |= static_cast<unsigned char >(_buff[iByte]);
	}
	const char ch4th = '=';
	const char ch3rd = (nPending == 1)? '=' : _chars[(bits >> 6) & 0x3f];
	const char ch2nd = _chars[(bits >> 12) & 0x3f];
	const char ch1st = _chars[(bits >> 18) & 0x3f];
	// Stored last-to-first because FollowChar() returns the newest byte first.
	StoreChar(ch4th);
	StoreChar(ch3rd);
	StoreChar(ch2nd);
	chConv = ch1st;
	_buff.clear();
	return RESULT_Complete;
}

//-----------------------------------------------------------------------------
// Codec_Decoder_Base64
//-----------------------------------------------------------------------------
// Consumes one Base64 character. Space, tab, CR and LF are skipped; any
// other character outside the alphabet and '=' yields RESULT_Error. Every
// four accepted characters form a 24-bit group that decodes to three bytes,
// less one byte per '=' seen in the group. The first decoded byte is
// returned in chConv and later ones are left pending for FollowChar(). A
// group with more than two '=' produces no output. The position of '='
// within a group is not checked.
Codec::Result Codec_Decoder_Base64::FeedChar(char ch, char &chConv)
{
	int sextet = 0;
	if ('A' <= ch && ch <= 'Z') {
		sextet = ch - 'A';
	} else if ('a' <= ch && ch <= 'z') {
		sextet = ch - 'a' + 26;
	} else if ('0' <= ch && ch <= '9') {
		sextet = ch - '0' + 52;
	} else if (ch == '+') {
		sextet = 62;
	} else if (ch == '/') {
		sextet = 63;
	} else if (ch == '=') {
		// Padding contributes zero bits but is counted.
		_nInvalid++;
	} else if (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n') {
		return RESULT_None;
	} else {
		return RESULT_Error;
	}
	_accum = (_accum << 6) | sextet;
	_nChars++;
	if (_nChars < 4) return RESULT_None;
	// A full group is in: _accum holds byte0:byte1:byte2 from the top down.
	Result rtn = RESULT_None;
	if (0 <= _nInvalid && _nInvalid <= 2) {
		// Later bytes are stored first so that FollowChar() returns them in
		// output order.
		if (_nInvalid == 0) {
			StoreChar(static_cast<unsigned char>(_accum & 0xff));
		}
		if (_nInvalid <= 1) {
			StoreChar(static_cast<unsigned char>((_accum >> 8) & 0xff));
		}
		chConv = static_cast<unsigned char>((_accum >> 16) & 0xff);
		rtn = RESULT_Complete;
	}
	_nChars = 0, _nInvalid = 0, _accum = 0;
	return rtn;
}

//-----------------------------------------------------------------------------
// Codec_Encoder_UTF16LE
//-----------------------------------------------------------------------------
// Encodes one code point as little-endian UTF-16.
//
// The first output byte is returned in chConv. The remaining bytes are pushed
// with StoreChar() in reverse output order, because FollowChar() hands back
// the most recently stored byte first.
//
// - With end-of-line processing enabled, '\n' becomes "\r\n" (two units).
// - U+0000..U+FFFF is written as a single 16-bit unit.
// - U+10000..U+10FFFF is written as a surrogate pair, high surrogate first.
//   Previously these values were truncated to their low 16 bits.
// - Anything above U+10FFFF has no UTF-16 form: RESULT_Error is returned with
//   chConv set to '\0'.
Codec::Result Codec_Encoder_UTF16LE::FeedUTF32(unsigned long codeUTF32, char &chConv)
{
	if (IsProcessEOL() && codeUTF32 == '\n') {
		// Output bytes: '\r' 00 '\n' 00
		StoreChar('\0');
		StoreChar('\n');
		StoreChar('\0');
		chConv = '\r';
	} else if (codeUTF32 <= 0xffff) {
		StoreChar(static_cast<char>((codeUTF32 >> 8) & 0xff));
		chConv = static_cast<char>((codeUTF32 >> 0) & 0xff);
	} else if (codeUTF32 <= 0x10ffff) {
		// Split the 20-bit offset into a high (0xD800..) and a low (0xDC00..)
		// surrogate carrying 10 bits each.
		const unsigned long offset = codeUTF32 - 0x10000;
		const unsigned long surrHigh = 0xd800 | (offset >> 10);
		const unsigned long surrLow = 0xdc00 | (offset & 0x3ff);
		// Output bytes: high.lo high.hi low.lo low.hi
		StoreChar(static_cast<char>((surrLow >> 8) & 0xff));
		StoreChar(static_cast<char>((surrLow >> 0) & 0xff));
		StoreChar(static_cast<char>((surrHigh >> 8) & 0xff));
		chConv = static_cast<char>((surrHigh >> 0) & 0xff);
	} else {
		chConv = '\0';
		return RESULT_Error;
	}
	return RESULT_Complete;
}

//-----------------------------------------------------------------------------
// Codec_Decoder_UTF16LE
//-----------------------------------------------------------------------------
// Consumes one byte of little-endian UTF-16 input. The first byte of each
// pair is kept as the low half (RESULT_None); the second supplies the high
// half, and the resulting 16-bit value is passed to FeedUTF32(). With
// end-of-line processing enabled, a '\r' unit is dropped. Each 16-bit unit
// is forwarded on its own; surrogate pairs are not combined.
Codec::Result Codec_Decoder_UTF16LE::FeedChar(char ch, char &chConv)
{
	const unsigned long octet = static_cast<unsigned char>(ch);
	if (_firstByteFlag) {
		// Low byte: wait for its partner.
		_codeUTF32 = octet;
		_firstByteFlag = false;
		return RESULT_None;
	}
	// High byte: the unit is complete.
	_codeUTF32 |= (octet << 8);
	_firstByteFlag = true;
	if (IsProcessEOL() && _codeUTF32 == '\r') return RESULT_None;
	return FeedUTF32(_codeUTF32, chConv);
}

}
