/* ------------------------------------------------------------------------- */
/*
 *  utf8.h
 *
 *  Copyright (c) 2004 - 2009, clown. All rights reserved.
 *
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions
 *  are met:
 *
 *    - Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *    - Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *    - No names of its contributors may be used to endorse or promote
 *      products derived from this software without specific prior written
 *      permission.
 *
 *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 *  TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 *  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 *  Last-modified: Wed 12 Aug 2009 17:10:00 JST
 */
/* ------------------------------------------------------------------------- */
#ifndef CLX_UTF8_H
#define CLX_UTF8_H

#include <iterator>
#include <string>
#include <stdexcept>

namespace clx {
	namespace utf8 {
		/* ----------------------------------------------------------------- */
		/*
		 *  get
		 *
		 *  Reads one UTF-8 encoded character from [first, last) and returns
		 *  its raw bytes packed into a size_t: the lead byte occupies the
		 *  lowest 8 bits, the next byte the following 8 bits, and so on.
		 *  The result is the packed byte sequence, not a decoded code point.
		 *
		 *  first is advanced past every byte that was consumed.
		 *
		 *  Returns size_t(-1) when the range is empty.  When the range ends
		 *  before the sequence is complete, the bytes read so far are
		 *  returned without raising an error.
		 *
		 *  Throws std::runtime_error when the lead byte is not a valid
		 *  UTF-8 lead byte, or when a following byte is outside 0x80-0xbf.
		 *  The 5- and 6-byte forms are accepted only when
		 *  CLX_ALLOW_ISO_10646 is defined.
		 */
		/* ----------------------------------------------------------------- */
		template <class InIter>
		inline size_t get(InIter& first, InIter last) {
			if (first == last) return size_t(-1);
			
			// The high bits of the lead byte announce the sequence length.
			const size_t lead = *first & 0xff;
			size_t length = 0;
			if ((lead & 0x80) == 0x00) length = 1;
			else if ((lead & 0xe0) == 0xc0) length = 2;
			else if ((lead & 0xf0) == 0xe0) length = 3;
			else if ((lead & 0xf8) == 0xf0) length = 4;
#ifdef CLX_ALLOW_ISO_10646
			else if ((lead & 0xfc) == 0xf8) length = 5;
			else if ((lead & 0xfe) == 0xfc) length = 6;
#endif
			else throw std::runtime_error("invalid UTF-8 character code");
			
			size_t packed = 0;
			size_t taken = 0;
			while (taken < length && first != last) {
				const size_t byte = *first & 0xff;
				// Every byte after the lead must look like 10xxxxxx.
				const bool continuation = (byte & 0xc0) == 0x80;
				if (taken != 0 && !continuation) throw std::runtime_error("invalid UTF-8 character code");
				packed |= byte << (taken * 8);
				++taken;
				++first;
			}
			
			return packed;
		}
		
		/* ----------------------------------------------------------------- */
		/*
		 *  get
		 *
		 *  Copies one UTF-8 encoded character from [first, last) to out,
		 *  byte by byte, and returns the output iterator positioned after
		 *  the last byte written.  first is advanced past every byte that
		 *  was consumed.
		 *
		 *  Bytes are stored with "*out = byte; ++out;", so any output
		 *  iterator can be used: raw pointers and container iterators as
		 *  well as insert iterators.
		 *
		 *  Returns out unchanged when the range is empty.  When the range
		 *  ends before the sequence is complete, the bytes that are
		 *  available are copied without raising an error.
		 *
		 *  Throws std::runtime_error when the lead byte is not a valid
		 *  UTF-8 lead byte, or when a following byte is outside 0x80-0xbf;
		 *  in the latter case the bytes preceding the offending one have
		 *  already been written.  The 5- and 6-byte forms are accepted
		 *  only when CLX_ALLOW_ISO_10646 is defined.
		 */
		/* ----------------------------------------------------------------- */
		template <class InIter, class OutIter>
		inline OutIter get(InIter& first, InIter last, OutIter out) {
			if (first == last) return out;
			
			// The lead byte determines how many bytes make up the character.
			size_t n = 0;
			size_t c = *first & 0xff;
			if (c <= 0x7f) n = 1;
			else if (c >= 0xc0 && c <= 0xdf) n = 2;
			else if (c >= 0xe0 && c <= 0xef) n = 3;
			else if (c >= 0xf0 && c <= 0xf7) n = 4;
#ifdef CLX_ALLOW_ISO_10646
			else if (c >= 0xf8 && c <= 0xfb) n = 5;
			else if (c >= 0xfc && c <= 0xfd) n = 6;
#endif
			else throw std::runtime_error("invalid UTF-8 character code");
			
			for (size_t i = 0; i < n && first != last; ++i, ++first) {
				c = *first & 0xff;
				if (i > 0 && !(c >= 0x80 && c <= 0xbf)) throw std::runtime_error("invalid UTF-8 character code");
				// Write through the iterator rather than assigning to it, so
				// that ordinary iterators and pointers work as destinations.
				*out = *first;
				++out;
			}
			
			return out;
		}
		
		/* ----------------------------------------------------------------- */
		/*
		 *  get
		 *
		 *  Copies up to n UTF-8 characters from [first, last) to out by
		 *  calling the single-character get repeatedly.  Copying stops
		 *  early once first reaches last.  first is advanced past the
		 *  bytes consumed and the final output iterator is returned.
		 *
		 *  Any exception raised by the single-character get propagates to
		 *  the caller.
		 */
		/* ----------------------------------------------------------------- */
		template <class InIter, class OutIter>
		inline OutIter get(InIter& first, InIter last, OutIter out, size_t n) {
			size_t remaining = n;
			while (remaining > 0 && first != last) {
				out = clx::utf8::get(first, last, out);
				--remaining;
			}
			return out;
		}
		
		/* ----------------------------------------------------------------- */
		/*
		 *  peek
		 *
		 *  Returns the value that get(first, last) would return for the
		 *  character at first, without modifying the caller's iterator
		 *  object.
		 */
		/* ----------------------------------------------------------------- */
		template <class InIter>
		inline size_t peek(InIter first, InIter last) {
			// first is already a by-value copy, so get() may advance it freely.
			return clx::utf8::get(first, last);
		}
		
		namespace detail {
			/* ------------------------------------------------------------- */
			/*
			 *  back
			 *
			 *  Moves pos backwards by n UTF-8 characters.  For each
			 *  character pos is decremented once and then decremented
			 *  again for as long as it points at a continuation byte
			 *  (0x80-0xbf), so it comes to rest on a non-continuation
			 *  byte.
			 *
			 *  No lower bound is checked: the caller must guarantee that
			 *  n characters exist before pos.
			 */
			/* ------------------------------------------------------------- */
			template <class InIter>
			inline void back(InIter& pos, size_t n) {
				for (size_t remaining = n; remaining > 0; --remaining) {
					size_t byte = 0;
					do {
						--pos;
						byte = *pos & 0xff;
					} while ((byte & 0xc0) == 0x80);
				}
			}
		}
		
		/* ----------------------------------------------------------------- */
		/*
		 *  advance
		 *
		 *  Moves pos by n UTF-8 characters.  A non-negative n moves
		 *  forward, using the lead byte at each position to decide how
		 *  many bytes to skip; a negative n moves backwards through
		 *  detail::back.
		 *
		 *  No end of range is passed in, so the caller must guarantee
		 *  that enough characters exist in the direction of travel.
		 *
		 *  Throws std::runtime_error when, moving forward, the byte at pos
		 *  is not a valid UTF-8 lead byte.  The 5- and 6-byte forms are
		 *  accepted only when CLX_ALLOW_ISO_10646 is defined.
		 */
		/* ----------------------------------------------------------------- */
		template <class InIter>
		inline void advance(InIter& pos, int n) {
			if (n >= 0) {
				for (int moved = 0; moved < n; ++moved) {
					// The high bits of the lead byte give the byte count.
					const size_t lead = *pos & 0xff;
					int width = 0;
					if ((lead & 0x80) == 0x00) width = 1;
					else if ((lead & 0xe0) == 0xc0) width = 2;
					else if ((lead & 0xf0) == 0xe0) width = 3;
					else if ((lead & 0xf8) == 0xf0) width = 4;
#ifdef CLX_ALLOW_ISO_10646
					else if ((lead & 0xfc) == 0xf8) width = 5;
					else if ((lead & 0xfe) == 0xfc) width = 6;
#endif
					else throw std::runtime_error("invalid UTF-8 character code");
					std::advance(pos, width);
				}
			}
			else detail::back(pos, -n);
		}
		
		/* ----------------------------------------------------------------- */
		/*
		 *  distance
		 *
		 *  Counts the UTF-8 characters in [first, last).  Only the lead
		 *  byte of each character is inspected; the bytes that follow it
		 *  are skipped without being validated.
		 *
		 *  A sequence cut short by the end of the range is counted as one
		 *  character and the scan stops at last.
		 *
		 *  Throws std::runtime_error when a byte in lead position is not a
		 *  valid UTF-8 lead byte.  The 5- and 6-byte forms are accepted
		 *  only when CLX_ALLOW_ISO_10646 is defined.
		 */
		/* ----------------------------------------------------------------- */
		template <class InIter>
		inline size_t distance(InIter first, InIter last) {
			size_t dest = 0;
			while (first != last) {
				const size_t c = *first & 0xff;
				size_t n = 0;
				if (c <= 0x7f) n = 1;
				else if (c >= 0xc0 && c <= 0xdf) n = 2;
				else if (c >= 0xe0 && c <= 0xef) n = 3;
				else if (c >= 0xf0 && c <= 0xf7) n = 4;
#ifdef CLX_ALLOW_ISO_10646
				else if (c >= 0xf8 && c <= 0xfb) n = 5;
				else if (c >= 0xfc && c <= 0xfd) n = 6;
#endif
				else throw std::runtime_error("invalid UTF-8 character code");
				
				++dest;
				// Step one byte at a time and never beyond last: jumping the
				// whole length at once would run off the end of the range
				// when the final sequence is truncated.
				for (size_t i = 0; i < n && first != last; ++i) ++first;
			}
			return dest;
		}
		
		/* ----------------------------------------------------------------- */
		/*
		 *  distance
		 *
		 *  Counts the UTF-8 characters held in src by forwarding its whole
		 *  byte range to the iterator-pair overload of distance.
		 */
		/* ----------------------------------------------------------------- */
		inline size_t distance(const std::basic_string<char>& src) {
			typedef std::basic_string<char>::const_iterator const_iterator;
			const const_iterator head = src.begin();
			const const_iterator tail = src.end();
			return clx::utf8::distance(head, tail);
		}
		
		/* ----------------------------------------------------------------- */
		/*
		 *  distance
		 *
		 *  Counts the UTF-8 characters in the NUL-terminated string src by
		 *  forwarding its bytes (terminator excluded) to the iterator-pair
		 *  overload of distance.  The string is scanned in place; no copy
		 *  is made.
		 *
		 *  Returns 0 when src is a null pointer.
		 */
		/* ----------------------------------------------------------------- */
		inline size_t distance(const char* src) {
			// Building a std::string from a null pointer is undefined, so a
			// null argument is treated as an empty string.
			if (!src) return 0;
			const char* const last = src + std::char_traits<char>::length(src);
			return clx::utf8::distance(src, last);
		}
	}
}

#endif // CLX_UTF8_H
