﻿using System;
using System.Text;
using System.IO;
using System.Collections.Generic;
using System.Threading.Tasks;

namespace EncodeDetect
{
    /// <summary>
    /// Line-ending conventions handled by <see cref="LineFeedHelper"/>.
    /// </summary>
    public enum LineFeedType
    {
        /// <summary>Carriage return only ("\r").</summary>
        CR = 0,
        /// <summary>Line feed only ("\n").</summary>
        LF = 1,
        /// <summary>Carriage return followed by line feed ("\r\n").</summary>
        CRLF = 2,
    }

    static public class LineFeedHelper
    {
        /// <summary>
        /// Converts a <see cref="LineFeedType"/> value into the character sequence it stands for.
        /// </summary>
        /// <param name="type">The line-ending kind to convert.</param>
        /// <returns>"\r" for CR, "\n" for LF and "\r\n" for CRLF.</returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="type"/> is not one of the three defined values.
        /// </exception>
        public static string ToString(LineFeedType type)
        {
            string newline;
            switch (type)
            {
                case LineFeedType.CRLF:
                    newline = "\r\n";
                    break;
                case LineFeedType.LF:
                    newline = "\n";
                    break;
                case LineFeedType.CR:
                    newline = "\r";
                    break;
                default:
                    // Any value outside the enum's defined members is rejected.
                    throw new ArgumentOutOfRangeException();
            }
            return newline;
        }

        /// <summary>
        /// Detects the line-ending convention used in a character buffer.
        /// </summary>
        /// <remarks>
        /// Only the first 1024 characters are searched for a line break, and the first CR or LF
        /// found decides the result. When that CR is the very last character of the searched
        /// window, the character immediately after the window (if the buffer has one) is still
        /// consulted, so a CR LF pair straddling the limit is not misreported as a bare CR.
        /// </remarks>
        /// <param name="bs">Characters to inspect.</param>
        /// <returns>
        /// The detected line-ending type; <see cref="LineFeedType.CRLF"/> when no line break is
        /// found within the searched window.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="bs"/> is null.</exception>
        public static LineFeedType GetLineFeed(char[] bs)
        {
            if (bs == null)
                throw new ArgumentNullException("bs");

            const char CR = '\r', LF = '\n';
            const int SearchLimit = 1024;

            int len = Math.Min(bs.Length, SearchLimit);
            for (int i = 0; i < len; i++)
            {
                char c = bs[i];
                if (c == LF)
                    return LineFeedType.LF;

                if (c == CR)
                {
                    // Look ahead against the real buffer length, not the search limit, so a
                    // CR at index 1023 followed by an LF at index 1024 is still seen as CRLF.
                    bool followedByLf = i + 1 < bs.Length && bs[i + 1] == LF;
                    return followedByLf ? LineFeedType.CRLF : LineFeedType.CR;
                }
            }

            // No line break in the searched window: fall back to CRLF.
            return LineFeedType.CRLF;
        }

        /// <summary>
        /// Detects the line-ending convention of a text file by sampling its first 1024 characters.
        /// </summary>
        /// <param name="filepath">Path of the file to inspect.</param>
        /// <param name="enc">Encoding used to decode the file.</param>
        /// <returns>
        /// The line-ending type reported by <see cref="GetLineFeed(char[])"/> for the sample,
        /// upgraded from CR to CRLF when the sample ends on its first CR and the next character
        /// in the file is an LF.
        /// </returns>
        public static LineFeedType GetLineFeed(string filepath, Encoding enc)
        {
            const int SampleSize = 1024;

            using (StreamReader sr = new StreamReader(filepath, enc))
            {
                char[] buf = new char[SampleSize];

                // ReadBlock keeps reading until the buffer is full or the file ends, whereas
                // Read is allowed to return fewer characters than requested.
                int read = sr.ReadBlock(buf, 0, buf.Length);

                LineFeedType lf = LineFeedHelper.GetLineFeed(buf);

                // If the first line break is a CR sitting on the final sampled character, the
                // matching LF (if any) was not part of the sample: read one more character to
                // tell a bare CR from a CR LF pair split by the sample boundary.
                if (lf == LineFeedType.CR &&
                    Array.IndexOf(buf, '\r', 0, read) == read - 1 &&
                    sr.Read() == '\n')
                {
                    lf = LineFeedType.CRLF;
                }

                return lf;
            }
        }
    }

    static public class DectingEncode
    {
        // Buffer size constant; currently not referenced by any member of this class.
        const int bufsize = 8192;
        // Fallback encoding returned by GetCodeAsync2 and GetCode2 when detection yields null.
        readonly static Encoding defalutEncoding = Encoding.Default;
        /// <summary>
        /// Detects the character encoding of the contents of a file stream.
        /// </summary>
        /// <remarks>
        /// Checks run in this order: UTF-32 / UTF-16 byte order marks, ISO-2022-JP escape
        /// sequences, pure 7-bit ASCII, and finally a score comparison between UTF-8,
        /// Shift_JIS and EUC-JP. A candidate wins the comparison only when its score is
        /// strictly greater than both others.
        /// Based on the sample at
        /// http://www.geocities.jp/gakaibon/tips/csharp2008/charset-check-samplecode4.html
        /// </remarks>
        /// <param name="fs">Readable, seekable stream over the file to inspect.</param>
        /// <returns>The detected encoding, or null when it cannot be determined.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="fs"/> is null.</exception>
        public static async Task<Encoding> GetCodeAsync(FileStream fs)
        {
            if (fs == null)
                throw new ArgumentNullException("fs");

            byte[] bs = new byte[4];
            int readCount = await fs.ReadAsync(bs, 0, 4).ConfigureAwait(false);

            // The UTF-32 LE mark starts with the UTF-16 LE mark, so it must be tested first.
            if (readCount >= 4 &&
                bs[0] == 0xFF && bs[1] == 0xFE && bs[2] == 0x00 && bs[3] == 0x00)
            {
                return Encoding.GetEncoding(12000); // UTF-32
            }

            if (readCount >= 4 &&
                bs[0] == 0x00 && bs[1] == 0x00 && bs[2] == 0xFE && bs[3] == 0xFF)
            {
                return Encoding.GetEncoding(12001); // UTF-32 Big Endian
            }

            if (readCount >= 2 && bs[0] == 0xFF && bs[1] == 0xFE)
            {
                return Encoding.GetEncoding(1200);  // UTF-16
            }

            if (readCount >= 2 && bs[0] == 0xFE && bs[1] == 0xFF)
            {
                return Encoding.GetEncoding(1201);  // UTF-16 Big Endian
            }

            // Every await here uses ConfigureAwait(false): GetCode2 blocks on this task, so
            // resuming on a captured synchronization context could deadlock the caller.
            if (await IsJis(fs).ConfigureAwait(false))
            {
                return Encoding.GetEncoding(50220); // Japanese (JIS)
            }

            if (await IsAscii(fs).ConfigureAwait(false))
            {
                return Encoding.GetEncoding(20127); // US-ASCII
            }

            EncodingScore utf8 = await IsUTF8(fs).ConfigureAwait(false);
            EncodingScore sjis = await IsSJIS(fs).ConfigureAwait(false);
            EncodingScore euc = await IsEUC(fs).ConfigureAwait(false);

            if (!(utf8.IsMatched() || sjis.IsMatched() || euc.IsMatched()))
            {
                return null;
            }

            if (euc.Score > sjis.Score && euc.Score > utf8.Score)
            {
                return Encoding.GetEncoding(51932); // Japanese (EUC)
            }

            if (sjis.Score > euc.Score && sjis.Score > utf8.Score)
            {
                return Encoding.GetEncoding(932);   // Japanese (Shift JIS)
            }

            if (utf8.Score > euc.Score && utf8.Score > sjis.Score)
            {
                // Emit a BOM on write only when the source file had one.
                return new UTF8Encoding(utf8.HasBom);
            }

            // Tied best scores: the encoding cannot be decided.
            return null;
        }

        /// <summary>
        /// Opens a file read-only and asynchronously detects its character encoding.
        /// </summary>
        /// <param name="filepath">Path of the file to inspect.</param>
        /// <returns>
        /// The encoding reported by <see cref="GetCodeAsync"/>, or the class's default
        /// encoding when that method returns null.
        /// </returns>
        public static async Task<Encoding> GetCodeAsync2(string filepath)
        {
            using (FileStream stream = new FileStream(filepath, FileMode.Open, FileAccess.Read))
            {
                Encoding detected = await DectingEncode.GetCodeAsync(stream).ConfigureAwait(false);
                return detected ?? defalutEncoding;
            }
        }

        /// <summary>
        /// Opens a file read-only and detects its character encoding, blocking until done.
        /// </summary>
        /// <param name="filepath">Path of the file to inspect.</param>
        /// <returns>
        /// The encoding reported by <see cref="GetCodeAsync"/>, or the class's default
        /// encoding when that method returns null.
        /// </returns>
        public static Encoding GetCode2(string filepath)
        {
            using (FileStream stream = new FileStream(filepath, FileMode.Open, FileAccess.Read))
            {
                // Synchronous wrapper: waits for the asynchronous detection to finish.
                Encoding detected = DectingEncode.GetCodeAsync(stream).Result;
                if (detected != null)
                {
                    return detected;
                }
                return defalutEncoding;
            }
        }

        /// <summary>
        /// Slides a small read window over the stream from offset 0 and lets a callback decide
        /// how far to advance after each read.
        /// </summary>
        /// <remarks>
        /// The same buffer is reused for every read, so when fewer than
        /// <paramref name="count"/> bytes are read (at the end of the file) the bytes beyond
        /// the reported length still hold data from an earlier read and must be ignored.
        /// </remarks>
        /// <param name="fs">Seekable stream to scan; its position is overwritten.</param>
        /// <param name="count">Window size in bytes.</param>
        /// <param name="func">
        /// Receives the window buffer and the number of valid bytes in it. Returns how many
        /// bytes to advance before the next read; a value of zero or less stops the scan.
        /// </param>
        private static async Task ReadBytes(FileStream fs, int count, Func<byte[], int, int> func)
        {
            byte[] window = new byte[count];
            long position = 0;
            while (true)
            {
                fs.Position = position;
                int readCount = await fs.ReadAsync(window, 0, count).ConfigureAwait(false);
                if (readCount == 0)
                    break;

                int delta = func(window, readCount);

                // -1 is the callbacks' stop signal. Zero would re-read the same offset forever
                // and other negatives would move backwards, so any non-positive step stops.
                if (delta <= 0)
                    break;

                position += delta;
                if (position >= fs.Length)
                    break;
            }
        }

        /// <summary>
        /// Checks whether the stream looks like ISO-2022-JP (JIS) text by scanning for one of
        /// its escape sequences.
        /// </summary>
        /// <remarks>
        /// The scan advances one byte at a time. It ends with false as soon as a byte above
        /// 0x7F is met, and with true as soon as a known escape sequence is found. A stream
        /// with neither (including an empty one) yields false.
        /// </remarks>
        /// <param name="fs">Seekable stream to scan.</param>
        /// <returns>true when a JIS escape sequence is found before any 8-bit byte.</returns>
        private static async Task<bool> IsJis(FileStream fs)
        {
            const byte Esc = 0x1B;
            bool result = false;

            await ReadBytes(fs, 6, (bs, len) =>
            {
                byte b1 = bs[0];

                if (b1 > 0x7F)
                {
                    result = false;   // Not ISO-2022-JP (only 0x00-0x7F is allowed)
                    return -1;
                }

                // Every sequence checked below starts with ESC.
                if (b1 != Esc)
                {
                    return 1;
                }

                // Each check requires that many valid bytes: the window buffer is reused
                // between reads, so bytes at or beyond len are stale and must not be compared.
                if (len >= 3)
                {
                    byte b2 = bs[1], b3 = bs[2];
                    bool threeByteEscape =
                        (b2 == 0x28 && (b3 == 0x42 ||    // ESC ( B : ASCII
                                        b3 == 0x4A ||    // ESC ( J : JIS X 0201 Roman
                                        b3 == 0x49)) ||  // ESC ( I : JIS X 0201 Katakana
                        (b2 == 0x24 && (b3 == 0x40 ||    // ESC $ @ : JIS X 0208-1978
                                        b3 == 0x42));    // ESC $ B : JIS X 0208-1983
                    if (threeByteEscape)
                    {
                        result = true;
                        return -1;
                    }
                }

                // ESC $ ( D : JIS X 0212-1990
                if (len >= 4 &&
                    bs[1] == 0x24 && bs[2] == 0x28 && bs[3] == 0x44)
                {
                    result = true;
                    return -1;
                }

                // ESC & @ ESC $ B : JIS X 0208-1990
                if (len >= 6 &&
                    bs[1] == 0x26 && bs[2] == 0x40 &&
                    bs[3] == Esc && bs[4] == 0x24 && bs[5] == 0x42)
                {
                    result = true;
                    return -1;
                }

                return 1;
            }).ConfigureAwait(false);

            return result;
        }

        /// <summary>
        /// Checks whether every byte of the stream is 7-bit ASCII (0x00-0x7F).
        /// </summary>
        /// <param name="fs">Seekable stream to scan.</param>
        /// <returns>
        /// false as soon as a byte above 0x7F is found; true otherwise, including for a
        /// stream with no bytes.
        /// </returns>
        private static async Task<bool> IsAscii(FileStream fs)
        {
            bool allSevenBit = true;
            await ReadBytes(fs, 1, (window, valid) =>
            {
                if (window[0] > 0x7F)
                {
                    // First 8-bit byte: not ASCII, stop scanning.
                    allSevenBit = false;
                    return -1;
                }
                return 1;
            }).ConfigureAwait(false);
            return allSevenBit;
        }

        /// <summary>
        /// Result of scanning a stream for one candidate encoding.
        /// </summary>
        class EncodingScore
        {
            /// <summary>Accumulated score; starts at 0.</summary>
            public int Score = 0;

            /// <summary>Whether a byte order mark was seen; starts as false.</summary>
            public bool HasBom = false;

            /// <summary>Reports whether any score was accumulated.</summary>
            /// <returns>true when <see cref="Score"/> is greater than zero.</returns>
            public bool IsMatched()
            {
                return 0 < this.Score;
            }
        }

        /// <summary>
        /// Scores how much of the stream parses as Shift_JIS double-byte characters.
        /// </summary>
        /// <remarks>
        /// Bytes 0x00-0x7F and 0xA1-0xDF are skipped one at a time without scoring. A lead
        /// byte in 0x81-0x9F or 0xE0-0xFC followed by a trail byte in 0x40-0x7E or 0x80-0xFC
        /// adds 2 to the score and is skipped as a pair. Anything else is skipped as a single
        /// byte.
        /// </remarks>
        /// <param name="fs">Seekable stream to scan.</param>
        /// <returns>The accumulated score; HasBom is never set by this scan.</returns>
        private static async Task<EncodingScore> IsSJIS(FileStream fs)
        {
            EncodingScore score = new EncodingScore();

            await ReadBytes(fs, 2, (window, valid) =>
            {
                byte lead = window[0];

                bool singleByte = lead <= 0x7F || (lead >= 0xA1 && lead <= 0xDF);
                if (singleByte || valid != 2)
                {
                    return 1;
                }

                byte trail = window[1];
                bool leadOk = (lead >= 0x81 && lead <= 0x9F) || (lead >= 0xE0 && lead <= 0xFC);
                bool trailOk = (trail >= 0x40 && trail <= 0x7E) || (trail >= 0x80 && trail <= 0xFC);
                if (!(leadOk && trailOk))
                {
                    return 1;
                }

                // Valid double-byte character: count both bytes and skip past them.
                score.Score += 2;
                return 2;
            }).ConfigureAwait(false);

            return score;
        }

        /// <summary>
        /// Scores how much of the stream parses as EUC-JP multi-byte characters.
        /// </summary>
        /// <remarks>
        /// Bytes 0x00-0x7F are skipped without scoring. A two-byte character (both bytes in
        /// 0xA1-0xFE, or 0x8E followed by 0xA1-0xDF) adds 2 to the score. A three-byte
        /// character (0x8F followed by two bytes in 0xA1-0xFE) adds 3. Anything else is
        /// skipped as a single byte.
        /// </remarks>
        /// <param name="fs">Seekable stream to scan.</param>
        /// <returns>The accumulated score; HasBom is never set by this scan.</returns>
        private static async Task<EncodingScore> IsEUC(FileStream fs)
        {
            EncodingScore score = new EncodingScore();

            await ReadBytes(fs, 3, (window, valid) =>
            {
                byte first = window[0];
                if (first <= 0x7F)
                {
                    return 1;
                }

                // Number of bytes recognised as one character; 1 means "no match".
                int width = 1;

                if (first == 0x8F)
                {
                    if (valid >= 3 &&
                        window[1] >= 0xA1 && window[1] <= 0xFE &&
                        window[2] >= 0xA1 && window[2] <= 0xFE)
                    {
                        width = 3;
                    }
                }
                else if (first == 0x8E)
                {
                    if (valid >= 2 && window[1] >= 0xA1 && window[1] <= 0xDF)
                    {
                        width = 2;
                    }
                }
                else if (first >= 0xA1 && first <= 0xFE)
                {
                    if (valid >= 2 && window[1] >= 0xA1 && window[1] <= 0xFE)
                    {
                        width = 2;
                    }
                }

                if (width > 1)
                {
                    score.Score += width;
                }
                return width;
            }).ConfigureAwait(false);

            return score;
        }

        /// <summary>
        /// Scores how much of the stream parses as UTF-8 multi-byte sequences and records
        /// whether the stream starts with a UTF-8 byte order mark.
        /// </summary>
        /// <remarks>
        /// Bytes 0x00-0x7F are skipped without scoring. Each recognised 2-, 3- or 4-byte
        /// sequence adds its byte length to the score. The range checks are deliberately
        /// loose (lead bytes 0xC0-0xDF, 0xE0-0xEF, 0xF0-0xF7 with continuation bytes
        /// 0x80-0xBF); overlong or out-of-range sequences are not rejected.
        /// </remarks>
        /// <param name="fs">Seekable stream to scan.</param>
        /// <returns>The accumulated score and the BOM flag.</returns>
        private static async Task<EncodingScore> IsUTF8(FileStream fs)
        {
            EncodingScore result = new EncodingScore();

            // True only for the first callback, which sees the bytes at offset 0.
            bool atStart = true;

            await ReadBytes(fs, 4, (bs, len) =>
            {
                bool firstWindow = atStart;
                atStart = false;

                byte b1 = bs[0];
                if (b1 <= 0x7F)
                {
                    return 1;
                }

                if (len >= 2)
                {
                    byte b2 = bs[1];
                    if ((b1 >= 0xC0 && b1 <= 0xDF) &&
                        (b2 >= 0x80 && b2 <= 0xBF))
                    {
                        result.Score += 2;  // 2-byte character
                        return 2;
                    }
                }

                if (len >= 3)
                {
                    byte b2 = bs[1], b3 = bs[2];

                    // EF BB BF is a byte order mark only at the very start of the file;
                    // elsewhere it falls through and scores as an ordinary 3-byte character.
                    if (firstWindow && b1 == 0xEF && b2 == 0xBB && b3 == 0xBF)
                    {
                        result.HasBom = true;
                        result.Score += 3;
                        return 3;
                    }

                    if ((b1 >= 0xE0 && b1 <= 0xEF) &&
                        (b2 >= 0x80 && b2 <= 0xBF) &&
                        (b3 >= 0x80 && b3 <= 0xBF))
                    {
                        result.Score += 3;  // 3-byte character
                        return 3;
                    }
                }

                if (len >= 4)
                {
                    byte b2 = bs[1], b3 = bs[2], b4 = bs[3];
                    if ((b1 >= 0xF0 && b1 <= 0xF7) &&
                        (b2 >= 0x80 && b2 <= 0xBF) &&
                        (b3 >= 0x80 && b3 <= 0xBF) &&
                        (b4 >= 0x80 && b4 <= 0xBF))
                    {
                        // Score the full sequence length, as every other branch does.
                        result.Score += 4;  // 4-byte character
                        return 4;
                    }
                }

                return 1;
            }).ConfigureAwait(false);

            return result;
        }

    }
}
