﻿//  MeCab -- Yet Another Part-of-Speech and Morphological Analyzer
//
//  Copyright(C) 2001-2006 Taku Kudo <taku@chasen.org>
//  Copyright(C) 2004-2006 Nippon Telegraph and Telephone Corporation
using System;
using System.Collections.Generic;
using System.Text;
using System.IO;

namespace NMeCab.Core
{
    /// <summary>
    /// Loads a binary MeCab dictionary (header, double-array index, token table and
    /// feature bytes) and provides key lookup over the loaded data.
    /// </summary>
    public class MeCabDictionary
    {
        #region Const

        // The first header word of a dictionary file is (file length XOR this value).
        private const uint DictionaryMagicID = 0xEF718F77u;

        // The only dictionary format version accepted by Open.
        private const uint DicVersion = 102u;

        // Size in bytes of the fixed-width charset name field in the header.
        private const int CharSetFieldSize = 32;

        // Encoded keys up to this many bytes use a stack buffer; larger keys use a heap
        // buffer so that a very long input cannot overflow the thread stack.
        private const int MaxStackAllocBytes = 16 * 1024;

        #endregion

        #region Field/Property

        private Token[] tokens;

        private byte[] features;

        private DoubleArray da = new DoubleArray();

        private Encoding encoding;

        /// <summary>
        /// Character set name of the dictionary, as stored in the file header.
        /// </summary>
        public string CharSet { get; private set; }

        /// <summary>
        /// Dictionary format version read from the file header.
        /// </summary>
        public uint Version { get; private set; }

        /// <summary>
        /// Dictionary type read from the file header.
        /// </summary>
        public DictionaryType Type { get; private set; }

        /// <summary>
        /// Lexicon size field read from the file header.
        /// </summary>
        public uint LexSize { get; private set; }

        /// <summary>
        /// Size of the left-context ID set.
        /// </summary>
        public uint LSize { get; private set; }

        /// <summary>
        /// Size of the right-context ID set.
        /// </summary>
        public uint RSize { get; private set; }

        /// <summary>
        /// File name of the dictionary (the path passed to Open).
        /// </summary>
        public string FileName { get; private set; }

        #endregion

        #region Open Methods

        /// <summary>
        /// Opens the dictionary file at the given path and loads it into memory.
        /// </summary>
        /// <param name="filePath">Path of the binary dictionary file.</param>
        /// <exception cref="MeCabInvalidFileException">
        /// The file is broken or has an incompatible version.
        /// </exception>
        public void Open(string filePath)
        {
            // Request read-only access: FileStream(path, FileMode) asks for read/write access,
            // which fails on read-only files and blocks other readers of the same dictionary.
            using (FileStream fileStream = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.Read))
            using (BinaryReader reader = new BinaryReader(fileStream))
            {
                this.Open(reader, filePath);
            }
        }

        /// <summary>
        /// Loads the dictionary from a reader positioned at the start of the dictionary data.
        /// </summary>
        /// <param name="reader">Reader over the binary dictionary data.</param>
        /// <param name="filePath">Path recorded in <see cref="FileName"/> and used in error reports.</param>
        /// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
        /// <exception cref="MeCabInvalidFileException">
        /// The data is broken (length mismatch, truncated section, trailing bytes)
        /// or has an incompatible version.
        /// </exception>
        public unsafe void Open(BinaryReader reader, string filePath = "")
        {
            if (reader == null)
                throw new ArgumentNullException("reader");

            this.FileName = filePath;

            // The stream length can only be verified when the stream is seekable.
            uint magic = reader.ReadUInt32();
            if (reader.BaseStream.CanSeek
                && reader.BaseStream.Length != (magic ^ DictionaryMagicID))
                throw new MeCabInvalidFileException("dictionary file is broken", filePath);

            this.Version = reader.ReadUInt32();
            if (this.Version != DicVersion)
                throw new MeCabInvalidFileException("incompatible version", filePath);

            this.Type = (DictionaryType)reader.ReadUInt32();
            this.LexSize = reader.ReadUInt32();
            this.LSize = reader.ReadUInt32();
            this.RSize = reader.ReadUInt32();
            uint dSize = reader.ReadUInt32(); // byte size passed to the double-array loader
            uint tSize = reader.ReadUInt32(); // byte size of the token table
            uint fSize = reader.ReadUInt32(); // byte size of the feature table
            reader.ReadUInt32(); //dummy

            // ReadBytes returns fewer bytes than requested at end of stream, so verify the count.
            byte[] charSetBytes = reader.ReadBytes(CharSetFieldSize);
            if (charSetBytes.Length != CharSetFieldSize)
                throw new MeCabInvalidFileException("dictionary file is broken", filePath);

            this.CharSet = Utils.GetString(charSetBytes, Encoding.ASCII);
            this.encoding = Encoding.GetEncoding(this.CharSet);

            this.da.Open(reader, dSize);

            this.tokens = new Token[tSize / sizeof(Token)];
            for (int i = 0; i < this.tokens.Length; i++)
            {
                this.tokens[i] = new Token(reader);
            }

            // A feature size that does not fit in an int cannot be read into a single array.
            if (fSize > int.MaxValue)
                throw new MeCabInvalidFileException("dictionary file is broken", filePath);

            this.features = reader.ReadBytes((int)fSize);
            if (this.features.Length != (int)fSize)
                throw new MeCabInvalidFileException("dictionary file is broken", filePath);

            // Nothing may follow the feature table.
            if (reader.BaseStream.ReadByte() != -1)
                throw new MeCabInvalidFileException("dictionary file is broken", filePath);
        }

        #endregion

        #region Search Methods

        /// <summary>
        /// Looks up a key in the double-array index by exact match.
        /// </summary>
        /// <param name="key">Key string to look up.</param>
        /// <returns>The search result, with its length expressed in characters.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="key"/> is null.</exception>
        public unsafe DoubleArray.ResultPair ExactMatchSearch(string key)
        {
            if (key == null)
                throw new ArgumentNullException("key");

            fixed (char* pKey = key)
                return this.ExactMatchSearch(pKey, key.Length, 0);
        }

        /// <summary>
        /// Looks up a key in the double-array index by exact match.
        /// </summary>
        /// <param name="key">Pointer to the first character of the key.</param>
        /// <param name="len">Number of characters in the key.</param>
        /// <param name="nodePos">Node position passed through to the double-array search.</param>
        /// <returns>The search result, with its length expressed in characters.</returns>
        public unsafe DoubleArray.ResultPair ExactMatchSearch(char* key, int len, int nodePos = 0)
        {
            int maxByteCount = this.encoding.GetMaxByteCount(len);

            if (maxByteCount <= MaxStackAllocBytes)
            {
                byte* stackBuffer = stackalloc byte[maxByteCount];
                return this.ExactMatchSearchCore(key, len, stackBuffer, maxByteCount, nodePos);
            }

            // Large keys are encoded into a pinned heap buffer instead of the stack.
            byte[] heapBuffer = new byte[maxByteCount];
            fixed (byte* pinnedBuffer = heapBuffer)
                return this.ExactMatchSearchCore(key, len, pinnedBuffer, maxByteCount, nodePos);
        }

        // Encodes the key into the supplied buffer, runs the exact-match search on the bytes
        // and converts the matched byte length back into a character count.
        private unsafe DoubleArray.ResultPair ExactMatchSearchCore(char* key, int len, byte* bytes, int maxByteCount, int nodePos)
        {
            int bytesLen = this.encoding.GetBytes(key, len, bytes, maxByteCount);

            DoubleArray.ResultPair result = this.da.ExactMatchSearch(bytes, bytesLen, nodePos);

            // The index works on encoded bytes; callers work in characters.
            result.Length = this.encoding.GetCharCount(bytes, result.Length);

            return result;
        }

        /// <summary>
        /// Searches the double-array index for entries that are prefixes of the key.
        /// </summary>
        /// <param name="key">Pointer to the first character of the key.</param>
        /// <param name="len">Number of characters in the key.</param>
        /// <param name="result">Buffer that receives the results.</param>
        /// <param name="rLen">Capacity of <paramref name="result"/>, passed to the double-array search.</param>
        /// <returns>
        /// The count returned by the double-array search; that many leading entries of
        /// <paramref name="result"/> have their lengths converted to character counts.
        /// </returns>
        public unsafe int CommonPrefixSearch(char* key, int len, DoubleArray.ResultPair* result, int rLen)
        {
            int maxByteLen = this.encoding.GetMaxByteCount(len);

            if (maxByteLen <= MaxStackAllocBytes)
            {
                byte* stackBuffer = stackalloc byte[maxByteLen];
                return this.CommonPrefixSearchCore(key, len, stackBuffer, maxByteLen, result, rLen);
            }

            // Large keys are encoded into a pinned heap buffer instead of the stack.
            byte[] heapBuffer = new byte[maxByteLen];
            fixed (byte* pinnedBuffer = heapBuffer)
                return this.CommonPrefixSearchCore(key, len, pinnedBuffer, maxByteLen, result, rLen);
        }

        // Encodes the key into the supplied buffer, runs the common-prefix search on the bytes
        // and converts each matched byte length back into a character count.
        private unsafe int CommonPrefixSearchCore(char* key, int len, byte* bytes, int maxByteLen, DoubleArray.ResultPair* result, int rLen)
        {
            int bytesLen = this.encoding.GetBytes(key, len, bytes, maxByteLen);

            int n = this.da.CommonPrefixSearch(bytes, result, rLen, bytesLen);

            // The index works on encoded bytes; callers work in characters.
            for (int i = 0; i < n; i++)
                result[i].Length = this.encoding.GetCharCount(bytes, result[i].Length);

            return n;
        }

        #endregion

        #region Get Information Methods

        /// <summary>
        /// Returns a copy of the tokens referenced by a search result.
        /// </summary>
        /// <param name="n">Search result whose value encodes the token position and count.</param>
        /// <returns>A new array holding the referenced tokens.</returns>
        public Token[] GetToken(DoubleArray.ResultPair n)
        {
            Token[] dist = new Token[this.GetTokenSize(n)];
            int tokenPos = n.Value >> 8; // the bits above the low byte hold the start index
            Array.Copy(this.tokens, tokenPos, dist, 0, dist.Length);
            return dist;
        }

        /// <summary>
        /// Returns the number of tokens referenced by a search result
        /// (the low 8 bits of its value).
        /// </summary>
        /// <param name="n">Search result to inspect.</param>
        /// <returns>The token count.</returns>
        public int GetTokenSize(DoubleArray.ResultPair n)
        {
            return byte.MaxValue & n.Value; // byte.MaxValue=0xFF
        }

        /// <summary>
        /// Returns the string stored at the given position of the feature table,
        /// decoded with the dictionary encoding. The method name is kept as-is for
        /// caller compatibility.
        /// </summary>
        /// <param name="featurePos">Offset into the feature table.</param>
        /// <returns>The decoded string.</returns>
        public string GetFuture(uint featurePos)
        {
            return Utils.GetString(this.features, (long)featurePos, this.encoding);
        }

        #endregion

        #region etc. Methods

        /// <summary>
        /// Checks whether another dictionary has the same version, context ID sizes
        /// and character set as this one.
        /// </summary>
        /// <param name="d">Dictionary to compare with.</param>
        /// <returns>true when all four values are equal; otherwise false.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="d"/> is null.</exception>
        public bool IsCompatible(MeCabDictionary d)
        {
            if (d == null)
                throw new ArgumentNullException("d");

            return (this.Version == d.Version &&
                    this.LSize == d.LSize &&
                    this.RSize == d.RSize &&
                    this.CharSet == d.CharSet);
        }

        #endregion
    }
}
