Пример #1
0
 /// <summary>
 /// Decodes the string that begins <paramref name="offset"/> bytes into <paramref name="bytes"/>.
 /// </summary>
 /// <param name="bytes">Buffer holding the encoded text; it is pinned while the string is decoded.</param>
 /// <param name="offset">Byte offset of the first character inside <paramref name="bytes"/>.</param>
 /// <param name="enc">Encoding handed to the pointer-based <c>StrUtils.GetString</c> overload.</param>
 /// <returns>Whatever the pointer-based <c>StrUtils.GetString</c> overload returns for that position.</returns>
 public unsafe static string GetString(byte[] bytes, long offset, Encoding enc)
 {
     fixed (byte* head = bytes)
     {
         // No bounds check is made here; the offset is applied to the pinned address as-is.
         byte* start = head + offset;
         string decoded = StrUtils.GetString(start, enc);
         return decoded;
     }
 }
Пример #2
0
        /// <summary>
        /// Memory-maps <paramref name="fileName"/>, reads the dictionary header through a
        /// view stream, and points the <c>da</c>, <c>tokens</c> and <c>features</c> members
        /// at the sections that follow the header in the mapped view.
        /// </summary>
        /// <param name="fileName">Path of the dictionary file to map.</param>
        /// <exception cref="InvalidDataException">
        /// The size encoded in the magic number exceeds the view capacity, or the
        /// version field differs from <c>DicVersion</c>.
        /// </exception>
        public unsafe void Open(string fileName)
        {
            this.FileName = fileName;

            var fileStream = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.Read);
            try
            {
                // leaveOpen is false: once the mapping exists it is responsible for the stream.
                this.mmf = MemoryMappedFile.CreateFromFile(fileStream, null, 0L, MemoryMappedFileAccess.Read, HandleInheritability.None, false);
            }
            catch (Exception)
            {
                // The mapping was never created, so the stream has to be closed here.
                fileStream.Dispose();
                throw;
            }

            this.mmva = this.mmf.CreateViewAccessor(0L, 0L, MemoryMappedFileAccess.Read);

            // The acquired pointer is not released in this method.
            byte* cursor = null;
            this.mmva.SafeMemoryMappedViewHandle.AcquirePointer(ref cursor);

            using (var headerStream = this.mmf.CreateViewStream(0L, 0L, MemoryMappedFileAccess.Read))
            using (var header = new BinaryReader(headerStream))
            {
                uint magic = header.ReadUInt32();
                var declaredSize = magic ^ DictionaryMagicID;
                if (this.mmva.Capacity < declaredSize)
                {
                    throw new InvalidDataException($"dictionary file is broken. {fileName}");
                }

                this.Version = header.ReadUInt32();
                if (this.Version != DicVersion)
                {
                    throw new InvalidDataException($"incompatible version dictionaly. {fileName}");
                }

                this.Type = (DictionaryType)header.ReadUInt32();
                this.LexSize = header.ReadUInt32();
                this.LSize = header.ReadUInt32();
                this.RSize = header.ReadUInt32();
                uint daBytes = header.ReadUInt32();
                uint tokenBytes = header.ReadUInt32();
                header.ReadUInt32(); // feature section size: read to advance, not used here
                header.ReadUInt32(); // dummy field

                byte[] charSetField = header.ReadBytes(32);
                string charSetName = StrUtils.GetString(charSetField, Encoding.ASCII);
                this.encoding = StrUtils.GetEncoding(charSetName);

                // Everything consumed so far is header; the data sections start right after it.
                long headerLength = headerStream.Position;
                cursor += headerLength;

                this.da.Open(cursor, (int)daBytes);
                cursor += daBytes;

                this.tokens = (Token*)cursor;
                cursor += tokenBytes;

                this.features = cursor;
            }
        }
Пример #3
0
        /// <summary>
        /// Opens the property table, the system dictionary, every user dictionary and the
        /// unknown-word dictionary located in <paramref name="dicDir"/>, then builds the
        /// per-property unknown-word token table.
        /// </summary>
        /// <param name="dicDir">Directory that contains the dictionary files.</param>
        /// <param name="userDics">
        /// User dictionary file names, combined with <paramref name="dicDir"/>.
        /// <c>null</c> is treated as an empty list.
        /// </param>
        /// <exception cref="InvalidDataException">
        /// A dictionary has the wrong type, a user dictionary is incompatible with the
        /// system dictionary, or a property name is missing from the unknown-word dictionary.
        /// </exception>
        public unsafe void Open(string dicDir, string[] userDics)
        {
            // A missing list means "no user dictionaries" rather than a NullReferenceException.
            userDics = userDics ?? new string[0];

            this.property.Open(dicDir);

            // Slot 0 is the system dictionary; user dictionaries follow in order.
            this.dic = new MeCabDictionary[userDics.Length + 1];
            for (int i = 0; i < this.dic.Length; i++)
            {
                this.dic[i] = new MeCabDictionary();
            }

            var sysDic = this.dic[0];

            sysDic.Open(Path.Combine(dicDir, SysDicFile));
            if (sysDic.Type != DictionaryType.Sys)
            {
                throw new InvalidDataException($"not a system dictionary. {sysDic.FileName ?? ""}");
            }

            for (int i = 0; i < userDics.Length; i++)
            {
                var d = this.dic[i + 1];
                d.Open(Path.Combine(dicDir, userDics[i]));
                if (d.Type != DictionaryType.Usr)
                {
                    throw new InvalidDataException($"not a user dictionary. {d.FileName ?? ""}");
                }
                if (!sysDic.IsCompatible(d))
                {
                    throw new InvalidDataException($"incompatible dictionary. {d.FileName ?? ""}");
                }
            }

            this.unkDic.Open(Path.Combine(dicDir, UnkDicFile));
            if (this.unkDic.Type != DictionaryType.Unk)
            {
                throw new InvalidDataException($"not a unk dictionary. {UnkDicFile}");
            }

            this.unkTokens = new Token[this.property.Size][];
            for (int i = 0; i < this.unkTokens.Length; i++)
            {
                fixed(byte *key = this.property.Name(i))
                {
                    var n = this.unkDic.ExactMatchSearch(key, StrUtils.GetLength(key));

                    if (n.Value == -1)
                    {
                        // this.Encoding is only assigned at the end of this method, so it may
                        // still be unset here; decode the key with the system dictionary's
                        // encoding (the value this.Encoding is about to receive) instead.
                        throw new InvalidDataException($"cannot find UNK category: {StrUtils.GetString(key, sysDic.Encoding)} {this.unkDic.FileName ?? ""}");
                    }

                    this.unkTokens[i] = this.unkDic.GetTokensArray(n.Value);
                }
            }

            this.space = this.property.GetCharInfo(' ');

            this.Encoding = sysDic.Encoding;
        }
Пример #4
0
        /// <summary>
        /// Loads a dictionary from an in-memory image: header fields first, then the
        /// <c>da</c> section, the token records and the feature bytes.
        /// </summary>
        /// <param name="contents">Complete dictionary image.</param>
        /// <exception cref="MeCabInvalidFileException">
        /// The version field differs from <c>DicVersion</c>, or the sections do not
        /// end exactly at the end of <paramref name="contents"/>.
        /// </exception>
        public void Open(byte[] contents)
        {
            const int FieldSize = 4;       // every header field is a 32-bit value
            const int CharSetLength = 32;  // fixed width of the charset name field
            const int TokenSize = 16;      // bytes consumed per token record

            int pos = 0;

            // Magic number: read like every other field, but not validated in this overload.
            BitConverter.ToUInt32(contents, pos);
            pos += FieldSize;

            this.Version = BitConverter.ToUInt32(contents, pos);
            if (this.Version != DicVersion)
            {
                throw new MeCabInvalidFileException("incompatible version", "");
            }
            pos += FieldSize;

            this.Type = (DictionaryType)BitConverter.ToUInt32(contents, pos);
            pos += FieldSize;
            this.LexSize = BitConverter.ToUInt32(contents, pos);
            pos += FieldSize;
            this.LSize = BitConverter.ToUInt32(contents, pos);
            pos += FieldSize;
            this.RSize = BitConverter.ToUInt32(contents, pos);
            pos += FieldSize;
            uint daBytes = BitConverter.ToUInt32(contents, pos);
            pos += FieldSize;
            uint tokenBytes = BitConverter.ToUInt32(contents, pos);
            pos += FieldSize;
            uint featureBytes = BitConverter.ToUInt32(contents, pos);
            pos += FieldSize;

            pos += FieldSize; // dummy field

            // Copy the name into a buffer one byte longer so it always ends with a zero byte.
            byte[] charSetField = new byte[CharSetLength + 1];
            Buffer.BlockCopy(contents, pos, charSetField, 0, CharSetLength);
            charSetField[CharSetLength] = 0x00;
            pos += CharSetLength;

            string charSetName = StrUtils.GetString(charSetField, Encoding.ASCII);
            this.encoding = StrUtils.GetEncoding(charSetName);

            // da.Open advances pos itself (it is passed by reference).
            this.da.Open(contents, ref pos, daBytes);

            this.tokens = new Token[tokenBytes / TokenSize];
            for (int index = 0; index < this.tokens.Length; index++)
            {
                this.tokens[index] = Token.Create(contents, pos);
                pos += TokenSize;
            }

            int featureLength = (int)featureBytes;
            this.features = new byte[featureLength];
            Buffer.BlockCopy(contents, pos, this.features, 0, featureLength);
            pos += featureLength;

            // The three sections must account for every remaining byte.
            if (pos != contents.Length)
            {
                throw new MeCabInvalidFileException("dictionary file is broken", "");
            }
        }
Пример #5
0
        /// <summary>
        /// Loads a dictionary by reading the header, the <c>da</c> section, the token
        /// records and the feature bytes sequentially from <paramref name="reader"/>.
        /// </summary>
        /// <param name="reader">Reader positioned at the start of the dictionary data.</param>
        /// <exception cref="MeCabInvalidFileException">
        /// The stream length (when seekable) does not match the size encoded in the magic
        /// number, the version differs from <c>DicVersion</c>, or data remains after the
        /// feature section.
        /// </exception>
        public unsafe void Open(BinaryReader reader)
        {
            uint magic = reader.ReadUInt32();

            // The stream length can only be verified when the stream is seekable.
            if (reader.BaseStream.CanSeek)
            {
                if (reader.BaseStream.Length != (magic ^ DictionaryMagicID))
                {
                    throw new MeCabInvalidFileException("dictionary file is broken", this.FileName);
                }
            }

            this.Version = reader.ReadUInt32();
            if (this.Version != DicVersion)
            {
                throw new MeCabInvalidFileException("incompatible version", this.FileName);
            }

            this.Type = (DictionaryType)reader.ReadUInt32();
            this.LexSize = reader.ReadUInt32();
            this.LSize = reader.ReadUInt32();
            this.RSize = reader.ReadUInt32();
            uint daBytes = reader.ReadUInt32();
            uint tokenBytes = reader.ReadUInt32();
            uint featureBytes = reader.ReadUInt32();

            reader.ReadUInt32(); // dummy field

            byte[] charSetField = reader.ReadBytes(32);
            string charSetName = StrUtils.GetString(charSetField, Encoding.ASCII);
            this.encoding = StrUtils.GetEncoding(charSetName);

            this.da.Open(reader, daBytes);

            // The token section size is given in bytes; convert it to a record count.
            this.tokens = new Token[tokenBytes / sizeof(Token)];
            for (int index = 0; index < this.tokens.Length; index++)
            {
                this.tokens[index] = Token.Create(reader);
            }

            this.features = reader.ReadBytes((int)featureBytes);

            // Nothing may follow the feature section.
            int trailing = reader.BaseStream.ReadByte();
            if (trailing != -1)
            {
                throw new MeCabInvalidFileException("dictionary file is broken", this.FileName);
            }
        }
Пример #6
0
        /// <summary>
        /// Loads the dictionary file through <c>mmfLoader</c> and interprets the returned
        /// memory in place: ten 32-bit header fields, a 32-byte charset name, then the
        /// <c>da</c>, token and feature sections.
        /// </summary>
        /// <param name="fileName">Path of the dictionary file.</param>
        /// <exception cref="InvalidDataException">
        /// The loader's file size does not match the size encoded in the magic number,
        /// or the version differs from <c>DicVersion</c>.
        /// </exception>
        /// <exception cref="Exception">The charset name cannot be resolved to an encoding.</exception>
        public unsafe void Open(string fileName)
        {
            this.FileName = fileName;

            // The header is addressed as an array of 32-bit fields.
            uint* header = (uint*)this.mmfLoader.Invoke(fileName);

            uint magic = header[0];
            if (this.mmfLoader.FileSize != (magic ^ DictionaryMagicID))
            {
                throw new InvalidDataException($"dictionary file is broken. {fileName}");
            }

            this.Version = header[1];
            if (this.Version != DicVersion)
            {
                throw new InvalidDataException($"incompatible version dictionaly. {fileName}");
            }

            this.Type = (DictionaryType)header[2];
            this.LexSize = header[3];
            this.LSize = header[4];
            this.RSize = header[5];
            uint daBytes = header[6];
            uint tokenBytes = header[7];
            // header[8] (feature section size) and header[9] (dummy) are not needed here.

            byte* cursor = (byte*)(header + 10);

            var encName = StrUtils.GetString(cursor, Encoding.ASCII);
            var resolved = encName.GetEncodingOrNull();
            if (resolved == null)
            {
                throw new Exception($"not supported encoding dictionary. {encName} {fileName}");
            }
            this.Encoding = resolved;
            cursor += 32; // the charset name field is always 32 bytes wide

            this.da.Open(cursor, (int)daBytes);
            cursor += daBytes;

            this.tokens = (Token*)cursor;
            cursor += tokenBytes;

            this.features = cursor;
        }
Пример #7
0
        /// <summary>
        /// Reads the dictionary header from <paramref name="mmf"/> and creates view
        /// accessors over the token and feature sections; the <c>da</c> member is opened
        /// on the same mapping.
        /// </summary>
        /// <param name="mmf">Memory-mapped dictionary file.</param>
        /// <param name="filePath">Path recorded in <c>FileName</c> and reported in error messages; may be null.</param>
        /// <exception cref="MeCabInvalidFileException">
        /// The view is shorter than the size encoded in the magic number, or the version
        /// differs from <c>DicVersion</c>.
        /// </exception>
        public void Open(MemoryMappedFile mmf, string filePath = null)
        {
            this.FileName = filePath;

            using (MemoryMappedViewStream view = mmf.CreateViewStream(0L, 0L, MemoryMappedFileAccess.Read))
            using (BinaryReader header = new BinaryReader(view))
            {
                uint magic = header.ReadUInt32();

                // The exact file size cannot be obtained from the view, so an
                // inequality is used in place of an equality check.
                if (view.CanSeek)
                {
                    if (view.Length < (magic ^ DictionaryMagicID))
                    {
                        throw new MeCabInvalidFileException("dictionary file is broken", filePath);
                    }
                }

                this.Version = header.ReadUInt32();
                if (this.Version != DicVersion)
                {
                    throw new MeCabInvalidFileException("incompatible version", filePath);
                }

                this.Type = (DictionaryType)header.ReadUInt32();
                this.LexSize = header.ReadUInt32();
                this.LSize = header.ReadUInt32();
                this.RSize = header.ReadUInt32();
                uint daBytes = header.ReadUInt32();
                uint tokenBytes = header.ReadUInt32();
                uint featureBytes = header.ReadUInt32();
                header.ReadUInt32(); // dummy field

                byte[] charSetField = header.ReadBytes(32);
                string charSetName = StrUtils.GetString(charSetField, Encoding.ASCII);
                this.encoding = StrUtils.GetEncoding(charSetName);

                // The three sections are laid out back to back after the header.
                long daOffset = view.Position;
                long tokensOffset = daOffset + daBytes;
                long featuresOffset = tokensOffset + tokenBytes;

                this.da.Open(mmf, daOffset, daBytes);
                this.tokens = mmf.CreateViewAccessor(tokensOffset, tokenBytes, MemoryMappedFileAccess.Read);
                this.features = mmf.CreateViewAccessor(featuresOffset, featureBytes, MemoryMappedFileAccess.Read);
            }
        }
Пример #8
0
        /// <summary>
        /// Reads a name count, that many 32-byte ASCII names into <c>cList</c>, and then
        /// one 32-bit value per existing <c>charInfoList</c> entry.
        /// </summary>
        /// <param name="reader">Reader positioned at the start of the data.</param>
        /// <param name="fileName">File name reported in error messages; may be null.</param>
        /// <exception cref="MeCabInvalidFileException">
        /// The stream is seekable and its length does not equal the size implied by the
        /// name count and the length of <c>charInfoList</c>.
        /// </exception>
        public void Open(BinaryReader reader, string fileName = null)
        {
            uint nameCount = reader.ReadUInt32();

            if (reader.BaseStream.CanSeek)
            {
                // Do the arithmetic in 64 bits. "4 + 32 * nameCount" would be evaluated
                // as uint and wrap around for large counts, letting a corrupt file with
                // a huge count pass the size check.
                long expectedSize = 4L + 32L * nameCount + 4L * this.charInfoList.Length;
                if (reader.BaseStream.Length != expectedSize)
                {
                    throw new MeCabInvalidFileException("invalid file size", fileName);
                }
            }

            this.cList = new string[nameCount];
            for (int i = 0; i < this.cList.Length; i++)
            {
                this.cList[i] = StrUtils.GetString(reader.ReadBytes(32), Encoding.ASCII);
            }

            for (int j = 0; j < this.charInfoList.Length; j++)
            {
                this.charInfoList[j] = new CharInfo(reader.ReadUInt32());
            }
        }
Пример #9
0
        /// <summary>
        /// Loads a dictionary by reading the header, the <c>da</c> section, the token
        /// records and the feature bytes sequentially from <paramref name="reader"/>.
        /// </summary>
        /// <param name="reader">Reader positioned at the start of the dictionary data.</param>
        /// <exception cref="MeCabInvalidFileException">
        /// The stream length (when seekable) does not match the size encoded in the first
        /// field, the version is not 102, or data remains after the feature section.
        /// </exception>
        public unsafe void Open(BinaryReader reader)
        {
            // Same bit pattern as the signed literal -277770377.
            const uint SizeMask = 0xEF718F77;
            const uint SupportedVersion = 102;

            uint magic = reader.ReadUInt32();

            // The length can only be verified on seekable streams.
            if (reader.BaseStream.CanSeek)
            {
                uint declaredSize = magic ^ SizeMask;
                if (reader.BaseStream.Length != declaredSize)
                {
                    throw new MeCabInvalidFileException("dictionary file is broken", this.FileName);
                }
            }

            this.Version = reader.ReadUInt32();
            if (this.Version != SupportedVersion)
            {
                throw new MeCabInvalidFileException("incompatible version", this.FileName);
            }

            this.Type = (DictionaryType)reader.ReadUInt32();
            this.LexSize = reader.ReadUInt32();
            this.LSize = reader.ReadUInt32();
            this.RSize = reader.ReadUInt32();
            uint daBytes = reader.ReadUInt32();
            uint tokenBytes = reader.ReadUInt32();
            uint featureBytes = reader.ReadUInt32();

            reader.ReadUInt32(); // field whose value is not used

            string charSetName = StrUtils.GetString(reader.ReadBytes(32), Encoding.ASCII);
            if (charSetName == "UTF8")
            {
                // Map the name stored in the file to the one Encoding.GetEncoding is given.
                charSetName = "UTF-8";
            }
            this.encoding = Encoding.GetEncoding(charSetName);

            this.da.Open(reader, daBytes);

            // The token section size is given in bytes; convert it to a record count.
            long tokenCount = (long)tokenBytes / (long)sizeof(Token);
            this.tokens = new Token[tokenCount];
            for (int index = 0; index < this.tokens.Length; index++)
            {
                this.tokens[index] = Token.Create(reader);
            }

            this.features = reader.ReadBytes((int)featureBytes);

            // Nothing may follow the feature section.
            if (reader.BaseStream.ReadByte() != -1)
            {
                throw new MeCabInvalidFileException("dictionary file is broken", this.FileName);
            }
        }
Пример #10
0
        /// <summary>
        /// Reads a name count, that many 32-byte ASCII names into <c>cList</c>, and then
        /// one 32-bit value per existing <c>charInfoList</c> entry.
        /// </summary>
        /// <param name="reader">Reader positioned at the start of the data.</param>
        /// <param name="fileName">File name reported in error messages; may be null.</param>
        /// <exception cref="MeCabInvalidFileException">
        /// The stream is seekable and its length does not equal the size implied by the
        /// name count and the length of <c>charInfoList</c>.
        /// </exception>
        public void Open(BinaryReader reader, string fileName = null)
        {
            uint cSize = reader.ReadUInt32();

            if (reader.BaseStream.CanSeek)
            {
                // Widen to long before multiplying. "sizeof(uint) + 32 * cSize" would be
                // computed in 32-bit unsigned arithmetic and silently wrap for large
                // counts, so a corrupt file could slip past this size check.
                long fSize = sizeof(uint) + 32L * cSize + (long)sizeof(uint) * charInfoList.Length;
                if (reader.BaseStream.Length != fSize)
                {
                    throw new MeCabInvalidFileException("invalid file size", fileName);
                }
            }

            this.cList = new string[cSize];
            for (int i = 0; i < this.cList.Length; i++)
            {
                this.cList[i] = StrUtils.GetString(reader.ReadBytes(32), Encoding.ASCII);
            }

            for (int i = 0; i < this.charInfoList.Length; i++)
            {
                this.charInfoList[i] = new CharInfo(reader.ReadUInt32());
            }
        }
Пример #11
0
        // For the embedded-resource build (2019/10/20).
        /// <summary>
        /// Loads the name list and the <c>charInfoList</c> values from the embedded
        /// <c>Properties.Resources._char</c> byte array.
        /// </summary>
        public void Open()
        {
            const int CountFieldSize = 4;  // leading 32-bit name count
            const int NameLength = 32;     // fixed width of each name field
            const int InfoSize = 4;        // one 32-bit value per charInfoList entry

            byte[] image = Properties.Resources._char;
            int pos = 0;

            uint nameCount = BitConverter.ToUInt32(image, pos);
            pos += CountFieldSize;

            this.cList = new string[nameCount];
            for (int index = 0; index < this.cList.Length; index++)
            {
                // Copy each name into a buffer one byte longer so it always ends with a zero byte.
                byte[] nameField = new byte[NameLength + 1];
                Buffer.BlockCopy(image, pos, nameField, 0, NameLength);
                nameField[NameLength] = 0x00;
                pos += NameLength;

                this.cList[index] = StrUtils.GetString(nameField, Encoding.ASCII);
            }

            for (int index = 0; index < this.charInfoList.Length; index++)
            {
                uint raw = BitConverter.ToUInt32(image, pos);
                charInfoList[index] = new CharInfo(raw);
                pos += InfoSize;
            }
        }
Пример #12
0
 /// <summary>
 /// Decodes the feature string stored at <paramref name="featurePos"/> in the feature data.
 /// </summary>
 /// <param name="featurePos">Offset of the string within <c>features</c>.</param>
 /// <returns>The string decoded by <c>StrUtils.GetString</c> using this instance's encoding.</returns>
 public string GetFeature(uint featurePos)
 {
     long start = featurePos;
     string feature = StrUtils.GetString(this.features, start, this.encoding);
     return feature;
 }
Пример #13
0
 /// <summary>
 /// Decodes the string that begins <paramref name="offset"/> bytes past <paramref name="bytes"/>.
 /// </summary>
 /// <param name="bytes">Base address of the encoded data.</param>
 /// <param name="offset">Byte offset added to <paramref name="bytes"/> before decoding.</param>
 /// <param name="enc">Encoding handed to the two-argument <c>StrUtils.GetString</c> overload.</param>
 /// <returns>Whatever the two-argument pointer overload returns for the adjusted address.</returns>
 public unsafe static string GetString(byte *bytes, long offset, Encoding enc)
 {
     byte* start = bytes + offset;
     return StrUtils.GetString(start, enc);
 }
Пример #14
0
 /// <summary>
 /// Decodes the string that starts at the beginning of <paramref name="bytes"/>.
 /// </summary>
 /// <param name="bytes">Buffer holding the encoded text.</param>
 /// <param name="enc">Encoding handed to the offset-taking <c>StrUtils.GetString</c> overload.</param>
 /// <returns>Whatever the offset-taking overload returns for offset zero.</returns>
 public static string GetString(byte[] bytes, Encoding enc)
 {
     const long Start = 0L;
     return StrUtils.GetString(bytes, Start, enc);
 }