示例#1
0
        /// <summary>
        /// Opens the dictionary file at <paramref name="fileName"/> through a read-only
        /// memory mapping, parses its header and points the double array, token table and
        /// feature table at the mapped memory.
        /// </summary>
        /// <param name="fileName">Path of the dictionary file to map.</param>
        /// <exception cref="InvalidDataException">
        /// The size encoded in the magic number or the section sizes in the header exceed
        /// the mapped view, or the dictionary version differs from <c>DicVersion</c>.
        /// </exception>
        public unsafe void Open(string fileName)
        {
            this.FileName = fileName;

            var sourceFileStream = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.Read);

            try
            {
                // leaveOpen is false: once the mapping exists it owns the file stream.
                this.mmf = MemoryMappedFile.CreateFromFile(sourceFileStream, null, 0L, MemoryMappedFileAccess.Read, HandleInheritability.None, false);
            }
            catch (Exception)
            {
                sourceFileStream.Dispose();
                throw;
            }

            MemoryMappedViewAccessor view = null;
            bool pointerAcquired = false;

            try
            {
                view = this.mmf.CreateViewAccessor(0L, 0L, MemoryMappedFileAccess.Read);
                this.mmva = view;

                byte *ptr = null;

                view.SafeMemoryMappedViewHandle.AcquirePointer(ref ptr);
                pointerAcquired = true;

                using (var stream = this.mmf.CreateViewStream(0L, 0L, MemoryMappedFileAccess.Read))
                using (var reader = new BinaryReader(stream))
                {
                    // The magic number XOR-ed with DictionaryMagicID is compared against the
                    // view capacity; a view that is smaller than that value cannot hold the file.
                    uint magic = reader.ReadUInt32();
                    if (view.Capacity < (magic ^ DictionaryMagicID))
                    {
                        throw new InvalidDataException($"dictionary file is broken. {fileName}");
                    }

                    this.Version = reader.ReadUInt32();
                    if (this.Version != DicVersion)
                    {
                        throw new InvalidDataException($"incompatible version dictionaly. {fileName}");
                    }

                    this.Type    = (DictionaryType)reader.ReadUInt32();
                    this.LexSize = reader.ReadUInt32();
                    this.LSize   = reader.ReadUInt32();
                    this.RSize   = reader.ReadUInt32();
                    uint dSize = reader.ReadUInt32();
                    uint tSize = reader.ReadUInt32();
                    uint fSize = reader.ReadUInt32();
                    reader.ReadUInt32(); //dummy

                    string charSet = StrUtils.GetString(reader.ReadBytes(32), Encoding.ASCII);
                    this.encoding = StrUtils.GetEncoding(charSet);

                    // The three sections follow the header back to back. Reject headers whose
                    // sizes would place any of them past the end of the mapped view, because
                    // the raw pointers below are never bounds-checked.
                    long headerSize   = stream.Position;
                    long requiredSize = headerSize + dSize + tSize + fSize;
                    if (view.Capacity < requiredSize)
                    {
                        throw new InvalidDataException($"dictionary file is broken. {fileName}");
                    }

                    ptr += headerSize;

                    this.da.Open(ptr, (int)dSize);
                    ptr += dSize;

                    this.tokens = (Token *)ptr;
                    ptr        += tSize;

                    this.features = ptr;
                }
            }
            catch
            {
                // Opening failed part-way: release the pointer, the view and the mapping
                // (which owns the file stream) so the file is not kept open, and leave the
                // fields as they are when no mapping has been created.
                if (view != null)
                {
                    if (pointerAcquired)
                    {
                        view.SafeMemoryMappedViewHandle.ReleasePointer();
                    }
                    view.Dispose();
                }
                this.mmva = null;

                this.mmf.Dispose();
                this.mmf = null;

                throw;
            }
        }
示例#2
0
        /// <summary>
        /// Opens the character property file, the system dictionary, the given user
        /// dictionaries and the unknown-word dictionary found in <paramref name="dicDir"/>,
        /// then caches the unknown-word tokens of every character category.
        /// </summary>
        /// <param name="dicDir">Directory that contains the dictionary files.</param>
        /// <param name="userDics">
        /// User dictionary file names, combined with <paramref name="dicDir"/>.
        /// <c>null</c> is treated as "no user dictionaries".
        /// </param>
        /// <exception cref="InvalidDataException">
        /// A dictionary has an unexpected type, a user dictionary is not compatible with the
        /// system dictionary, or a character category is missing from the unknown-word dictionary.
        /// </exception>
        public unsafe void Open(string dicDir, string[] userDics)
        {
            if (userDics == null)
            {
                userDics = new string[0];
            }

            this.property.Open(dicDir);

            // Slot 0 is the system dictionary; user dictionaries follow in the given order.
            this.dic = new MeCabDictionary[userDics.Length + 1];
            for (int i = 0; i < this.dic.Length; i++)
            {
                this.dic[i] = new MeCabDictionary();
            }

            var sysDic = this.dic[0];

            sysDic.Open(Path.Combine(dicDir, SysDicFile));
            if (sysDic.Type != DictionaryType.Sys)
            {
                throw new InvalidDataException($"not a system dictionary. {sysDic.FileName ?? ""}");
            }

            for (int i = 0; i < userDics.Length; i++)
            {
                var d = this.dic[i + 1];
                d.Open(Path.Combine(dicDir, userDics[i]));
                if (d.Type != DictionaryType.Usr)
                {
                    throw new InvalidDataException($"not a user dictionary. {d.FileName ?? ""}");
                }
                if (!sysDic.IsCompatible(d))
                {
                    throw new InvalidDataException($"incompatible dictionary. {d.FileName ?? ""}");
                }
            }

            this.unkDic.Open(Path.Combine(dicDir, UnkDicFile));
            if (this.unkDic.Type != DictionaryType.Unk)
            {
                throw new InvalidDataException($"not a unk dictionary. {UnkDicFile}");
            }

            this.unkTokens = new Token[this.property.Size][];
            for (int i = 0; i < this.unkTokens.Length; i++)
            {
                fixed(byte *key = this.property.Name(i))
                {
                    var n = this.unkDic.ExactMatchSearch(key, StrUtils.GetLength(key));

                    if (n.Value == -1)
                    {
                        // this.Encoding is only assigned at the end of this method, so decode
                        // the key with the system dictionary's encoding directly; otherwise
                        // building this message could fail and hide the real error.
                        var keyEncoding = StrUtils.GetEncoding(sysDic.CharSet);
                        throw new InvalidDataException($"cannot find UNK category: {StrUtils.GetString(key, keyEncoding)} {this.unkDic.FileName ?? ""}");
                    }

                    this.unkTokens[i] = this.unkDic.GetTokensArray(n.Value);
                }
            }

            this.space = this.property.GetCharInfo(' ');

            this.Encoding = StrUtils.GetEncoding(sysDic.CharSet);
        }
示例#3
0
        /// <summary>
        /// Loads a dictionary from an in-memory image of a dictionary file.
        /// </summary>
        /// <param name="contents">Complete contents of the dictionary file.</param>
        /// <exception cref="MeCabInvalidFileException">
        /// The size encoded in the magic number does not match the array length, the
        /// version differs from <c>DicVersion</c>, or the section sizes in the header do
        /// not add up to the array length.
        /// </exception>
        public void Open(byte[] contents)
        {
            // Number of bytes consumed per token; the token table size is divided by this.
            const int tokenSize = 16;

            int  offset = 0;
            uint magic  = BitConverter.ToUInt32(contents, offset);
            offset += 4;

            // Same consistency check the stream-based overload performs: the magic number
            // XOR-ed with DictionaryMagicID must equal the total size of the data.
            if (contents.Length != (magic ^ DictionaryMagicID))
            {
                throw new MeCabInvalidFileException("dictionary file is broken", "");
            }

            this.Version = BitConverter.ToUInt32(contents, offset);
            if (this.Version != DicVersion)
            {
                throw new MeCabInvalidFileException("incompatible version", "");
            }

            offset += 4;

            this.Type    = (DictionaryType)BitConverter.ToUInt32(contents, offset); offset += 4;
            this.LexSize = BitConverter.ToUInt32(contents, offset); offset += 4;
            this.LSize   = BitConverter.ToUInt32(contents, offset); offset += 4;
            this.RSize   = BitConverter.ToUInt32(contents, offset); offset += 4;
            uint dSize = BitConverter.ToUInt32(contents, offset); offset += 4;
            uint tSize = BitConverter.ToUInt32(contents, offset); offset += 4;
            uint fSize = BitConverter.ToUInt32(contents, offset); offset += 4;

            offset += 4;  //dummy

            // Copy the 32-byte charset field into a buffer with one extra zero byte so the
            // name is always NUL-terminated.
            byte[] b32 = new byte[33];
            Buffer.BlockCopy(contents, offset, b32, 0, 32);
            b32[32] = 0x00;
            offset += 32;
            string charSet = StrUtils.GetString(b32, Encoding.ASCII);

            this.encoding = StrUtils.GetEncoding(charSet);

            // da.Open receives offset by reference.
            this.da.Open(contents, ref offset, dSize);

            this.tokens = new Token[tSize / tokenSize];
            for (int i = 0; i < this.tokens.Length; i++)
            {
                this.tokens[i] = Token.Create(contents, offset);
                offset        += tokenSize;
            }

            this.features = new byte[(int)fSize];
            Buffer.BlockCopy(contents, offset, this.features, 0, (int)fSize);
            offset += (int)fSize;

            // Every byte must have been consumed; anything left over means the header
            // sizes do not describe this data.
            if (offset != contents.Length)
            {
                throw new MeCabInvalidFileException("dictionary file is broken", "");
            }
        }
        /// <summary>
        /// Loads a dictionary by reading it sequentially from <paramref name="reader"/>.
        /// </summary>
        /// <param name="reader">Reader positioned at the first byte of the dictionary data.</param>
        /// <exception cref="MeCabInvalidFileException">
        /// The stream length does not match the size encoded in the magic number (checked
        /// only for seekable streams), the version differs from <c>DicVersion</c>, the
        /// charset field or feature section is shorter than the header declares, or data
        /// remains after the feature section.
        /// </exception>
        public unsafe void Open(BinaryReader reader)
        {
            uint magic = reader.ReadUInt32();

            // Check the stream length only when the stream supports seeking.
            if (reader.BaseStream.CanSeek && reader.BaseStream.Length != (magic ^ DictionaryMagicID))
            {
                throw new MeCabInvalidFileException("dictionary file is broken", this.FileName);
            }

            this.Version = reader.ReadUInt32();
            if (this.Version != DicVersion)
            {
                throw new MeCabInvalidFileException("incompatible version", this.FileName);
            }

            this.Type    = (DictionaryType)reader.ReadUInt32();
            this.LexSize = reader.ReadUInt32();
            this.LSize   = reader.ReadUInt32();
            this.RSize   = reader.ReadUInt32();
            uint dSize = reader.ReadUInt32();
            uint tSize = reader.ReadUInt32();
            uint fSize = reader.ReadUInt32();

            reader.ReadUInt32(); //dummy

            // ReadBytes returns fewer bytes than requested when the stream ends early,
            // so a short result means the data is truncated.
            byte[] charSetBytes = reader.ReadBytes(32);
            if (charSetBytes.Length != 32)
            {
                throw new MeCabInvalidFileException("dictionary file is broken", this.FileName);
            }

            string charSet = StrUtils.GetString(charSetBytes, Encoding.ASCII);

            this.encoding = StrUtils.GetEncoding(charSet);

            this.da.Open(reader, dSize);

            this.tokens = new Token[tSize / sizeof(Token)];
            for (int i = 0; i < this.tokens.Length; i++)
            {
                this.tokens[i] = Token.Create(reader);
            }

            this.features = reader.ReadBytes((int)fSize);

            // Without this check a truncated feature section would pass the end-of-stream
            // test below on streams whose length could not be verified up front.
            if (this.features.Length != (int)fSize)
            {
                throw new MeCabInvalidFileException("dictionary file is broken", this.FileName);
            }

            // Nothing may follow the feature section.
            if (reader.BaseStream.ReadByte() != -1)
            {
                throw new MeCabInvalidFileException("dictionary file is broken", this.FileName);
            }
        }
        /// <summary>
        /// Loads a dictionary from an existing memory-mapped file: the header is read
        /// through a view stream, the double array is opened on the mapping, and the token
        /// and feature sections are exposed as read-only view accessors.
        /// </summary>
        /// <param name="mmf">Memory-mapped file that contains the dictionary data.</param>
        /// <param name="filePath">Path stored in <c>FileName</c> and passed to the exceptions thrown here; may be null.</param>
        /// <exception cref="MeCabInvalidFileException">
        /// The view is shorter than the size encoded in the magic number, or the version
        /// differs from <c>DicVersion</c>.
        /// </exception>
        public void Open(MemoryMappedFile mmf, string filePath = null)
        {
            this.FileName = filePath;

            using (var headerStream = mmf.CreateViewStream(0L, 0L, MemoryMappedFileAccess.Read))
            using (var header = new BinaryReader(headerStream))
            {
                var declaredSize = header.ReadUInt32() ^ DictionaryMagicID;

                // The exact size cannot be obtained here, so an inequality is used
                // instead of an equality check.
                if (headerStream.CanSeek && headerStream.Length < declaredSize)
                {
                    throw new MeCabInvalidFileException("dictionary file is broken", filePath);
                }

                uint version = header.ReadUInt32();
                this.Version = version;
                if (version != DicVersion)
                {
                    throw new MeCabInvalidFileException("incompatible version", filePath);
                }

                this.Type    = (DictionaryType)header.ReadUInt32();
                this.LexSize = header.ReadUInt32();
                this.LSize   = header.ReadUInt32();
                this.RSize   = header.ReadUInt32();

                uint doubleArraySize = header.ReadUInt32();
                uint tokenSize       = header.ReadUInt32();
                uint featureSize     = header.ReadUInt32();
                header.ReadUInt32(); // dummy field, value ignored

                byte[] charSetBytes = header.ReadBytes(32);
                this.encoding = StrUtils.GetEncoding(StrUtils.GetString(charSetBytes, Encoding.ASCII));

                // The three sections follow the header back to back.
                long doubleArrayOffset = headerStream.Position;
                long tokenOffset       = doubleArrayOffset + doubleArraySize;
                long featureOffset     = tokenOffset + tokenSize;

                this.da.Open(mmf, doubleArrayOffset, doubleArraySize);
                this.tokens   = mmf.CreateViewAccessor(tokenOffset, tokenSize, MemoryMappedFileAccess.Read);
                this.features = mmf.CreateViewAccessor(featureOffset, featureSize, MemoryMappedFileAccess.Read);
            }
        }