/// <summary>
/// Maps the dictionary file read-only into memory, reads its header through a
/// view stream, and points the double array, the token table and the feature
/// block at their positions inside the mapped view.
/// </summary>
/// <param name="fileName">Path of the dictionary file to open.</param>
/// <exception cref="InvalidDataException">
/// The mapped view is smaller than the size derived from the magic word, or the
/// version field differs from <c>DicVersion</c>.
/// </exception>
public unsafe void Open(string fileName)
{
    this.FileName = fileName;

    var fileStream = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.Read);
    try
    {
        // leaveOpen is false, so the mapping takes over the stream once it is created.
        this.mmf = MemoryMappedFile.CreateFromFile(
            fileStream,
            null,
            0L,
            MemoryMappedFileAccess.Read,
            HandleInheritability.None,
            false);
    }
    catch (Exception)
    {
        // Mapping failed: nobody else owns the stream, so close it here.
        fileStream.Dispose();
        throw;
    }

    this.mmva = this.mmf.CreateViewAccessor(0L, 0L, MemoryMappedFileAccess.Read);

    // Raw pointer to the start of the view; it is not released in this method.
    byte* viewStart = null;
    this.mmva.SafeMemoryMappedViewHandle.AcquirePointer(ref viewStart);

    using (var headerStream = this.mmf.CreateViewStream(0L, 0L, MemoryMappedFileAccess.Read))
    using (var header = new BinaryReader(headerStream))
    {
        // The first word XOR-ed with DictionaryMagicID is compared against the view capacity.
        uint magic = header.ReadUInt32();
        var declaredSize = magic ^ DictionaryMagicID;
        if (this.mmva.Capacity < declaredSize)
        {
            throw new InvalidDataException($"dictionary file is broken. {fileName}");
        }

        this.Version = header.ReadUInt32();
        if (this.Version != DicVersion)
        {
            throw new InvalidDataException($"incompatible version dictionaly. {fileName}");
        }

        this.Type = (DictionaryType)header.ReadUInt32();
        this.LexSize = header.ReadUInt32();
        this.LSize = header.ReadUInt32();
        this.RSize = header.ReadUInt32();

        uint doubleArrayBytes = header.ReadUInt32();
        uint tokenBytes = header.ReadUInt32();
        header.ReadUInt32(); // feature block size: read to advance, not needed here
        header.ReadUInt32(); // dummy

        string charSet = StrUtils.GetString(header.ReadBytes(32), Encoding.ASCII);
        this.encoding = StrUtils.GetEncoding(charSet);

        // The sections follow the header back to back: double array, tokens, features.
        byte* doubleArrayStart = viewStart + headerStream.Position;
        byte* tokensStart = doubleArrayStart + doubleArrayBytes;
        byte* featuresStart = tokensStart + tokenBytes;

        this.da.Open(doubleArrayStart, (int)doubleArrayBytes);
        this.tokens = (Token*)tokensStart;
        this.features = featuresStart;
    }
}
/// <summary>
/// Opens the character property data, the system dictionary, the given user
/// dictionaries and the unknown-word dictionary found under
/// <paramref name="dicDir"/>, then caches the unknown-word tokens of every
/// character category.
/// </summary>
/// <param name="dicDir">Directory that contains the dictionary files.</param>
/// <param name="userDics">
/// User dictionary file names, combined with <paramref name="dicDir"/>.
/// <c>null</c> is treated as "no user dictionaries".
/// </param>
/// <exception cref="InvalidDataException">
/// A dictionary has an unexpected type, a user dictionary is not compatible
/// with the system dictionary, or a character category has no entry in the
/// unknown-word dictionary.
/// </exception>
public unsafe void Open(string dicDir, string[] userDics)
{
    // A missing list means the same as an empty one.
    if (userDics == null)
    {
        userDics = new string[0];
    }

    this.property.Open(dicDir);

    // Slot 0 is the system dictionary; user dictionaries follow in order.
    this.dic = new MeCabDictionary[userDics.Length + 1];
    for (int i = 0; i < this.dic.Length; i++)
    {
        this.dic[i] = new MeCabDictionary();
    }

    var sysDic = this.dic[0];
    sysDic.Open(Path.Combine(dicDir, SysDicFile));
    if (sysDic.Type != DictionaryType.Sys)
    {
        throw new InvalidDataException($"not a system dictionary. {sysDic.FileName ?? ""}");
    }

    for (int i = 0; i < userDics.Length; i++)
    {
        var d = this.dic[i + 1];
        d.Open(Path.Combine(dicDir, userDics[i]));
        if (d.Type != DictionaryType.Usr)
        {
            throw new InvalidDataException($"not a user dictionary. {d.FileName ?? ""}");
        }

        if (!sysDic.IsCompatible(d))
        {
            throw new InvalidDataException($"incompatible dictionary. {d.FileName ?? ""}");
        }
    }

    this.unkDic.Open(Path.Combine(dicDir, UnkDicFile));
    if (this.unkDic.Type != DictionaryType.Unk)
    {
        throw new InvalidDataException($"not a unk dictionary. {UnkDicFile}");
    }

    // Set the encoding before the loop below: its error message decodes the
    // category key with this.Encoding, which was previously assigned only at the
    // very end of the method and so could still be unset (or stale) at that point.
    this.Encoding = StrUtils.GetEncoding(sysDic.CharSet);

    this.unkTokens = new Token[this.property.Size][];
    for (int i = 0; i < this.unkTokens.Length; i++)
    {
        fixed (byte* key = this.property.Name(i))
        {
            var n = this.unkDic.ExactMatchSearch(key, StrUtils.GetLength(key));
            if (n.Value == -1)
            {
                throw new InvalidDataException($"cannot find UNK category: {StrUtils.GetString(key, this.Encoding)} {this.unkDic.FileName ?? ""}");
            }

            this.unkTokens[i] = this.unkDic.GetTokensArray(n.Value);
        }
    }

    this.space = this.property.GetCharInfo(' ');
}
/// <summary>
/// Loads a dictionary from an in-memory image: parses the fixed-size header,
/// opens the double array, then copies the token table and the feature block
/// into managed arrays.
/// </summary>
/// <param name="contents">Complete binary image of the dictionary.</param>
/// <exception cref="ArgumentNullException"><paramref name="contents"/> is null.</exception>
/// <exception cref="MeCabInvalidFileException">
/// The version field differs from <c>DicVersion</c>, the image is shorter than
/// the header, or the token and feature sections do not end exactly at the end
/// of the image.
/// </exception>
public void Open(byte[] contents)
{
    const int FieldSize = sizeof(uint);
    const int CharSetSize = 32;
    const int HeaderSize = (10 * FieldSize) + CharSetSize; // ten 32-bit fields + charset name
    const int TokenSize = 16;                              // bytes consumed per token record

    if (contents == null)
    {
        throw new ArgumentNullException(nameof(contents));
    }

    // Reject images that cannot even hold the header, so truncation is reported
    // as a broken dictionary rather than as an argument error from BitConverter.
    if (contents.Length < HeaderSize)
    {
        throw new MeCabInvalidFileException("dictionary file is broken", "");
    }

    // The leading magic word is skipped; this overload does not validate it.
    int offset = FieldSize;

    this.Version = BitConverter.ToUInt32(contents, offset);
    if (this.Version != DicVersion)
    {
        throw new MeCabInvalidFileException("incompatible version", "");
    }
    offset += FieldSize;

    this.Type = (DictionaryType)BitConverter.ToUInt32(contents, offset);
    offset += FieldSize;
    this.LexSize = BitConverter.ToUInt32(contents, offset);
    offset += FieldSize;
    this.LSize = BitConverter.ToUInt32(contents, offset);
    offset += FieldSize;
    this.RSize = BitConverter.ToUInt32(contents, offset);
    offset += FieldSize;
    uint dSize = BitConverter.ToUInt32(contents, offset);
    offset += FieldSize;
    uint tSize = BitConverter.ToUInt32(contents, offset);
    offset += FieldSize;
    uint fSize = BitConverter.ToUInt32(contents, offset);
    offset += FieldSize;
    offset += FieldSize; // dummy

    // Copy the charset name into a buffer with one extra zero byte so that it
    // is always terminated.
    byte[] charSetBytes = new byte[CharSetSize + 1];
    Buffer.BlockCopy(contents, offset, charSetBytes, 0, CharSetSize);
    charSetBytes[CharSetSize] = 0x00;
    offset += CharSetSize;
    string charSet = StrUtils.GetString(charSetBytes, Encoding.ASCII);
    this.encoding = StrUtils.GetEncoding(charSet);

    this.da.Open(contents, ref offset, dSize);

    // Check the layout before allocating or copying anything. This accepts
    // exactly the images that the former end-of-method offset check accepted,
    // but a corrupted size field now fails fast instead of triggering a huge
    // allocation or an out-of-range copy.
    uint tokenCount = tSize / TokenSize;
    long tokenBytes = (long)tokenCount * TokenSize;
    if (offset + tokenBytes + fSize != contents.Length)
    {
        throw new MeCabInvalidFileException("dictionary file is broken", "");
    }

    this.tokens = new Token[tokenCount];
    for (int i = 0; i < this.tokens.Length; i++)
    {
        this.tokens[i] = Token.Create(contents, offset);
        offset += TokenSize;
    }

    // fSize is known to fit in the image (and therefore in an int) at this point.
    this.features = new byte[(int)fSize];
    Buffer.BlockCopy(contents, offset, this.features, 0, this.features.Length);
}
/// <summary>
/// Loads a dictionary from a binary reader: parses the header, opens the double
/// array, reads the token table and the feature block, and verifies that the
/// stream ends right after the feature block.
/// </summary>
/// <param name="reader">Reader positioned at the first byte of the dictionary.</param>
/// <exception cref="MeCabInvalidFileException">
/// The stream length (checked for seekable streams only) does not match the
/// size derived from the magic word, the version differs from
/// <c>DicVersion</c>, the charset name or feature block is cut short, or data
/// remains after the feature block.
/// </exception>
public unsafe void Open(BinaryReader reader)
{
    const int CharSetSize = 32;

    uint magic = reader.ReadUInt32();

    // The stream length can only be checked when the stream supports seeking.
    if (reader.BaseStream.CanSeek &&
        reader.BaseStream.Length != (magic ^ DictionaryMagicID))
    {
        throw new MeCabInvalidFileException("dictionary file is broken", this.FileName);
    }

    this.Version = reader.ReadUInt32();
    if (this.Version != DicVersion)
    {
        throw new MeCabInvalidFileException("incompatible version", this.FileName);
    }

    this.Type = (DictionaryType)reader.ReadUInt32();
    this.LexSize = reader.ReadUInt32();
    this.LSize = reader.ReadUInt32();
    this.RSize = reader.ReadUInt32();
    uint dSize = reader.ReadUInt32();
    uint tSize = reader.ReadUInt32();
    uint fSize = reader.ReadUInt32();
    reader.ReadUInt32(); // dummy

    // ReadBytes returns a shorter array at end of stream instead of throwing,
    // so a truncated charset field has to be detected explicitly.
    byte[] charSetBytes = reader.ReadBytes(CharSetSize);
    if (charSetBytes.Length != CharSetSize)
    {
        throw new MeCabInvalidFileException("dictionary file is broken", this.FileName);
    }

    string charSet = StrUtils.GetString(charSetBytes, Encoding.ASCII);
    this.encoding = StrUtils.GetEncoding(charSet);

    this.da.Open(reader, dSize);

    this.tokens = new Token[tSize / sizeof(Token)];
    for (int i = 0; i < this.tokens.Length; i++)
    {
        this.tokens[i] = Token.Create(reader);
    }

    // A size above int.MaxValue would wrap to a negative count in the cast below.
    if (fSize > int.MaxValue)
    {
        throw new MeCabInvalidFileException("dictionary file is broken", this.FileName);
    }

    // Same short-read concern as for the charset: on a non-seekable stream a
    // truncated feature block would otherwise be accepted silently.
    byte[] featureBytes = reader.ReadBytes((int)fSize);
    if (featureBytes.Length != fSize)
    {
        throw new MeCabInvalidFileException("dictionary file is broken", this.FileName);
    }

    this.features = featureBytes;

    // Nothing may follow the feature block.
    if (reader.BaseStream.ReadByte() != -1)
    {
        throw new MeCabInvalidFileException("dictionary file is broken", this.FileName);
    }
}
/// <summary>
/// Loads a dictionary from an existing memory-mapped file: reads the header
/// through a view stream, opens the double array on the mapping, and creates
/// read-only view accessors over the token and feature sections.
/// </summary>
/// <param name="mmf">Memory-mapped file that holds the dictionary.</param>
/// <param name="filePath">
/// Path stored in <c>FileName</c> and passed to the exceptions thrown here; may be null.
/// </param>
/// <exception cref="MeCabInvalidFileException">
/// The view is shorter than the size derived from the magic word, or the
/// version differs from <c>DicVersion</c>.
/// </exception>
public void Open(MemoryMappedFile mmf, string filePath = null)
{
    this.FileName = filePath;

    using (MemoryMappedViewStream headerStream = mmf.CreateViewStream(0L, 0L, MemoryMappedFileAccess.Read))
    using (BinaryReader header = new BinaryReader(headerStream))
    {
        uint magic = header.ReadUInt32();

        // The exact size cannot be obtained here, so an inequality is used
        // in place of an equality check.
        bool shorterThanDeclared = headerStream.CanSeek
            && headerStream.Length < (magic ^ DictionaryMagicID);
        if (shorterThanDeclared)
        {
            throw new MeCabInvalidFileException("dictionary file is broken", filePath);
        }

        this.Version = header.ReadUInt32();
        if (this.Version != DicVersion)
        {
            throw new MeCabInvalidFileException("incompatible version", filePath);
        }

        this.Type = (DictionaryType)header.ReadUInt32();
        this.LexSize = header.ReadUInt32();
        this.LSize = header.ReadUInt32();
        this.RSize = header.ReadUInt32();

        uint doubleArrayBytes = header.ReadUInt32();
        uint tokenBytes = header.ReadUInt32();
        uint featureBytes = header.ReadUInt32();
        header.ReadUInt32(); // dummy

        string charSet = StrUtils.GetString(header.ReadBytes(32), Encoding.ASCII);
        this.encoding = StrUtils.GetEncoding(charSet);

        // The sections follow the header back to back: double array, tokens, features.
        long doubleArrayOffset = headerStream.Position;
        this.da.Open(mmf, doubleArrayOffset, doubleArrayBytes);

        long tokensOffset = doubleArrayOffset + doubleArrayBytes;
        this.tokens = mmf.CreateViewAccessor(tokensOffset, tokenBytes, MemoryMappedFileAccess.Read);

        long featuresOffset = tokensOffset + tokenBytes;
        this.features = mmf.CreateViewAccessor(featuresOffset, featureBytes, MemoryMappedFileAccess.Read);
    }
}