/// <summary>
/// Decodes a string from a managed byte array, starting at the given byte offset.
/// </summary>
/// <param name="bytes">Buffer holding the encoded text.</param>
/// <param name="offset">Byte offset into <paramref name="bytes"/> where decoding starts.</param>
/// <param name="enc">Encoding handed to the pointer-based overload.</param>
/// <returns>Whatever the pointer-based <c>StrUtils.GetString</c> overload returns for that position.</returns>
public unsafe static string GetString(byte[] bytes, long offset, Encoding enc)
{
    // Pin the array so the raw pointer stays valid for the duration of the call.
    fixed (byte* pinned = bytes)
    {
        byte* start = pinned + offset;
        return StrUtils.GetString(start, enc);
    }
}
/// <summary>
/// Opens a dictionary file through a read-only memory mapping and wires the
/// double array, token table and feature table to pointers inside the mapping.
/// </summary>
/// <param name="fileName">Path of the dictionary file to map.</param>
/// <exception cref="InvalidDataException">
/// The mapped view is smaller than the size encoded in the magic number, or the
/// version field differs from <c>DicVersion</c>.
/// </exception>
public unsafe void Open(string fileName)
{
    this.FileName = fileName;

    var fileStream = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.Read);
    try
    {
        // leaveOpen is false: once the mapping exists it is responsible for the stream.
        this.mmf = MemoryMappedFile.CreateFromFile(
            fileStream,
            null,
            0L,
            MemoryMappedFileAccess.Read,
            HandleInheritability.None,
            false);
    }
    catch (Exception)
    {
        // The mapping was never created, so the stream has to be closed here.
        fileStream.Dispose();
        throw;
    }

    this.mmva = this.mmf.CreateViewAccessor(0L, 0L, MemoryMappedFileAccess.Read);

    byte* basePtr = null;
    this.mmva.SafeMemoryMappedViewHandle.AcquirePointer(ref basePtr);

    using (var headerStream = this.mmf.CreateViewStream(0L, 0L, MemoryMappedFileAccess.Read))
    using (var header = new BinaryReader(headerStream))
    {
        // Header layout: magic, version, type, lexSize, lSize, rSize,
        // dSize, tSize, fSize, dummy, then a 32-byte charset name.
        uint magic = header.ReadUInt32();
        if (this.mmva.Capacity < (magic ^ DictionaryMagicID))
        {
            throw new InvalidDataException($"dictionary file is broken. {fileName}");
        }

        this.Version = header.ReadUInt32();
        if (this.Version != DicVersion)
        {
            throw new InvalidDataException($"incompatible version dictionaly. {fileName}");
        }

        this.Type = (DictionaryType)header.ReadUInt32();
        this.LexSize = header.ReadUInt32();
        this.LSize = header.ReadUInt32();
        this.RSize = header.ReadUInt32();
        uint doubleArraySize = header.ReadUInt32();
        uint tokenTableSize = header.ReadUInt32();
        header.ReadUInt32(); // feature table size: read to advance, value not needed
        header.ReadUInt32(); // dummy

        byte[] charSetBytes = header.ReadBytes(32);
        string charSet = StrUtils.GetString(charSetBytes, Encoding.ASCII);
        this.encoding = StrUtils.GetEncoding(charSet);

        // The three data sections follow the header back to back.
        byte* doubleArrayPtr = basePtr + headerStream.Position;
        byte* tokenPtr = doubleArrayPtr + doubleArraySize;
        byte* featurePtr = tokenPtr + tokenTableSize;

        this.da.Open(doubleArrayPtr, (int)doubleArraySize);
        this.tokens = (Token*)tokenPtr;
        this.features = featurePtr;
    }
}
/// <summary>
/// Opens the character property data, the system dictionary, the given user
/// dictionaries and the unknown-word dictionary found under <paramref name="dicDir"/>.
/// </summary>
/// <param name="dicDir">Directory containing the dictionary files.</param>
/// <param name="userDics">User dictionary file names, each combined with <paramref name="dicDir"/>.</param>
/// <exception cref="InvalidDataException">
/// A dictionary has an unexpected type, a user dictionary is incompatible with the
/// system dictionary, or a character category has no entry in the unknown-word dictionary.
/// </exception>
public unsafe void Open(string dicDir, string[] userDics)
{
    this.property.Open(dicDir);

    // Slot 0 is the system dictionary; slots 1..n are the user dictionaries.
    this.dic = new MeCabDictionary[userDics.Length + 1];
    for (int i = 0; i < this.dic.Length; i++)
    {
        this.dic[i] = new MeCabDictionary();
    }

    var sysDic = this.dic[0];
    sysDic.Open(Path.Combine(dicDir, SysDicFile));
    if (sysDic.Type != DictionaryType.Sys)
    {
        throw new InvalidDataException($"not a system dictionary. {sysDic.FileName ?? ""}");
    }

    for (int i = 0; i < userDics.Length; i++)
    {
        var d = this.dic[i + 1];
        d.Open(Path.Combine(dicDir, userDics[i]));
        if (d.Type != DictionaryType.Usr)
        {
            throw new InvalidDataException($"not a user dictionary. {d.FileName ?? ""}");
        }

        if (!sysDic.IsCompatible(d))
        {
            throw new InvalidDataException($"incompatible dictionary. {d.FileName ?? ""}");
        }
    }

    this.unkDic.Open(Path.Combine(dicDir, UnkDicFile));
    if (this.unkDic.Type != DictionaryType.Unk)
    {
        throw new InvalidDataException($"not a unk dictionary. {UnkDicFile}");
    }

    // One token array per character category, looked up by category name.
    this.unkTokens = new Token[this.property.Size][];
    for (int i = 0; i < this.unkTokens.Length; i++)
    {
        fixed (byte* key = this.property.Name(i))
        {
            var n = this.unkDic.ExactMatchSearch(key, StrUtils.GetLength(key));
            if (n.Value == -1)
            {
                // this.Encoding is only assigned at the end of this method, so decode the
                // key with the system dictionary's encoding (the value it will receive).
                throw new InvalidDataException($"cannot find UNK category: {StrUtils.GetString(key, sysDic.Encoding)} {this.unkDic.FileName ?? ""}");
            }

            this.unkTokens[i] = this.unkDic.GetTokensArray(n.Value);
        }
    }

    this.space = this.property.GetCharInfo(' ');
    this.Encoding = sysDic.Encoding;
}
/// <summary>
/// Loads a dictionary from an in-memory image: header fields, charset name,
/// double array, token table and feature bytes, in that order.
/// </summary>
/// <param name="contents">Complete dictionary image.</param>
/// <exception cref="MeCabInvalidFileException">
/// The version field differs from <c>DicVersion</c>, or the image length does not
/// match the position reached after all sections have been consumed.
/// </exception>
public void Open(byte[] contents)
{
    const int FieldSize = 4;    // every numeric header field is a 32-bit value
    const int CharSetSize = 32; // fixed-width charset name
    const int TokenSize = 16;   // bytes consumed per token entry

    int pos = 0;

    // Magic number: read like the other fields, but not validated by this overload.
    BitConverter.ToUInt32(contents, pos);
    pos += FieldSize;

    this.Version = BitConverter.ToUInt32(contents, pos);
    if (this.Version != DicVersion)
    {
        throw new MeCabInvalidFileException("incompatible version", "");
    }
    pos += FieldSize;

    this.Type = (DictionaryType)BitConverter.ToUInt32(contents, pos);
    pos += FieldSize;

    this.LexSize = BitConverter.ToUInt32(contents, pos);
    pos += FieldSize;

    this.LSize = BitConverter.ToUInt32(contents, pos);
    pos += FieldSize;

    this.RSize = BitConverter.ToUInt32(contents, pos);
    pos += FieldSize;

    uint doubleArraySize = BitConverter.ToUInt32(contents, pos);
    pos += FieldSize;

    uint tokenTableSize = BitConverter.ToUInt32(contents, pos);
    pos += FieldSize;

    uint featureTableSize = BitConverter.ToUInt32(contents, pos);
    pos += FieldSize;

    pos += FieldSize; // dummy field

    // Copy the name into a buffer one byte longer than the field. A freshly allocated
    // array is zero-filled, so the extra trailing byte is already 0x00.
    var charSetBuffer = new byte[CharSetSize + 1];
    Buffer.BlockCopy(contents, pos, charSetBuffer, 0, CharSetSize);
    pos += CharSetSize;

    string charSet = StrUtils.GetString(charSetBuffer, Encoding.ASCII);
    this.encoding = StrUtils.GetEncoding(charSet);

    // The double array reader advances pos itself.
    this.da.Open(contents, ref pos, doubleArraySize);

    this.tokens = new Token[tokenTableSize / TokenSize];
    for (int index = 0; index < this.tokens.Length; index++)
    {
        this.tokens[index] = Token.Create(contents, pos);
        pos += TokenSize;
    }

    int featureLength = (int)featureTableSize;
    this.features = new byte[featureLength];
    Buffer.BlockCopy(contents, pos, this.features, 0, featureLength);
    pos += featureLength;

    // Every byte of the image must have been accounted for.
    if (pos != contents.Length)
    {
        throw new MeCabInvalidFileException("dictionary file is broken", "");
    }
}
/// <summary>
/// Loads a dictionary from a binary reader: header fields, charset name,
/// double array, token table and feature bytes, in that order.
/// </summary>
/// <param name="reader">Reader positioned at the start of the dictionary data.</param>
/// <exception cref="MeCabInvalidFileException">
/// The stream length (when seekable) does not match the size encoded in the magic
/// number, the version differs from <c>DicVersion</c>, or bytes remain after the
/// feature table.
/// </exception>
public unsafe void Open(BinaryReader reader)
{
    uint magic = reader.ReadUInt32();

    // The stream length is checked only when the stream is seekable.
    if (reader.BaseStream.CanSeek)
    {
        if (reader.BaseStream.Length != (magic ^ DictionaryMagicID))
        {
            throw new MeCabInvalidFileException("dictionary file is broken", this.FileName);
        }
    }

    this.Version = reader.ReadUInt32();
    if (this.Version != DicVersion)
    {
        throw new MeCabInvalidFileException("incompatible version", this.FileName);
    }

    this.Type = (DictionaryType)reader.ReadUInt32();
    this.LexSize = reader.ReadUInt32();
    this.LSize = reader.ReadUInt32();
    this.RSize = reader.ReadUInt32();
    uint doubleArraySize = reader.ReadUInt32();
    uint tokenTableSize = reader.ReadUInt32();
    uint featureTableSize = reader.ReadUInt32();
    reader.ReadUInt32(); // dummy

    byte[] charSetBytes = reader.ReadBytes(32);
    string charSet = StrUtils.GetString(charSetBytes, Encoding.ASCII);
    this.encoding = StrUtils.GetEncoding(charSet);

    this.da.Open(reader, doubleArraySize);

    // The token table size is in bytes; convert it to an entry count.
    var tokenCount = tokenTableSize / sizeof(Token);
    this.tokens = new Token[tokenCount];
    for (int index = 0; index < this.tokens.Length; index++)
    {
        this.tokens[index] = Token.Create(reader);
    }

    this.features = reader.ReadBytes((int)featureTableSize);

    // Nothing may follow the feature table.
    int trailing = reader.BaseStream.ReadByte();
    if (trailing != -1)
    {
        throw new MeCabInvalidFileException("dictionary file is broken", this.FileName);
    }
}
/// <summary>
/// Opens a dictionary file through <c>mmfLoader</c> and points the double array,
/// token table and feature table at the loaded memory.
/// </summary>
/// <param name="fileName">Path of the dictionary file.</param>
/// <exception cref="InvalidDataException">
/// The loader's file size does not match the size encoded in the magic number, or
/// the version field differs from <c>DicVersion</c>.
/// </exception>
/// <exception cref="Exception">The charset name in the header does not resolve to an encoding.</exception>
public unsafe void Open(string fileName)
{
    this.FileName = fileName;

    // Header layout as 32-bit words:
    // [0] magic, [1] version, [2] type, [3] lexSize, [4] lSize, [5] rSize,
    // [6] dSize, [7] tSize, [8] fSize (unused here), [9] dummy.
    uint* header = (uint*)this.mmfLoader.Invoke(fileName);

    uint magic = header[0];
    if (this.mmfLoader.FileSize != (magic ^ DictionaryMagicID))
    {
        throw new InvalidDataException($"dictionary file is broken. {fileName}");
    }

    this.Version = header[1];
    if (this.Version != DicVersion)
    {
        throw new InvalidDataException($"incompatible version dictionaly. {fileName}");
    }

    this.Type = (DictionaryType)header[2];
    this.LexSize = header[3];
    this.LSize = header[4];
    this.RSize = header[5];
    uint doubleArraySize = header[6];
    uint tokenTableSize = header[7];

    // The charset name starts right after the ten header words and occupies 32 bytes.
    byte* charSetPtr = (byte*)(header + 10);
    var encName = StrUtils.GetString(charSetPtr, Encoding.ASCII);
    var resolved = encName.GetEncodingOrNull();
    if (resolved == null)
    {
        throw new Exception($"not supported encoding dictionary. {encName} {fileName}");
    }
    this.Encoding = resolved;

    // The three data sections follow the charset name back to back.
    byte* doubleArrayPtr = charSetPtr + 32;
    byte* tokenPtr = doubleArrayPtr + doubleArraySize;
    byte* featurePtr = tokenPtr + tokenTableSize;

    this.da.Open(doubleArrayPtr, (int)doubleArraySize);
    this.tokens = (Token*)tokenPtr;
    this.features = featurePtr;
}
/// <summary>
/// Loads a dictionary from an existing memory-mapped file: the header is read
/// through a view stream, and the token and feature tables are exposed as view accessors.
/// </summary>
/// <param name="mmf">Memory-mapped file containing the dictionary.</param>
/// <param name="filePath">Path recorded as <c>FileName</c> and used in error reports; may be null.</param>
/// <exception cref="MeCabInvalidFileException">
/// The view is shorter than the size encoded in the magic number, or the version
/// differs from <c>DicVersion</c>.
/// </exception>
public void Open(MemoryMappedFile mmf, string filePath = null)
{
    this.FileName = filePath;

    using (MemoryMappedViewStream view = mmf.CreateViewStream(0L, 0L, MemoryMappedFileAccess.Read))
    using (BinaryReader header = new BinaryReader(view))
    {
        uint magic = header.ReadUInt32();

        // The exact file size cannot be obtained here, so an inequality is used instead.
        if (view.CanSeek)
        {
            if (view.Length < (magic ^ DictionaryMagicID))
            {
                throw new MeCabInvalidFileException("dictionary file is broken", filePath);
            }
        }

        this.Version = header.ReadUInt32();
        if (this.Version != DicVersion)
        {
            throw new MeCabInvalidFileException("incompatible version", filePath);
        }

        this.Type = (DictionaryType)header.ReadUInt32();
        this.LexSize = header.ReadUInt32();
        this.LSize = header.ReadUInt32();
        this.RSize = header.ReadUInt32();
        uint doubleArraySize = header.ReadUInt32();
        uint tokenTableSize = header.ReadUInt32();
        uint featureTableSize = header.ReadUInt32();
        header.ReadUInt32(); // dummy

        byte[] charSetBytes = header.ReadBytes(32);
        string charSet = StrUtils.GetString(charSetBytes, Encoding.ASCII);
        this.encoding = StrUtils.GetEncoding(charSet);

        // The three data sections follow the header back to back.
        long doubleArrayOffset = view.Position;
        long tokenOffset = doubleArrayOffset + doubleArraySize;
        long featureOffset = tokenOffset + tokenTableSize;

        this.da.Open(mmf, doubleArrayOffset, doubleArraySize);
        this.tokens = mmf.CreateViewAccessor(tokenOffset, tokenTableSize, MemoryMappedFileAccess.Read);
        this.features = mmf.CreateViewAccessor(featureOffset, featureTableSize, MemoryMappedFileAccess.Read);
    }
}
/// <summary>
/// Reads character property data: a category count, that many 32-byte ASCII
/// category names, then one 32-bit value per slot of <c>charInfoList</c>.
/// </summary>
/// <param name="reader">Reader positioned at the start of the data.</param>
/// <param name="fileName">File name used in the error report; may be null.</param>
/// <exception cref="MeCabInvalidFileException">
/// The stream is seekable and its length differs from the length implied by the category count.
/// </exception>
public void Open(BinaryReader reader, string fileName = null)
{
    uint categoryCount = reader.ReadUInt32();

    if (reader.BaseStream.CanSeek)
    {
        // Compute in 64-bit: "32 * count" in 32-bit unsigned arithmetic wraps for large
        // counts, which would let a corrupt header slip past this size check.
        long expectedLength = 4L + 32L * categoryCount + 4L * this.charInfoList.Length;
        if (reader.BaseStream.Length != expectedLength)
        {
            throw new MeCabInvalidFileException("invalid file size", fileName);
        }
    }

    this.cList = new string[categoryCount];
    for (int i = 0; i < this.cList.Length; i++)
    {
        this.cList[i] = StrUtils.GetString(reader.ReadBytes(32), Encoding.ASCII);
    }

    for (int j = 0; j < this.charInfoList.Length; j++)
    {
        this.charInfoList[j] = new CharInfo(reader.ReadUInt32());
    }
}
/// <summary>
/// Loads a dictionary from a binary reader: header fields, charset name,
/// double array, token table and feature bytes, in that order.
/// </summary>
/// <param name="reader">Reader positioned at the start of the dictionary data.</param>
/// <exception cref="MeCabInvalidFileException">
/// The stream length (when seekable) does not match the size encoded in the magic
/// number, the version is not 102, or bytes remain after the feature table.
/// </exception>
public unsafe void Open(BinaryReader reader)
{
    // Same bit pattern as the signed literal -277770377.
    const uint MagicMask = 0xEF718F77u;
    const uint SupportedVersion = 102;

    uint magic = reader.ReadUInt32();
    if (reader.BaseStream.CanSeek)
    {
        uint encodedLength = magic ^ MagicMask;
        if (reader.BaseStream.Length != encodedLength)
        {
            throw new MeCabInvalidFileException("dictionary file is broken", this.FileName);
        }
    }

    this.Version = reader.ReadUInt32();
    if (this.Version != SupportedVersion)
    {
        throw new MeCabInvalidFileException("incompatible version", this.FileName);
    }

    this.Type = (DictionaryType)reader.ReadUInt32();
    this.LexSize = reader.ReadUInt32();
    this.LSize = reader.ReadUInt32();
    this.RSize = reader.ReadUInt32();
    uint doubleArraySize = reader.ReadUInt32();
    uint tokenTableSize = reader.ReadUInt32();
    uint featureTableSize = reader.ReadUInt32();
    reader.ReadUInt32(); // unused header field

    // "UTF8" is passed to Encoding.GetEncoding as "UTF-8"; any other name is used as-is.
    string charSet = StrUtils.GetString(reader.ReadBytes(32), Encoding.ASCII);
    if (charSet == "UTF8")
    {
        charSet = "UTF-8";
    }
    this.encoding = Encoding.GetEncoding(charSet);

    this.da.Open(reader, doubleArraySize);

    // The token table size is in bytes; convert it to an entry count.
    long tokenCount = (long)tokenTableSize / (long)sizeof(Token);
    this.tokens = new Token[tokenCount];
    for (int index = 0; index < this.tokens.Length; index++)
    {
        this.tokens[index] = Token.Create(reader);
    }

    this.features = reader.ReadBytes((int)featureTableSize);

    // Nothing may follow the feature table.
    if (reader.BaseStream.ReadByte() != -1)
    {
        throw new MeCabInvalidFileException("dictionary file is broken", this.FileName);
    }
}
/// <summary>
/// Reads character property data: a category count, that many 32-byte ASCII
/// category names, then one 32-bit value per slot of <c>charInfoList</c>.
/// </summary>
/// <param name="reader">Reader positioned at the start of the data.</param>
/// <param name="fileName">File name used in the error report; may be null.</param>
/// <exception cref="MeCabInvalidFileException">
/// The stream is seekable and its length differs from the length implied by the category count.
/// </exception>
public void Open(BinaryReader reader, string fileName = null)
{
    uint cSize = reader.ReadUInt32();

    if (reader.BaseStream.CanSeek)
    {
        // Widen every term to long before multiplying: "32 * cSize" evaluated as a 32-bit
        // unsigned product wraps for large counts and could make a corrupt file look valid.
        long fSize = (long)sizeof(uint)
                     + 32L * cSize
                     + (long)sizeof(uint) * this.charInfoList.Length;
        if (reader.BaseStream.Length != fSize)
        {
            throw new MeCabInvalidFileException("invalid file size", fileName);
        }
    }

    this.cList = new string[cSize];
    for (int i = 0; i < this.cList.Length; i++)
    {
        this.cList[i] = StrUtils.GetString(reader.ReadBytes(32), Encoding.ASCII);
    }

    for (int i = 0; i < this.charInfoList.Length; i++)
    {
        this.charInfoList[i] = new CharInfo(reader.ReadUInt32());
    }
}
/// <summary>
/// Loads character property data from the embedded <c>_char</c> resource
/// (embedded-resource variant, 2019/10/20): a category count, that many 32-byte
/// ASCII category names, then one 32-bit value per slot of <c>charInfoList</c>.
/// </summary>
public void Open()
{
    const int CountSize = 4;  // leading 32-bit category count
    const int NameSize = 32;  // fixed-width category name
    const int InfoSize = 4;   // one 32-bit value per CharInfo

    byte[] data = Properties.Resources._char;

    uint categoryCount = BitConverter.ToUInt32(data, 0);
    this.cList = new string[categoryCount];

    for (int index = 0; index < this.cList.Length; index++)
    {
        // Copy the name into a buffer one byte longer than the field. A freshly
        // allocated array is zero-filled, so the extra trailing byte is already 0x00.
        var nameBuffer = new byte[NameSize + 1];
        Buffer.BlockCopy(data, CountSize + index * NameSize, nameBuffer, 0, NameSize);
        this.cList[index] = StrUtils.GetString(nameBuffer, Encoding.ASCII);
    }

    // The CharInfo values start right after the last category name.
    int infoStart = CountSize + this.cList.Length * NameSize;
    for (int index = 0; index < this.charInfoList.Length; index++)
    {
        uint raw = BitConverter.ToUInt32(data, infoStart + index * InfoSize);
        this.charInfoList[index] = new CharInfo(raw);
    }
}
/// <summary>
/// Decodes the feature string stored at the given position of the feature table.
/// </summary>
/// <param name="featurePos">Byte offset of the feature inside <c>features</c>.</param>
/// <returns>The string decoded with this dictionary's encoding.</returns>
public string GetFeature(uint featurePos)
{
    long offset = featurePos;
    return StrUtils.GetString(this.features, offset, this.encoding);
}
/// <summary>
/// Decodes a string from unmanaged memory, starting <paramref name="offset"/> bytes
/// past <paramref name="bytes"/>.
/// </summary>
/// <param name="bytes">Base pointer of the buffer.</param>
/// <param name="offset">Byte offset added to the base pointer.</param>
/// <param name="enc">Encoding handed to the two-argument pointer overload.</param>
/// <returns>Whatever the two-argument pointer overload returns for that position.</returns>
public unsafe static string GetString(byte* bytes, long offset, Encoding enc)
{
    byte* start = bytes + offset;
    return StrUtils.GetString(start, enc);
}
/// <summary>
/// Decodes a string from the start of a managed byte array.
/// </summary>
/// <param name="bytes">Buffer holding the encoded text.</param>
/// <param name="enc">Encoding handed to the offset-taking overload.</param>
/// <returns>The result of the offset-taking overload called with offset zero.</returns>
public static string GetString(byte[] bytes, Encoding enc)
{
    const long StartOffset = 0L;
    return StrUtils.GetString(bytes, StartOffset, enc);
}