/// <summary>
/// Returns the raw address of the memory-mapped view behind a dictionary.
/// </summary>
/// <remarks>
/// The loader and the view accessor are read from the non-public fields
/// "mmfLoader" and "mmva" through <c>Hack.GetFieldValue</c>.
/// NOTE(review): the pointer obtained with <c>AcquirePointer</c> is never handed back with
/// <c>ReleasePointer</c>, so the handle's reference count stays incremented. This is presumably
/// intentional, to keep the returned address usable by callers - confirm.
/// </remarks>
/// <param name="dictionary">The dictionary whose mapped view address is requested.</param>
/// <returns>The address of the first byte of the mapped view, as an unsigned integer.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="dictionary"/> is null.</exception>
/// <exception cref="System.InvalidOperationException">
/// The dictionary does not expose a memory-mapped file loader or a view accessor.
/// </exception>
private unsafe static ulong GetSafeMemoryMappedViewAddress(MeCabDictionary dictionary)
{
    if (dictionary == null)
    {
        throw new System.ArgumentNullException("dictionary");
    }

    var mmfLoader = Hack.GetFieldValue(dictionary, "mmfLoader") as MemoryMappedFileLoader;
    if (mmfLoader == null)
    {
        // Fail with a descriptive error instead of a NullReferenceException further down.
        throw new System.InvalidOperationException("The dictionary has no memory-mapped file loader (field 'mmfLoader').");
    }

    var mmva = Hack.GetFieldValue(mmfLoader, "mmva") as MemoryMappedViewAccessor;
    if (mmva == null)
    {
        throw new System.InvalidOperationException("The memory-mapped file loader has no view accessor (field 'mmva').");
    }

    byte* pointer = null;
    mmva.SafeMemoryMappedViewHandle.AcquirePointer(ref pointer);

    // AcquirePointer yields the start of the underlying mapping, which can lie before the
    // requested view; PointerOffset is the distance from there to the view's first byte
    // (zero when the view starts at an aligned offset such as 0).
    return (ulong)(pointer + mmva.PointerOffset);
}
/// <summary>
/// Copies the connection attributes, part-of-speech id and word cost of a token onto a node,
/// and tells the node where the token's feature can be found.
/// </summary>
/// <param name="dic">The dictionary passed to the node together with the feature position.</param>
/// <param name="token">The token whose values are copied.</param>
/// <param name="node">The node that receives the values.</param>
private void ReadNodeInfo(MeCabDictionary dic, Token token, MeCabNode node)
{
    node.LCAttr = token.LcAttr;
    node.RCAttr = token.RcAttr;
    node.PosId = token.PosId;
    node.WCost = token.WCost;

    // The feature is deliberately not fetched at this stage (earlier code assigned
    // node.Token / node.Feature here). Instead the node is given the feature position
    // and the dictionary, which makes lazy retrieval possible.
    node.SetFeature(token.Feature, dic);
}
/// <summary>
/// Opens the character property data, the unknown-word dictionary, the system dictionary and
/// every user dictionary named in <paramref name="param"/>, then caches the unknown-word tokens
/// per character category and copies the remaining settings from <paramref name="param"/>.
/// </summary>
/// <param name="param">
/// Supplies the dictionary directory, the user dictionary file names, the BOS and unknown
/// features, and the maximum grouping size.
/// </param>
/// <exception cref="MeCabInvalidFileException">
/// A dictionary has an unexpected type, a user dictionary is incompatible with the system
/// dictionary, or a character category is missing from the unknown-word dictionary.
/// </exception>
public void Open(MeCabParam param)
{
    // Slot 0 holds the system dictionary; slots 1..n hold the user dictionaries.
    this.dic = new MeCabDictionary[param.UserDic.Length + 1];

    string dicDir = param.DicDir;

    this.property.Open(dicDir);

    this.unkDic.Open(Path.Combine(dicDir, UnkDicFile));
    if (this.unkDic.Type != DictionaryType.Unk)
    {
        throw new MeCabInvalidFileException("not a unk dictionary", this.unkDic.FileName);
    }

    var systemDictionary = new MeCabDictionary();
    systemDictionary.Open(Path.Combine(dicDir, SysDicFile));
    if (systemDictionary.Type != DictionaryType.Sys)
    {
        throw new MeCabInvalidFileException("not a system dictionary", systemDictionary.FileName);
    }

    this.dic[0] = systemDictionary;

    for (int slot = 1; slot < this.dic.Length; slot++)
    {
        var userDictionary = new MeCabDictionary();
        userDictionary.Open(Path.Combine(dicDir, param.UserDic[slot - 1]));

        if (userDictionary.Type != DictionaryType.Usr)
        {
            throw new MeCabInvalidFileException("not a user dictionary", userDictionary.FileName);
        }

        if (!systemDictionary.IsCompatible(userDictionary))
        {
            throw new MeCabInvalidFileException("incompatible dictionary", userDictionary.FileName);
        }

        this.dic[slot] = userDictionary;
    }

    // One token array per character category known to the property data.
    this.unkTokens = new Token[this.property.Size][];
    for (int category = 0; category < this.unkTokens.Length; category++)
    {
        string categoryName = this.property.Name(category);
        DoubleArray.ResultPair match = this.unkDic.ExactMatchSearch(categoryName);
        if (match.Value == -1)
        {
            throw new MeCabInvalidFileException("cannot find UNK category: " + categoryName, this.unkDic.FileName);
        }

        this.unkTokens[category] = this.unkDic.GetToken(match);
    }

    this.space = this.property.GetCharInfo(' ');

    this.bosFeature = param.BosFeature;
    this.unkFeature = param.UnkFeature;

    // A non-positive grouping size falls back to the default.
    this.maxGroupingSize = param.MaxGroupingSize;
    if (this.maxGroupingSize <= 0)
    {
        this.maxGroupingSize = DefaltMaxGroupingSize;
    }
}
/// <summary>
/// Reads the 32-bit value stored 8 bytes past <paramref name="address"/>, resolves it through
/// <c>dic.GetFeature</c>, and decodes the result with the encoding of the bundle's tokenizer.
/// </summary>
/// <typeparam name="TNode">The type of nodes that <paramref name="bundle"/> is for.</typeparam>
/// <param name="address">Raw address of the record to read (presumably a token record - confirm against the caller).</param>
/// <param name="bundle">The dictionary bundle whose tokenizer encoding is used for decoding.</param>
/// <param name="dic">The dictionary that resolves the feature.</param>
/// <returns>The decoded feature string.</returns>
private unsafe static string GetFeature <TNode>(ulong address, DictionaryBundle <TNode> bundle, MeCabDictionary dic)
    where TNode : MeCabNodeBase <TNode>
{
    const int FeatureFieldOffset = 8;

    byte* record = (byte*)address;
    uint featurePosition = *(uint*)(record + FeatureFieldOffset);

    return StrUtils.GetString(dic.GetFeature(featurePosition), bundle.Tokenizer.Encoding);
}
/// <summary>
/// Enumerates every token of a dictionary as an isolated node.
/// </summary>
/// <typeparam name="TNode">The type of nodes that <paramref name="bundle"/> is for.</typeparam>
/// <param name="bundle">The dictionary bundle containing <paramref name="dictionary"/>.</param>
/// <param name="dictionary">The dictionary to read the nodes from.</param>
/// <returns>A lazily evaluated sequence of nodes, one per 16-byte step through the token table.</returns>
/// <remarks>
/// Walking all nodes can take a significant amount of time, depending on the size of the
/// dictionary, so use this with care.
/// </remarks>
public static IEnumerable <TNode> GetNodes <TNode>(this DictionaryBundle <TNode> bundle, MeCabDictionary dictionary)
    where TNode : MeCabNodeBase <TNode>
{
    // Distance in bytes between two consecutive entries of the token table.
    const ulong TokenRecordSize = 16;

    ulong viewAddress = GetSafeMemoryMappedViewAddress(dictionary);

    ulong tableBegin;
    ulong tableEnd;
    GetTokenTableLocations(viewAddress, out tableBegin, out tableEnd);

    ulong current = tableBegin;
    while (current < tableEnd)
    {
        var node = bundle.NodeAllocator();
        LoadNodeData(current, node);
        node.Feature = GetFeature(current, bundle, dictionary);
        node.Stat = MeCabNodeStat.Nor;
        yield return node;

        current += TokenRecordSize;
    }
}
/// <summary>
/// Reads the header information of a MeCab dic file from the dictionary's mapped memory.
/// </summary>
/// <typeparam name="TNode">The type of nodes that <paramref name="bundle"/> is for.</typeparam>
/// <param name="bundle">The dictionary bundle containing <paramref name="dictionary"/>; not read by this method.</param>
/// <param name="dictionary">The dictionary object to read the header from.</param>
/// <returns>
/// A header built from ten consecutive 32-bit values at byte offsets 0 through 36,
/// followed by a 32-byte charset field starting at byte offset 40.
/// </returns>
public unsafe static Header GetHeader <TNode>(this DictionaryBundle <TNode> bundle, MeCabDictionary dictionary)
    where TNode : MeCabNodeBase <TNode>
{
    const int CharsetOffset = 40;
    const int CharsetLength = 32;

    byte* start = (byte*)GetSafeMemoryMappedViewAddress(dictionary);

    // Copy the raw charset bytes out of the mapped view.
    byte[] charsetBytes = new byte[CharsetLength];
    byte* charsetSource = start + CharsetOffset;
    for (int offset = 0; offset < charsetBytes.Length; offset++)
    {
        charsetBytes[offset] = charsetSource[offset];
    }

    // The numeric fields are laid out back to back, four bytes each.
    uint* fields = (uint*)start;

    return new Header()
    {
        Magic = fields[0],
        Version = fields[1],
        Type = fields[2],
        LexSize = fields[3],
        LSize = fields[4],
        RSize = fields[5],
        DSize = fields[6],
        TSize = fields[7],
        FSize = fields[8],
        Dummy = fields[9],
        Charset = charsetBytes,
    };
}
/// <summary>
/// Stores the values needed to load the feature information lazily.
/// </summary>
/// <param name="featurePos">Position of the feature information inside the dictionary.</param>
/// <param name="dic">The dictionary the feature is looked up in.</param>
internal void SetFeature(uint featurePos, MeCabDictionary dic)
{
    // Discard any feature value held so far; only the lookup information is kept.
    this.feature = null;

    this.featurePos = featurePos;
    this.Dictionary = dic;
}
/// <summary>
/// Clears the stored feature and records the feature position and source dictionary,
/// so the feature can be obtained lazily later on.
/// </summary>
/// <param name="featurePos">Position of the feature inside the dictionary.</param>
/// <param name="dic">The dictionary to search for the feature.</param>
internal void SetFeature(uint featurePos, MeCabDictionary dic)
{
    this.feature = null;
    this.featurePos = featurePos;
    this.Dictionary = dic;
}