/// <summary>
/// Parses <paramref name="str"/> with a tagger created for the given dictionary directory and
/// concatenates one string per node whose <c>CharType</c> is greater than zero.
/// </summary>
/// <param name="dicPathFromExe">Directory assigned to <c>MeCabParam.DicDir</c>.</param>
/// <param name="str">Text to analyse.</param>
/// <returns>
/// For each qualifying node: the comma-separated feature field at index 7 when the feature has
/// at least nine fields, otherwise the node's surface; all pieces joined without separators.
/// </returns>
public static String GetHiragana(String dicPathFromExe, String str)
{
    MeCabParam param = new MeCabParam();
    param.DicDir = dicPathFromExe;

    // A builder avoids re-copying the accumulated text for every node.
    System.Text.StringBuilder hiragana = new System.Text.StringBuilder();

    // The tagger is released deterministically; every node is read before it is disposed.
    using (MeCabTagger tagger = MeCabTagger.Create(param))
    {
        for (MeCabNode node = tagger.ParseToNode(str); node != null; node = node.Next)
        {
            if (node.CharType > 0)
            {
                String[] fields = node.Feature.Split(',');
                hiragana.Append(fields.Length < 9 ? node.Surface : fields[7]);
            }
        }
    }

    return hiragana.ToString();
}
/// <summary>
/// Lazily yields the results of repeated <c>Next()</c> calls until one returns null.
/// </summary>
/// <returns>A sequence of the non-null nodes produced by <c>Next()</c>.</returns>
public IEnumerable<MeCabNode> GetEnumerator()
{
    MeCabNode produced;
    while ((produced = this.Next()) != null)
    {
        yield return produced;
    }
}
/// <summary>
/// Chains every node found in <c>work.BeginNodeList</c> into one Prev/Next list starting at
/// <c>work.BosNode</c> and fills in <c>Prob</c> for each left path of those nodes.
/// </summary>
/// <param name="work">Per-parse working data (node lists, BOS node, Z).</param>
/// <returns><c>work.BosNode</c>, or null when <c>BuildBestLattice</c> returns null.</returns>
private MeCabNode BuildAllLattice(ThreadData work)
{
    if (this.BuildBestLattice(work) == null)
    {
        return null;
    }

    MeCabNode tail = work.BosNode;
    foreach (MeCabNode head in work.BeginNodeList)
    {
        MeCabNode current = head;
        while (current != null)
        {
            // Append the node to the growing Prev/Next chain.
            tail.Next = current;
            current.Prev = tail;
            tail = current;

            MeCabPath edge = current.LPath;
            while (edge != null)
            {
                edge.Prob = (float)(edge.LNode.Alpha - this.theta * edge.Cost + edge.RNode.Beta - work.Z);
                edge = edge.LNext;
            }

            current = current.BNext;
        }
    }

    return work.BosNode;
}
/// <summary>
/// Runs <c>DoViterbi</c>, then computes Alpha, Beta and Prob for the nodes of the lattice.
/// </summary>
/// <param name="sentence">Pointer to the sentence characters, forwarded to <c>DoViterbi</c>.</param>
/// <param name="len">Sentence length; node lists are indexed from 0 to <paramref name="len"/> inclusive.</param>
/// <param name="work">Per-parse working data holding the node lists and Z.</param>
private unsafe void ForwardBackward(char* sentence, int len, ThreadData work)
{
    this.DoViterbi(sentence, len, work);

    // Alpha values, walking positions left to right.
    work.EndNodeList[0].Alpha = 0f;
    for (int pos = 0; pos <= len; pos++)
    {
        for (MeCabNode node = work.BeginNodeList[pos]; node != null; node = node.BNext)
        {
            this.CalcAlpha(node, this.theta);
        }
    }

    // Beta values, walking positions right to left.
    work.BeginNodeList[len].Beta = 0f;
    for (int pos = len; pos >= 0; pos--)
    {
        for (MeCabNode node = work.EndNodeList[pos]; node != null; node = node.ENext)
        {
            this.CalcBeta(node, this.theta);
        }
    }

    work.Z = work.BeginNodeList[len].Alpha;

    // Prob = exp(Alpha + Beta - Z) for every node.
    for (int pos = 0; pos <= len; pos++)
    {
        for (MeCabNode node = work.BeginNodeList[pos]; node != null; node = node.BNext)
        {
            double exponent = node.Alpha + node.Beta - work.Z;
            node.Prob = (float)Math.Exp(exponent);
        }
    }
}
/// <summary>
/// Creates a node via <c>GetBosNode()</c> and changes its status to <c>MeCabNodeStat.Eos</c>.
/// </summary>
/// <returns>The newly created EOS node.</returns>
public MeCabNode GetEosNode()
{
    // Built exactly like the BOS node; only the status differs.
    MeCabNode sentinel = this.GetBosNode();
    sentinel.Stat = MeCabNodeStat.Eos;
    return sentinel;
}
/// <summary>
/// Performs morphological analysis on the contents of <c>FileName</c> and saves the result
/// to <c>MecabFileName</c>, one "surface,feature" line per node whose <c>CharType</c> is positive.
/// </summary>
public void Execute()
{
    var allText = File.ReadAllText(FileName);
    var mecabParam = new MeCabParam
    {
        DicDir = Path.Combine(AppDomain.CurrentDomain.BaseDirectory,
                              @"..\..\..\Chapter04.Core\dic\ipadic")
    };

    // Both the tagger and the writer are released deterministically. Disposing the writer
    // flushes it, so no explicit Flush() call is needed.
    using (MeCabTagger meCabTagger = MeCabTagger.Create(mecabParam))
    using (var writer = new StreamWriter(MecabFileName, false))
    {
        for (MeCabNode node = meCabTagger.ParseToNode(allText); node != null; node = node.Next)
        {
            if (node.CharType > 0)
            {
                writer.WriteLine(node.Surface + "," + node.Feature);
            }
        }
    }
}
/// <summary>
/// For every node in the BNext chain starting at <paramref name="rNode"/>, picks the cheapest
/// predecessor among the nodes ending at <paramref name="pos"/>, records it as Prev together
/// with the accumulated cost, and registers the node in the end-node list at its end position.
/// </summary>
/// <param name="pos">Index into <c>work.EndNodeList</c> of the candidate predecessors.</param>
/// <param name="rNode">First node of the BNext chain to connect.</param>
/// <param name="work">Per-parse working data.</param>
/// <exception cref="MeCabException">No predecessor had a cost below <c>int.MaxValue</c>.</exception>
private void ConnectNomal(int pos, MeCabNode rNode, ThreadData work)
{
    while (rNode != null)
    {
        MeCabNode winner = null;
        long lowest = int.MaxValue; // 2147483647

        MeCabNode candidate = work.EndNodeList[pos];
        while (candidate != null)
        {
            long total = candidate.Cost + this.connector.Cost(candidate, rNode);
            if (total < lowest)
            {
                lowest = total;
                winner = candidate;
            }

            candidate = candidate.ENext;
        }

        if (winner == null)
        {
            throw new MeCabException("too long sentence.");
        }

        rNode.Prev = winner;
        rNode.Next = null;
        rNode.Cost = lowest;

        // Push the node onto the list of nodes ending at its own end position.
        int endIndex = rNode.RLength + pos;
        rNode.ENext = work.EndNodeList[endIndex];
        work.EndNodeList[endIndex] = rNode;

        rNode = rNode.BNext;
    }
}
/// <summary>
/// Runs <c>DoViterbi</c> and then fills in Alpha, Beta and Prob for the lattice nodes.
/// </summary>
/// <param name="sentence">Pointer to the sentence characters, forwarded to <c>DoViterbi</c>.</param>
/// <param name="len">Sentence length; positions 0 through <paramref name="len"/> are visited.</param>
/// <param name="work">Per-parse working data holding the node lists and Z.</param>
private unsafe void ForwardBackward(char* sentence, int len, ThreadData work)
{
    this.DoViterbi(sentence, len, work);

    // Forward sweep: Alpha for each node, by ascending begin position.
    work.EndNodeList[0].Alpha = 0f;
    int index = 0;
    while (index <= len)
    {
        MeCabNode cursor = work.BeginNodeList[index];
        while (cursor != null)
        {
            this.CalcAlpha(cursor, this.theta);
            cursor = cursor.BNext;
        }

        index++;
    }

    // Backward sweep: Beta for each node, by descending end position.
    work.BeginNodeList[len].Beta = 0f;
    index = len;
    while (index >= 0)
    {
        MeCabNode cursor = work.EndNodeList[index];
        while (cursor != null)
        {
            this.CalcBeta(cursor, this.theta);
            cursor = cursor.ENext;
        }

        index--;
    }

    // Z is the alpha of the node at the final position (EOS).
    work.Z = work.BeginNodeList[len].Alpha;

    index = 0;
    while (index <= len)
    {
        MeCabNode cursor = work.BeginNodeList[index];
        while (cursor != null)
        {
            cursor.Prob = (float)Math.Exp(cursor.Alpha + cursor.Beta - work.Z);
            cursor = cursor.BNext;
        }

        index++;
    }
}
/// <summary>
/// Parses a fixed sample sentence and prints "surface TAB feature" for each node whose
/// <c>CharType</c> is positive. Any exception message is printed instead of propagating,
/// and the method always waits for a key/character on the console before returning.
/// </summary>
public void Run()
{
    try
    {
        string sentence = "ユーザが本明細書において提供れるような方法";

        MeCabParam param = new MeCabParam();
        param.DicDir = @"..\..\dic\ipadic";

        // Release the tagger deterministically once all nodes have been printed.
        using (MeCabTagger t = MeCabTagger.Create(param))
        {
            for (MeCabNode node = t.ParseToNode(sentence); node != null; node = node.Next)
            {
                if (node.CharType > 0)
                {
                    Console.WriteLine(node.Surface + "\t" + node.Feature);
                }
            }
        }

        Console.WriteLine();
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.Message);
    }
    finally
    {
        Console.Read();
    }
}
/// <summary>
/// Parses <paramref name="text"/> with the shared tagger and returns one <c>POS</c> entry per
/// node whose <c>CharType</c> is positive and whose surface is at most 100 characters long.
/// </summary>
/// <param name="text">Text to analyse; null or empty yields an empty result.</param>
/// <param name="count">Not read or written by this method.</param>
/// <returns>Entries holding the node surface and the first comma-separated feature field.</returns>
public static IEnumerable<POS> Extract(string text, ref NLPCount count)
{
    List<POS> collected = new List<POS>();
    if (string.IsNullOrEmpty(text))
    {
        return collected;
    }

    for (MeCabNode current = tagger.ParseToNode(text); current != null; current = current.Next)
    {
        if (current.CharType > 0 && current.Surface.Length <= 100)
        {
            POS entry = new POS();
            entry.Text = current.Surface;
            entry.PosTag = current.Feature.Split(',')[0];
            collected.Add(entry);
        }
    }

    return collected;
}
/// <summary>
/// Pops elements from the agenda until one whose node is BOS is found, relinks Next/Prev
/// along that element's chain, and returns the BOS node. Elements for other nodes are
/// expanded: one new element per left path is pushed back onto the agenda.
/// </summary>
/// <returns>The BOS node of the next result, or null when the agenda is exhausted.</returns>
public MeCabNode Next()
{
    while (this.agenda.Count != 0)
    {
        QueueElement head = this.agenda.Pop();
        MeCabNode current = head.Node;

        if (current.Stat != MeCabNodeStat.Bos)
        {
            // Not at the start yet: queue one continuation per left path.
            MeCabPath edge = current.LPath;
            while (edge != null)
            {
                QueueElement expanded = new QueueElement();
                expanded.Node = edge.LNode;
                expanded.Gx = edge.Cost + head.Gx;
                expanded.Fx = edge.LNode.Cost + edge.Cost + head.Gx;
                expanded.Next = head;
                this.agenda.Push(expanded);

                edge = edge.LNext;
            }

            continue;
        }

        // Reached BOS: rewrite next & prev along the element chain.
        QueueElement link = head;
        while (link.Next != null)
        {
            link.Node.Next = link.Next.Node;
            link.Next.Node.Prev = link.Node;
            link = link.Next;
        }

        return current;
    }

    return null;
}
/// <summary>
/// Connects each node of the BNext chain starting at <paramref name="rNode"/> to its cheapest
/// predecessor among the nodes ending at <paramref name="pos"/> and registers it in the
/// end-node list at its own end position.
/// </summary>
/// <param name="pos">Index into <c>work.EndNodeList</c> of the candidate predecessors.</param>
/// <param name="rNode">First node of the BNext chain to connect.</param>
/// <param name="work">Per-parse working data.</param>
/// <exception cref="MeCabException">No predecessor had a cost below 2147483647.</exception>
private void ConnectNomal(int pos, MeCabNode rNode, ThreadData work)
{
    for (; rNode != null; rNode = rNode.BNext)
    {
        long lowestCost = 2147483647L;
        MeCabNode chosen = null;

        for (MeCabNode left = work.EndNodeList[pos]; left != null; left = left.ENext)
        {
            long total = left.Cost + this.connector.Cost(left, rNode);
            if (total < lowestCost)
            {
                chosen = left;
                lowestCost = total;
            }
        }

        if (chosen == null)
        {
            throw new MeCabException("too long sentence.");
        }

        rNode.Prev = chosen;
        rNode.Next = null;
        rNode.Cost = lowestCost;

        int endIndex = rNode.RLength + pos;
        rNode.ENext = work.EndNodeList[endIndex];
        work.EndNodeList[endIndex] = rNode;
    }
}
/// <summary>
/// Pops agenda elements until one whose node is BOS appears; that element's chain is relinked
/// through Next/Prev and its node returned. Every other popped element is expanded into one
/// new element per left path of its node.
/// </summary>
/// <returns>The BOS node of the next result, or null when the agenda is empty.</returns>
public MeCabNode Next()
{
    while (this.agenda.Count != 0)
    {
        QueueElement top = this.agenda.Pop();
        MeCabNode reached = top.Node;

        if (reached.Stat == MeCabNodeStat.Bos)
        {
            for (QueueElement step = top; step.Next != null; step = step.Next)
            {
                step.Node.Next = step.Next.Node;
                step.Next.Node.Prev = step.Node;
            }

            return reached;
        }

        for (MeCabPath path = reached.LPath; path != null; path = path.LNext)
        {
            this.agenda.Push(new QueueElement
            {
                Node = path.LNode,
                Gx = path.Cost + top.Gx,
                Fx = path.LNode.Cost + path.Cost + top.Gx,
                Next = top
            });
        }
    }

    return null;
}
/// <summary>
/// Recomputes <c>n.Beta</c> by folding <c>Utils.LogSumExp</c> over the right paths of
/// <paramref name="n"/>.
/// </summary>
/// <param name="n">Node whose Beta is recomputed; it is reset to 0 first.</param>
/// <param name="beta">Scale applied (negated) to each path cost.</param>
private void CalcBeta(MeCabNode n, double beta)
{
    double negatedScale = 0.0 - beta;

    n.Beta = 0f;
    MeCabPath edge = n.RPath;
    while (edge != null)
    {
        // The flag tells LogSumExp that this is the first term of the sum.
        bool isFirst = edge == n.RPath;
        double term = negatedScale * (double)edge.Cost + (double)edge.RNode.Beta;
        n.Beta = (float)Utils.LogSumExp((double)n.Beta, term, isFirst);

        edge = edge.RNext;
    }
}
/// <summary>
/// Recomputes <c>n.Alpha</c> by folding <c>Utils.LogSumExp</c> over the left paths of
/// <paramref name="n"/>.
/// </summary>
/// <param name="n">Node whose Alpha is recomputed; it is reset to 0 first.</param>
/// <param name="beta">Scale applied (negated) to each path cost.</param>
private void CalcAlpha(MeCabNode n, double beta)
{
    double negatedScale = 0.0 - beta;

    n.Alpha = 0f;
    MeCabPath edge = n.LPath;
    while (edge != null)
    {
        // The flag tells LogSumExp that this is the first term of the sum.
        bool isFirst = edge == n.LPath;
        double term = negatedScale * (double)edge.Cost + (double)edge.LNode.Alpha;
        n.Alpha = (float)Utils.LogSumExp((double)n.Alpha, term, isFirst);

        edge = edge.LNext;
    }
}
/// <summary>
/// Copies the connection attributes, part-of-speech id and word cost from
/// <paramref name="token"/> onto <paramref name="node"/>, then hands the token's feature
/// value and the dictionary to <c>node.SetFeature</c>.
/// </summary>
/// <param name="dic">Dictionary passed through to <c>SetFeature</c>.</param>
/// <param name="token">Source of the copied values.</param>
/// <param name="node">Node that receives the values.</param>
private void ReadNodeInfo(MeCabDictionary dic, Token token, MeCabNode node)
{
    // Scalar attributes first.
    node.PosId = token.PosId;
    node.WCost = token.WCost;
    node.LCAttr = token.LcAttr;
    node.RCAttr = token.RcAttr;

    // Feature last, via the node's own setter method.
    node.SetFeature(token.Feature, dic);
}
/// <summary>
/// Allocates a fresh <c>MeCabNode</c>. When the <c>NeedId</c> symbol is defined the node
/// also receives the next value of <c>Tokenizer.id</c>, which is then incremented.
/// </summary>
/// <returns>The newly created node.</returns>
public MeCabNode GetNewNode()
{
    MeCabNode created = new MeCabNode();
#if NeedId
    created.Id = Tokenizer.id++;
#endif
    return created;
}
/// <summary>
/// Creates a node marked as BOS: surface "BOS/EOS", the configured BOS feature,
/// <c>IsBest</c> set, and status <c>MeCabNodeStat.Bos</c>.
/// </summary>
/// <returns>The newly created BOS node.</returns>
public MeCabNode GetBosNode()
{
    MeCabNode sentinel = this.GetNewNode();

    sentinel.Surface = "BOS/EOS";
    sentinel.Feature = this.bosFeature;
    sentinel.IsBest = true;
    sentinel.Stat = MeCabNodeStat.Bos;

    return sentinel;
}
/// <summary>
/// Returns the matrix entry for the pair (<paramref name="lNode"/>, <paramref name="rNode"/>)
/// plus the word cost of <paramref name="rNode"/>.
/// </summary>
/// <param name="lNode">Left node; its <c>RCAttr</c> selects the matrix column offset.</param>
/// <param name="rNode">Right node; its <c>LCAttr</c> is scaled by <c>LSize</c>.</param>
/// <returns>Matrix value at <c>RCAttr + LSize * LCAttr</c> plus <c>rNode.WCost</c>.</returns>
public int Cost(MeCabNode lNode, MeCabNode rNode)
{
    int index = lNode.RCAttr + this.LSize * rNode.LCAttr;

#if MMF_MTX
    // Matrix is read as 16-bit values at a byte offset.
    int connection = this.matrix.ReadInt16(index * sizeof(short));
#else
    int connection = this.matrix[index];
#endif

    return connection + rNode.WCost;
}
/// <summary>
/// Returns the matrix entry for the pair (<paramref name="lNode"/>, <paramref name="rNode"/>)
/// plus the word cost of <paramref name="rNode"/>.
/// </summary>
/// <param name="lNode">Left node; contributes <c>RCAttr</c> to the matrix index.</param>
/// <param name="rNode">Right node; contributes <c>LSize * LCAttr</c> to the index and its <c>WCost</c> to the result.</param>
/// <returns>Matrix value at the computed index plus <c>rNode.WCost</c>.</returns>
public int Cost(MeCabNode lNode, MeCabNode rNode)
{
    int index = lNode.RCAttr + this.LSize * rNode.LCAttr;

#if NET40 || NET45 || NETSTANDARD2_0 || NETSTANDARD2_1
    // On these targets the matrix is read as 16-bit values at a byte offset.
    int connection = this.matrix.ReadInt16(index * sizeof(short));
#else
    int connection = this.matrix[index];
#endif

    return connection + rNode.WCost;
}
/// <summary>
/// Recomputes <c>n.Alpha</c> by accumulating <c>Utils.LogSumExp</c> over every left path
/// of <paramref name="n"/>.
/// </summary>
/// <param name="n">Node whose Alpha is recomputed; it is reset to 0 first.</param>
/// <param name="beta">Scale applied (negated) to each path cost.</param>
private void CalcAlpha(MeCabNode n, double beta)
{
    double scale = -beta;

    n.Alpha = 0f;
    MeCabPath edge = n.LPath;
    while (edge != null)
    {
        double term = scale * edge.Cost + edge.LNode.Alpha;
        bool isFirst = edge == n.LPath; // first term of the sum
        n.Alpha = (float)Utils.LogSumExp(n.Alpha, term, isFirst);

        edge = edge.LNext;
    }
}
/// <summary>
/// Copies the connection attributes, part-of-speech id and word cost from
/// <paramref name="token"/> onto <paramref name="node"/> and registers the token's feature
/// value with the node through <c>SetFeature</c>.
/// </summary>
/// <param name="dic">Dictionary passed through to <c>SetFeature</c>.</param>
/// <param name="token">Source of the copied values.</param>
/// <param name="node">Node that receives the values.</param>
private void ReadNodeInfo(MeCabDictionary dic, Token token, MeCabNode node)
{
    node.PosId = token.PosId;
    node.WCost = token.WCost;
    node.LCAttr = token.LcAttr;
    node.RCAttr = token.RcAttr;

    // The feature information is deliberately not fetched at this stage (an earlier version
    // assigned dic.GetFeature(token) directly); SetFeature makes lazy retrieval possible instead.
    node.SetFeature(token.Feature, dic);
}
/// <summary>
/// Recomputes <c>n.Beta</c> by accumulating <c>Utils.LogSumExp</c> over every right path
/// of <paramref name="n"/>.
/// </summary>
/// <param name="n">Node whose Beta is recomputed; it is reset to 0 first.</param>
/// <param name="beta">Scale applied (negated) to each path cost.</param>
private void CalcBeta(MeCabNode n, double beta)
{
    double scale = -beta;

    n.Beta = 0f;
    MeCabPath edge = n.RPath;
    while (edge != null)
    {
        double term = scale * edge.Cost + edge.RNode.Beta;
        bool isFirst = edge == n.RPath; // first term of the sum
        n.Beta = (float)Utils.LogSumExp(n.Beta, term, isFirst);

        edge = edge.RNext;
    }
}
/// <summary>
/// Creates a node marked as BOS: surface <c>BosKey</c>, the configured BOS feature,
/// <c>IsBest</c> set, and status <c>MeCabNodeStat.Bos</c>.
/// </summary>
/// <returns>The newly created BOS node.</returns>
public MeCabNode GetBosNode()
{
    MeCabNode sentinel = this.GetNewNode();

    // The surface is only a placeholder value.
    sentinel.Surface = BosKey;
    sentinel.Feature = this.bosFeature;
    sentinel.IsBest = true;
    sentinel.Stat = MeCabNodeStat.Bos;

    return sentinel;
}
/// <summary>
/// Appends one "surface TAB feature" line for every node after <paramref name="bosNode"/>
/// that still has a successor, followed by a final "EOS" line.
/// </summary>
/// <param name="os">Destination buffer.</param>
/// <param name="bosNode">First node of the result list; it is not written itself.</param>
public void WriteLattice(StringBuilder os, MeCabNode bosNode)
{
    MeCabNode current = bosNode.Next;

    // The last node (the one without a successor) is not written.
    while (current.Next != null)
    {
        os.Append(current.Surface).Append("\t").Append(current.Feature).AppendLine();
        current = current.Next;
    }

    os.AppendLine("EOS");
}
/// <summary>
/// Builds a chunk from a node: stores the surface, status and raw feature string, and
/// spreads the comma-separated feature fields (indexes 0 to 8) over the individual members.
/// Members whose field is absent get an empty string, except the first, which gets "未定義".
/// </summary>
/// <param name="node">Node to copy the values from.</param>
public Chunk(MeCabNode node)
{
    表層形 = node.Surface;
    stat = node.Stat;
    feature = node.Feature;

    string[] fields = node.Feature.Split(',');
    int available = fields.Length;

    品詞 = available >= 1 ? fields[0] : "未定義";
    品詞細分類1 = available >= 2 ? fields[1] : "";
    品詞細分類2 = available >= 3 ? fields[2] : "";
    品詞細分類3 = available >= 4 ? fields[3] : "";
    活用形 = available >= 5 ? fields[4] : "";
    活用型 = available >= 6 ? fields[5] : "";
    原形 = available >= 7 ? fields[6] : "";
    読み = available >= 8 ? fields[7] : "";
    発音 = available >= 9 ? fields[8] : "";
}
/// <summary>
/// Converts the received node and everything after it into an IEnumerable&lt;MeCabResult&gt;.
/// </summary>
/// <param name="node">Head of the node list to convert.</param>
/// <returns>The nodes from <paramref name="node"/> onwards, converted to IEnumerable&lt;MeCabResult&gt;.</returns>
internal static IEnumerable<MeCabResult> ToMeCabResultEnumerable(this MeCabNode node)
{
    for (MeCabNode current = node; current != null; current = current.Next)
    {
        // Filter out BOS/EOS: only nodes with a positive CharType are converted.
        if (current.CharType > 0)
        {
            yield return current.ToMeCabResult();
        }
    }
}
/// <summary>
/// Walks backwards from <c>work.EosNode</c> along Prev, marking every visited node that has
/// a predecessor as best and pointing the predecessor's Next at it.
/// </summary>
/// <param name="work">Per-parse working data holding the BOS and EOS nodes.</param>
/// <returns><c>work.BosNode</c>.</returns>
private MeCabNode BuildBestLattice(ThreadData work)
{
    for (MeCabNode current = work.EosNode; current.Prev != null; current = current.Prev)
    {
        current.IsBest = true;
        current.Prev.Next = current;
    }

    return work.BosNode;
}
/// <summary>
/// Follows the Prev links back from <c>work.EosNode</c>; each node that has a predecessor is
/// flagged as best and becomes that predecessor's Next.
/// </summary>
/// <param name="work">Per-parse working data holding the BOS and EOS nodes.</param>
/// <returns><c>work.BosNode</c>.</returns>
private MeCabNode BuildBestLattice(ThreadData work)
{
    MeCabNode cursor = work.EosNode;
    while (true)
    {
        MeCabNode before = cursor.Prev;
        if (before == null)
        {
            break;
        }

        cursor.IsBest = true;
        before.Next = cursor;
        cursor = before;
    }

    return work.BosNode;
}
public static List <(string, string, string)> GetLemmatized(string sentence) { int[] outputPos = null; if (StorageHelper.GetSetting <int>("LemmatizerMode") == 0) { outputPos = outputPos1; } else if (StorageHelper.GetSetting <int>("LemmatizerMode") == 1) { outputPos = outputPos2; } List <(string, string, string)> err = new List <(string, string, string)>(); err.Add((sentence, "", "")); try { if (!string.IsNullOrWhiteSpace(sentence)) { MeCabNode node = t.ParseToNode(sentence); List <(string, string, string)> lemmatized = new List <(string, string, string)>(); while (node != null) { if (node.CharType > 0) { if (outputPos == null || (outputPos != null && outputPos.Contains(node.PosId))) { var features = node.Feature.Split(','); if (node.Surface == "死ね") { lemmatized.Add(("死ぬ", "しぬ", "動詞")); } else if (node.Surface == "しね") { lemmatized.Add(("しぬ", "しぬ", "動詞")); } else { string str = features[features.Count() - 3]; if (str != "ない" && str != "する") { if (node.PosId == 33 && str == "いる") { lemmatized.Add(("居る", features[features.Count() - 2], features[0])); } else if (node.PosId == 37) { lemmatized.Add((str + "ない", features[features.Count() - 2] + "ナイ", features[0])); } else { lemmatized.Add((str, features[features.Count() - 2], features[0]));
/// <summary>
/// Appends one space-separated line per node of the list starting at
/// <paramref name="bosNode"/>: label (BOS, EOS or surface), feature, positions, attributes,
/// flags, Alpha/Beta/Prob formatted with <c>FloatFormat</c>, cost, and one entry per left path.
/// When <c>NeedId</c> is defined, node ids are included as well.
/// </summary>
/// <param name="os">Destination buffer.</param>
/// <param name="bosNode">First node to dump.</param>
public void WriteDump(StringBuilder os, MeCabNode bosNode)
{
    MeCabNode current = bosNode;
    while (current != null)
    {
#if NeedId
        os.Append(current.Id).Append(" ");
#endif
        string label = current.Stat == MeCabNodeStat.Bos
            ? "BOS"
            : (current.Stat == MeCabNodeStat.Eos ? "EOS" : current.Surface);

        os.Append(label)
          .Append(" ").Append(current.Feature)
          .Append(" ").Append(current.BPos)
          .Append(" ").Append(current.EPos)
          .Append(" ").Append(current.RCAttr)
          .Append(" ").Append(current.LCAttr)
          .Append(" ").Append(current.PosId)
          .Append(" ").Append(current.CharType)
          .Append(" ").Append((int)current.Stat)
          .Append(" ").Append(current.IsBest ? "1" : "0")
          .Append(" ").Append(current.Alpha.ToString(FloatFormat))
          .Append(" ").Append(current.Beta.ToString(FloatFormat))
          .Append(" ").Append(current.Prob.ToString(FloatFormat))
          .Append(" ").Append(current.Cost);

        MeCabPath edge = current.LPath;
        while (edge != null)
        {
#if NeedId
            os.Append(" ").Append(edge.LNode.Id);
#endif
            os.Append(" ")
              .Append(":").Append(edge.Cost)
              .Append(":").Append(edge.Prob.ToString(FloatFormat));

            edge = edge.LNext;
        }

        os.AppendLine();
        current = current.Next;
    }
}
/// <summary>
/// Gives every node in the BNext chain starting at <paramref name="rNode"/> its cheapest
/// predecessor from the nodes ending at <paramref name="pos"/>, stores the accumulated cost,
/// and prepends the node to the end-node list at its own end position.
/// </summary>
/// <param name="pos">Index into <c>work.EndNodeList</c> of the candidate predecessors.</param>
/// <param name="rNode">First node of the BNext chain to connect.</param>
/// <param name="work">Per-parse working data.</param>
/// <exception cref="MeCabException">No predecessor had a cost below <c>int.MaxValue</c>.</exception>
private void ConnectNomal(int pos, MeCabNode rNode, ThreadData work)
{
    for (MeCabNode right = rNode; right != null; right = right.BNext)
    {
        MeCabNode cheapest = null;
        long cheapestCost = int.MaxValue; // 2147483647

        MeCabNode left = work.EndNodeList[pos];
        while (left != null)
        {
            long total = left.Cost + this.connector.Cost(left, right);
            if (total < cheapestCost)
            {
                cheapestCost = total;
                cheapest = left;
            }

            left = left.ENext;
        }

        if (cheapest == null)
        {
            throw new MeCabException("too long sentence.");
        }

        right.Prev = cheapest;
        right.Next = null;
        right.Cost = cheapestCost;

        int endIndex = right.RLength + pos;
        right.ENext = work.EndNodeList[endIndex];
        work.EndNodeList[endIndex] = right;
    }
}
/// <summary>
/// Like the plain connect step, but additionally records a <c>MeCabPath</c> between every
/// (left, right) node pair: for each node in the BNext chain starting at
/// <paramref name="rNode"/> it picks the cheapest predecessor ending at <paramref name="pos"/>,
/// links a path object into both nodes' path lists, and registers the node at its end position.
/// </summary>
/// <param name="pos">Index into <c>work.EndNodeList</c> of the candidate predecessors.</param>
/// <param name="rNode">First node of the BNext chain to connect.</param>
/// <param name="work">Per-parse working data.</param>
/// <exception cref="ArgumentException">No predecessor had a cost below <c>int.MaxValue</c>.</exception>
private void ConnectWithAllPath(int pos, MeCabNode rNode, ThreadData work)
{
    while (rNode != null)
    {
        MeCabNode winner = null;
        long lowest = int.MaxValue; // 2147483647

        MeCabNode left = work.EndNodeList[pos];
        while (left != null)
        {
            int localCost = this.connector.Cost(left, rNode);
            long total = left.Cost + localCost;
            if (total < lowest)
            {
                lowest = total;
                winner = left;
            }

            // Prepend the new path to the right node's left list and the left node's right list.
            MeCabPath link = new MeCabPath();
            link.Cost = localCost;
            link.RNode = rNode;
            link.LNode = left;
            link.LNext = rNode.LPath;
            link.RNext = left.RPath;
            rNode.LPath = link;
            left.RPath = link;

            left = left.ENext;
        }

        if (winner == null)
        {
            throw new ArgumentException("too long sentence.");
        }

        rNode.Prev = winner;
        rNode.Next = null;
        rNode.Cost = lowest;

        int endIndex = rNode.RLength + pos;
        rNode.ENext = work.EndNodeList[endIndex];
        work.EndNodeList[endIndex] = rNode;

        rNode = rNode.BNext;
    }
}
/// <summary>
/// In partial mode, reduces the BNext chain starting at <paramref name="node"/> to the nodes
/// that agree with the constraint node stored in <c>work.BeginNodeList[pos]</c>: same surface
/// and either a "*" constraint feature or a successful <c>PartialMatch</c>.
/// </summary>
/// <param name="node">Head of the candidate chain.</param>
/// <param name="pos">Position whose constraint node is consulted.</param>
/// <param name="work">Per-parse working data.</param>
/// <returns>
/// <paramref name="node"/> unchanged when partial mode is off or no constraint exists;
/// otherwise the head of the filtered chain, or the constraint node when nothing matched.
/// </returns>
private MeCabNode FilterNode(MeCabNode node, int pos, ThreadData work)
{
    if (!this.Partial)
    {
        return node;
    }

    MeCabNode constraint = work.BeginNodeList[pos];
    if (constraint == null)
    {
        return node;
    }

    bool acceptsAnyFeature = constraint.Feature == "*";
    MeCabNode head = null;
    MeCabNode last = null;

    for (MeCabNode candidate = node; candidate != null; candidate = candidate.BNext)
    {
        bool matches = constraint.Surface == candidate.Surface
            && (acceptsAnyFeature || this.PartialMatch(constraint.Feature, candidate.Feature));
        if (!matches)
        {
            continue;
        }

        // Relink the survivors into a chain of their own.
        if (last == null)
        {
            head = candidate;
        }
        else
        {
            last.BNext = candidate;
        }

        last = candidate;
    }

    // Terminate the filtered chain after its final survivor.
    if (last != null)
    {
        last.BNext = null;
    }

    return head ?? constraint;
}
/// <summary>
/// Writer that intentionally produces no output.
/// </summary>
/// <param name="os">Destination buffer; left untouched.</param>
/// <param name="bosNode">First node of the result list; ignored.</param>
public void WriteNone(StringBuilder os, MeCabNode bosNode)
{
    // Intentionally empty.
}
/// <summary>
/// Creates one unknown-word node per token registered for the character's default type and
/// prepends each of them to the chain referenced by <paramref name="resultNode"/>.
/// </summary>
/// <param name="resultNode">Head of the result chain; updated to the most recently created node.</param>
/// <param name="cInfo">Character info whose <c>DefaultType</c> selects the tokens and is stored as the node's CharType.</param>
/// <param name="begin">Start pointer used to compute <c>RLength</c> (<paramref name="begin3"/> minus this).</param>
/// <param name="begin2">Start pointer of the surface text.</param>
/// <param name="begin3">Pointer just past the surface text.</param>
private unsafe void AddUnknown(ref MeCabNode resultNode, CharInfo cInfo, char* begin, char* begin2, char* begin3)
{
    Token[] candidates = this.unkTokens[cInfo.DefaultType];
    int surfaceLength = (int)(begin3 - begin2);
    int spanLength = (int)(begin3 - begin);

    foreach (Token candidate in candidates)
    {
        MeCabNode created = this.GetNewNode();
        this.ReadNodeInfo(this.unkDic, candidate, created);

        created.CharType = cInfo.DefaultType;
        created.Surface = new string(begin2, 0, surfaceLength);
        created.Length = surfaceLength;
        created.RLength = spanLength;
        created.BNext = resultNode;
        created.Stat = MeCabNodeStat.Unk;

        // A configured unknown-word feature overrides the one set by ReadNodeInfo.
        if (this.unkFeature != null)
        {
            created.Feature = this.unkFeature;
        }

        resultNode = created;
    }
}
/// <summary>
/// Forwards to the currently selected <c>write</c> member with the same arguments.
/// </summary>
/// <param name="os">Destination buffer.</param>
/// <param name="bosNode">First node of the result list.</param>
public void Write(StringBuilder os, MeCabNode bosNode)
{
    write(os, bosNode);
}
/// <summary>
/// Appends the surfaces of all nodes after <paramref name="bosNode"/> that still have a
/// successor, separated by single spaces, followed by a line terminator.
/// </summary>
/// <param name="os">Destination buffer.</param>
/// <param name="bosNode">First node of the result list; it is not written itself.</param>
public void WriteWakati(StringBuilder os, MeCabNode bosNode)
{
    bool needsSeparator = false;

    // The final node (no successor) is never written.
    for (MeCabNode current = bosNode.Next; current.Next != null; current = current.Next)
    {
        if (needsSeparator)
        {
            os.Append(" ");
        }

        os.Append(current.Surface);
        needsSeparator = true;
    }

    os.AppendLine();
}
/// <summary>
/// Placeholder for user-defined output; not implemented.
/// </summary>
/// <param name="os">Destination buffer (unused).</param>
/// <param name="bosNode">First node of the result list (unused).</param>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public void WriteUser(StringBuilder os, MeCabNode bosNode)
{
    throw new NotImplementedException();
}
/// <summary>
/// Creates a new <c>MeCabNode</c>; with <c>NeedId</c> defined, it is also assigned the
/// current <c>Tokenizer.id</c>, which is post-incremented.
/// </summary>
/// <returns>The freshly allocated node.</returns>
public MeCabNode GetNewNode()
{
    MeCabNode fresh = new MeCabNode();
#if NeedId
    fresh.Id = Tokenizer.id++;
#endif
    return fresh;
}
/// <summary>
/// Appends a "U" line for every node and a "B" line for every left path whose probability
/// is at least 0.0001, then a closing "EOS" line. Fields are tab-separated and probabilities
/// are formatted with <c>FloatFormat</c>.
/// </summary>
/// <param name="os">Destination buffer.</param>
/// <param name="bosNode">First node of the list to write.</param>
public void WriteEM(StringBuilder os, MeCabNode bosNode)
{
    const float MinProb = 0.0001f;

    MeCabNode current = bosNode;
    while (current != null)
    {
        if (current.Prob >= MinProb)
        {
            string label;
            switch (current.Stat)
            {
                case MeCabNodeStat.Bos:
                    label = "BOS";
                    break;
                case MeCabNodeStat.Eos:
                    label = "EOS";
                    break;
                default:
                    label = current.Surface;
                    break;
            }

            os.Append("U\t").Append(label)
              .Append("\t").Append(current.Feature)
              .Append("\t").Append(current.Prob.ToString(FloatFormat))
              .AppendLine();
        }

        MeCabPath edge = current.LPath;
        while (edge != null)
        {
            if (edge.Prob >= MinProb)
            {
                os.Append("B\t").Append(edge.LNode.Feature)
                  .Append("\t").Append(current.Feature)
                  .Append("\t").Append(edge.Prob.ToString(FloatFormat))
                  .AppendLine();
            }

            edge = edge.LNext;
        }

        current = current.Next;
    }

    os.AppendLine("EOS");
}
/// <summary>
/// Expands a NUL-terminated format string into <paramref name="os"/> for a single node.
/// Ordinary characters are copied verbatim; '%' introduces an escape:
/// %S sentence, %L sentence length, %m surface, %M a sentence slice starting at
/// BPos - RLength + Length with RLength characters, %h PosId, %% a literal '%', %c WCost,
/// %H feature, %t CharType, %s Stat, %P Prob,
/// %p? extended node info (i, S, s, e, C, w, c, n, b, P, A, B, l, L, hl, hr, p),
/// %f[i,j,...] selected feature fields joined by TAB, %F?[i,j,...] the same joined by '?'.
/// </summary>
/// <param name="os">Destination buffer.</param>
/// <param name="p">Pointer to the NUL-terminated format string.</param>
/// <param name="sentence">The sentence the node belongs to.</param>
/// <param name="node">Node whose values are written.</param>
/// <exception cref="ArgumentException">The format string is malformed or a feature index is out of range.</exception>
/// <exception cref="InvalidOperationException">%pp was used but the node has no left path.</exception>
public unsafe void WriteNode(StringBuilder os, char* p, string sentence, MeCabNode node)
{
    for (; *p != 0x0; p++)
    {
        switch (*p)
        {
            default:
                os.Append(*p);
                break;

            case '%':
                switch (*++p)
                {
                    default:
                        os.Append("unkonwn meta char ").Append(*p);
                        // A lone '%' at the very end lands here with *p == NUL. Step back so
                        // the loop's p++ stops on the terminator instead of running past it.
                        if (*p == 0x0) p--;
                        break;
                    case 'S': os.Append(sentence); break;
                    case 'L': os.Append(sentence.Length); break;
                    case 'm': os.Append(node.Surface); break;
                    case 'M': os.Append(sentence, (node.BPos - node.RLength + node.Length), node.RLength); break;
                    case 'h': os.Append(node.PosId); break;
                    case '%': os.Append('%'); break;
                    case 'c': os.Append(node.WCost); break;
                    case 'H': os.Append(node.Feature); break;
                    case 't': os.Append(node.CharType); break;
                    case 's': os.Append(node.Stat); break;
                    case 'P': os.Append(node.Prob); break;

                    case 'p':
                        // NOTE(review): several sub-escapes below read further characters
                        // without checking for the terminator (e.g. a format ending in "%pp").
                        switch (*++p)
                        {
                            default: throw new ArgumentException("[iseSCwcnblLh] is required after %p");
#if NeedId
                            case 'i': os.Append(node.Id); break;
#else
                            case 'i': throw new ArgumentException("%pi is not supported");
#endif
                            case 'S': os.Append(sentence, node.BPos, (node.RLength - node.Length)); break;
                            case 's': os.Append(node.BPos); break;
                            case 'e': os.Append(node.EPos); break;
                            case 'C': os.Append(node.Cost - node.Prev.Cost - node.WCost); break;
                            case 'w': os.Append(node.WCost); break;
                            case 'c': os.Append(node.Cost); break;
                            case 'n': os.Append(node.Cost - node.Prev.Cost); break;
                            case 'b': os.Append(node.IsBest ? '*' : ' '); break;
                            case 'P': os.Append(node.Prob); break;
                            case 'A': os.Append(node.Alpha); break;
                            case 'B': os.Append(node.Beta); break;
                            case 'l': os.Append(node.Length); break;
                            case 'L': os.Append(node.RLength); break;

                            case 'h':
                                switch (*++p)
                                {
                                    default: throw new ArgumentException("lr is required after %ph");
                                    case 'l': os.Append(node.LCAttr); break;
                                    case 'r': os.Append(node.RCAttr); break;
                                }
                                break;

                            case 'p':
                                // %pp<mode><sep>: one value per left path, joined by sep.
                                char mode = *++p;
                                char sep = *++p;
                                if (sep == '\\') sep = this.GetEscapedChar(*++p);
                                if (node.LPath == null) throw new InvalidOperationException("no path information, use -l option");
                                for (MeCabPath path = node.LPath; path != null; path = path.LNext)
                                {
                                    if (path != node.LPath) os.Append(sep);
                                    switch (mode)
                                    {
                                        case 'i': os.Append(path.LNode.PosId); break;
                                        case 'c': os.Append(path.Cost); break;
                                        case 'P': os.Append(path.Prob); break;
                                        default: throw new ArgumentException("[icP] is required after %pp");
                                    }
                                }
                                break;
                        }
                        break;

                    case 'f':
                    case 'F':
                        char separator = '\t';
                        if (*p == 'F')
                        {
                            // %F is followed by the separator character (optionally escaped).
                            if (*++p == '\\')
                                separator = this.GetEscapedChar(*++p);
                            else
                                separator = *p;
                        }
                        if (*++p != '[') throw new ArgumentException("cannot find '['");

                        string[] features = node.Feature.Split(',');
                        int n = 0;
                        while (true)
                        {
                            if (char.IsDigit(*++p))
                            {
                                n = n * 10 + (*p - '0');
                                continue;
                            }

                            // p now rests on the first non-digit after the index.
                            if (n >= features.Length) throw new ArgumentException("given index is out of range");
                            os.Append(features[n]);

                            // Inspect that same character. Advancing again here skipped the
                            // delimiter, so an ordinary "%f[6]" threw "cannot find ']'".
                            if (*p == ',')
                            {
                                os.Append(separator);
                                n = 0;
                                continue;
                            }
                            if (*p == ']') break;
                            throw new ArgumentException("cannot find ']'");
                        }
                        break;
                }
                break;
        }
    }
}