// Example: parses Example1 with a tagger from MeCabTagger.Create() and writes every
// inspected member of one node to Trace. The node inspected is the second one produced
// by ParseToNodes, i.e. the node right after the leading BOS/EOS node.
// Throws System.InvalidOperationException when the parse yields fewer than two nodes.
public void MeCabIpaDicNodeMethodExample()
{
    var sentence = Example1;

    // The tagger is disposable; scope it with a using declaration (like the enumerator
    // below) so it is released even if tracing throws. Declared first, disposed last.
    using var tagger = MeCabTagger.Create();
    var enumerableSet = tagger.ParseToNodes(sentence);
    using var enumerator = enumerableSet.GetEnumerator();

    // The first MoveNext() lands on the node whose Feature is BOS/EOS,*,*,*,*,*,*,*,*;
    // the second MoveNext() lands on the node this example inspects. Check both results
    // so that Current is never read from an invalid position.
    if (!enumerator.MoveNext() || !enumerator.MoveNext())
    {
        throw new System.InvalidOperationException(
            "Expected at least two nodes (BOS/EOS followed by a token) from ParseToNodes.");
    }

    MeCabNode node = enumerator.Current!;

    Trace.WriteLine($"Alpha: {node.Alpha}");
    Trace.WriteLine($"Beta: {node.Beta}");
    Trace.WriteLine($"`MeCabNode` BNext: {node.BNext}");
    Trace.WriteLine($"BPos: {node.BPos}");
    Trace.WriteLine($"CharType: {node.CharType}");
    Trace.WriteLine($"Cost: {node.Cost}");
    Trace.WriteLine($"`MeCabNode` ENext: {node.ENext}");
    Trace.WriteLine($"EPos: {node.EPos}");
    Trace.WriteLine($"Feature: {node.Feature}");

    // The Feature[n] labels below record which comma-separated Feature field each
    // helper is expected to read (as stated by the labels themselves).
    Trace.WriteLine($"GetConjugatedForm() Feature[4]: {node.GetConjugatedForm()}"); // CTYPE
    Trace.WriteLine($"GetInflection() Feature[5]: {node.GetInflection()}"); // CFORM
    Trace.WriteLine($"GetOriginalForm() Feature[6]: {node.GetOriginalForm()}"); // BASIC
    Trace.WriteLine($"GetPartsOfSpeech() Feature[0]: {node.GetPartsOfSpeech()}");
    Trace.WriteLine($"GetPartsOfSpeechSection1() Feature[1]: {node.GetPartsOfSpeechSection1()}");
    Trace.WriteLine($"GetPartsOfSpeechSection2() Feature[2]: {node.GetPartsOfSpeechSection2()}");
    Trace.WriteLine($"GetPartsOfSpeechSection3() Feature[3]: {node.GetPartsOfSpeechSection3()}");
    // NOTE: the helper really is spelled "GetPronounciation"; the label text differs.
    Trace.WriteLine($"GetPronunciation() Feature[8]: {node.GetPronounciation()}");
    Trace.WriteLine($"GetReading() Feature[7]: {node.GetReading()}");

    Trace.WriteLine($"IsBest: {node.IsBest}");
    Trace.WriteLine($"LCAttr: {node.LCAttr}");
    Trace.WriteLine($"Length: {node.Length}");
    Trace.WriteLine($"`MeCabNode` Next: {node.Next}");
    Trace.WriteLine($"PosId: {node.PosId}");
    Trace.WriteLine($"`MeCabNode` Prev: {node.Prev}");
    Trace.WriteLine($"Prob: {node.Prob}");
    Trace.WriteLine($"RCAttr: {node.RCAttr}");
    Trace.WriteLine($"RLength: {node.RLength}");
    Trace.WriteLine($"Stat: {node.Stat}");
    Trace.WriteLine($"Surface: {node.Surface}");
    Trace.WriteLine($"WCost: {node.WCost}");
}
/// <summary> /// The Generator /// </summary> /// <param name="nodeEnumerable">Include IpaDic info</param> /// <returns></returns> /// <exception cref="InvalidOperationException"></exception> public static IEnumerable <VeWord> Words(IEnumerable <MeCabNode> nodeEnumerable) { var tokenArray = new List <MeCabNode>(nodeEnumerable).ToArray()[1..^ 1]; List <VeWord> wordList = new(); MeCabNode? previous = null; // FIXME: Not stable for (var i = 0; i < tokenArray.Length; i++) { var finalSlot = wordList.Count - 1; MeCabNode current = tokenArray[i]; MeCabNode following; PartOfSpeech partOfSpeech; var grammar = Grammar.Unassigned; var eatNext = false; var eatLemma = true; var attachToPrevious = false; var alsoAttachToLemma = false; var updatePos = false; switch (current.GetPartsOfSpeech()) { case "名詞": { partOfSpeech = PartOfSpeech.詞; if (current.GetPartsOfSpeechSection1().Equals(NoData)) { break; } switch (current.GetPartsOfSpeechSection1()) { case "固有名詞": partOfSpeech = PartOfSpeech.固有名詞; break; case "代名詞": partOfSpeech = PartOfSpeech.代名詞; break; case "副詞可能": case "サ変接続": case "形容動詞語幹": case "ナイ形容詞語幹": // Refers to line 213 of Ve. if (current.GetPartsOfSpeechSection2().Equals(NoData)) { break; } // protects against array overshooting. if (i == tokenArray.Length - 1) { break; } following = tokenArray[i + 1]; switch (following.GetConjugatedForm()) // [CTYPE] { case "サ変・スル": partOfSpeech = PartOfSpeech.動詞; eatNext = true; break; case "特殊・ダ": partOfSpeech = PartOfSpeech.形容詞; if (following.GetPartsOfSpeechSection1().Equals("体言接続")) { eatNext = true; eatLemma = false; } break; case "特殊・ナイ": partOfSpeech = PartOfSpeech.形容詞; eatNext = true; break; default: if (following.GetPartsOfSpeech().Equals("助詞") && following.Surface.Equals("に")) { // Ve script redundantly (I think) also has eat_next = false here. partOfSpeech = PartOfSpeech.副詞; } break; } break; case "非自立": case "特殊": // Refers to line 233 of Ve. 
if (current.GetPartsOfSpeechSection2().Equals(NoData)) { break; } // protects against array overshooting. if (i == tokenArray.Length - 1) { break; } following = tokenArray[i + 1]; switch (current.GetPartsOfSpeechSection2()) { case "副詞可能": if (following.GetPartsOfSpeech().Equals("助詞") && following.Surface.Equals("に")) { partOfSpeech = PartOfSpeech.副詞; // Changed this to false because 'case JOSHI' has 'attach_to_previous = true'. eatNext = false; } break; case "助動詞語幹": if (following.GetConjugatedForm().Equals("特殊・ダ")) { partOfSpeech = PartOfSpeech.動詞; grammar = Grammar.Auxiliary; if (following.GetInflection().Equals("体言接続")) { eatNext = true; } } else if (following.GetPartsOfSpeech().Equals("助詞") && following.GetPartsOfSpeechSection2().Equals("副詞化")) { partOfSpeech = PartOfSpeech.副詞; eatNext = true; } break; case "形容動詞語幹": partOfSpeech = PartOfSpeech.形容詞; if (following.GetConjugatedForm().Equals("特殊・ダ") && following.GetInflection().Equals("体言接続") || following.GetPartsOfSpeechSection1().Equals("連体化")) { eatNext = true; } break; } break; case "数": // TODO: "recurse and find following numbers and add to this word. Except non-numbers // like 幾" // Refers to line 261. partOfSpeech = PartOfSpeech.数; if (wordList.Count > 0 && wordList[finalSlot].PartOfSpeech.Equals(PartOfSpeech.数)) { attachToPrevious = true; alsoAttachToLemma = true; } break; case "接尾": // Refers to line 267. switch (current.GetPartsOfSpeechSection2()) { case "人名": partOfSpeech = PartOfSpeech.人名接尾; break; case "特殊": if (current.GetOriginalForm().Equals("さ")) { updatePos = true; partOfSpeech = PartOfSpeech.詞; } attachToPrevious = true; break; default: // 助数词 alsoAttachToLemma = true; attachToPrevious = true; break; } break; case "接続詞的": partOfSpeech = PartOfSpeech.接続詞; break; case "動詞非自立的": partOfSpeech = PartOfSpeech.動詞; grammar = Grammar.Nominal; // not using. break; } } break; case "接頭詞": // TODO: "elaborate this when we have the "main part" feature for words?" 
partOfSpeech = PartOfSpeech.接頭詞; break; case "助動詞": // Refers to line 290. partOfSpeech = PartOfSpeech.助詞; List <string> tokushuList = new() { "特殊・タ", "特殊・ナイ", "特殊・タイ", "特殊・マス", "特殊・ヌ" }; if (previous is null || !previous.GetPartsOfSpeechSection1().Equals("係助詞") && tokushuList.Contains(current.GetConjugatedForm())) { attachToPrevious = true; }