/// <summary>
/// Lazily tokenizes a sentence and yields one <see cref="MecabWordInfo"/> per accepted node.
/// </summary>
/// <param name="sentence">Text passed to <c>tagger.ParseToNodes</c>.</param>
/// <returns>
/// A deferred sequence of word entries, in the order the tagger returned the nodes.
/// Each entry's <c>Kana</c> is a single space unless a reading was taken from the feature string.
/// </returns>
public IEnumerable<MecabWordInfo> MecabWordEnumerable(string sentence)
{
    foreach (var token in tagger.ParseToNodes(sentence))
    {
        // Only nodes whose CharType is greater than zero become words.
        if (!(token.CharType > 0))
        {
            continue;
        }

        // The feature string is a comma-separated record; field 0 is used as the
        // part of speech and field 1 as the description.
        var fields = token.Feature.Split(',');
        var surface = token.Surface;

        var entry = new MecabWordInfo
        {
            Word = surface,
            PartOfSpeech = fields[0],
            Description = fields[1],
            Feature = token.Feature,
            Kana = " "
        };

        // Guard against garbled text entering tokenization, where the kana
        // field (the eighth one) cannot be read.
        bool hasKanaField = fields.Length >= 8;
        if (hasKanaField)
        {
            entry.Kana = fields[7];
        }

        // Symbols ("記号") and surfaces that are already pure hiragana or pure
        // katakana get the single-space placeholder back.
        bool resetKana = entry.PartOfSpeech == "記号"
                         || WanaKana.IsHiragana(surface)
                         || WanaKana.IsKatakana(surface);
        if (resetKana)
        {
            entry.Kana = " ";
        }

        yield return entry;
    }
}
/// <summary>
/// Tokenizes a sentence and collects one <see cref="MecabWordInfo"/> per accepted node.
/// </summary>
/// <param name="sentence">Text passed to <c>tagger.ParseToNodes</c>.</param>
/// <returns>
/// A new list of word entries, in the order the tagger returned the nodes.
/// <c>Kana</c> is only assigned when the feature string has at least eight fields
/// or when it is cleared to an empty string.
/// </returns>
public List<MecabWordInfo> SentenceHandle(string sentence)
{
    var words = new List<MecabWordInfo>();

    foreach (var token in tagger.ParseToNodes(sentence))
    {
        // Only nodes whose CharType is greater than zero become words.
        if (!(token.CharType > 0))
        {
            continue;
        }

        // The feature string is a comma-separated record; field 0 is used as the
        // part of speech and field 1 as the description.
        var fields = token.Feature.Split(',');
        var surface = token.Surface;

        var entry = new MecabWordInfo
        {
            Word = surface,
            PartOfSpeech = fields[0],
            Description = fields[1],
            Feature = token.Feature
        };

        // Guard against garbled text entering tokenization, where the kana
        // field (the eighth one) cannot be read.
        if (fields.Length >= 8)
        {
            entry.Kana = fields[7];
        }

        // Drop kana that is not needed: symbols ("記号") and surfaces that are
        // already pure hiragana or pure katakana. Each check is evaluated
        // unconditionally, hence the non-short-circuit '|'.
        bool isSymbol = entry.PartOfSpeech == "記号";
        bool isHiragana = WanaKana.IsHiragana(surface);
        bool isKatakana = WanaKana.IsKatakana(surface);
        if (isSymbol | isHiragana | isKatakana)
        {
            entry.Kana = "";
        }

        words.Add(entry);
    }

    return words;
}
/// <summary>
/// Forwards to <c>WanaKana.IsKatakana</c> and returns its result unchanged.
/// </summary>
/// <param name="input">The text to check.</param>
/// <returns>Whatever <c>WanaKana.IsKatakana</c> reports for <paramref name="input"/>.</returns>
public bool IsKatkana(string input)
{
    return WanaKana.IsKatakana(input);
}
/// <summary>
/// Forwards to <c>WanaKana.IsKatakana</c> and returns its result unchanged.
/// </summary>
/// <param name="input">The text to check.</param>
/// <returns>Whatever <c>WanaKana.IsKatakana</c> reports for <paramref name="input"/>.</returns>
public Boolean IsKatkana(String input)
{
    return WanaKana.IsKatakana(input);
}
/// <summary>
/// Checks that <c>WanaKana.IsKatakana</c> returns the expected answer for the given input.
/// </summary>
/// <param name="input">The text passed to <c>WanaKana.IsKatakana</c>.</param>
/// <param name="expectedResult">The value the call is expected to return.</param>
public void IsKatakanaResponsesMatch(string input, bool expectedResult)
{
    bool actualResult = WanaKana.IsKatakana(input);

    Assert.Equal(expectedResult, actualResult);
}
/// <summary>
/// Checks that <c>WanaKana.IsKatakana</c> returns false for the empty string.
/// </summary>
public void EmptyReturnsFalse()
{
    bool result = WanaKana.IsKatakana(string.Empty);

    Assert.False(result);
}