Example #1
0
        /// <summary>
        /// Lazily analyses a sentence with the MeCab tagger and yields one
        /// <see cref="MecabWordInfo"/> for every node whose CharType is positive.
        /// </summary>
        /// <param name="sentence">The text to analyse.</param>
        /// <returns>
        /// A deferred sequence of word entries. <c>Kana</c> is a single space when the
        /// feature list has fewer than eight fields, when the part of speech is 記号,
        /// or when WanaKana reports the surface form as hiragana or katakana.
        /// <c>Description</c> is empty when the feature list has only one field.
        /// </returns>
        public IEnumerable <MecabWordInfo> MecabWordEnumerable(string sentence)
        {
            foreach (var node in tagger.ParseToNodes(sentence))
            {
                // NOTE(review): presumably filters out sentinel nodes such as BOS/EOS - confirm.
                if (node.CharType <= 0)
                {
                    continue;
                }

                var features = node.Feature.Split(',');

                // Garbled input can produce a truncated feature list, so every index
                // past the first is checked before it is read. Split always returns
                // at least one element, so features[0] is safe.
                MecabWordInfo word = new MecabWordInfo
                {
                    Word         = node.Surface,
                    PartOfSpeech = features[0],
                    Description  = features.Length > 1 ? features[1] : string.Empty,
                    Feature      = node.Feature,
                    Kana         = " "
                };

                // Guard against garbled text entering the tokenizer, in which case the
                // reading field is missing.
                // NOTE(review): assumes an IPADIC-style layout where field 7 is the reading - confirm.
                if (features.Length >= 8)
                {
                    word.Kana = features[7];
                }

                // Symbols and words already written in kana get the blank placeholder.
                if (word.PartOfSpeech == "記号" ||
                    WanaKana.IsHiragana(node.Surface) ||
                    WanaKana.IsKatakana(node.Surface))
                {
                    word.Kana = " ";
                }

                yield return word;
            }
        }
Example #2
0
        /// <summary>
        /// Analyses a sentence with the MeCab tagger and collects one
        /// <see cref="MecabWordInfo"/> for every node whose CharType is positive.
        /// </summary>
        /// <param name="sentence">The text to analyse.</param>
        /// <returns>
        /// The word entries in node order. <c>Kana</c> is set to an empty string when
        /// the part of speech is 記号 or when WanaKana reports the surface form as
        /// hiragana or katakana; it is not assigned at all when the feature list has
        /// fewer than eight fields. <c>Description</c> is empty when the feature list
        /// has only one field.
        /// </returns>
        public List <MecabWordInfo> SentenceHandle(string sentence)
        {
            List <MecabWordInfo> ret = new List <MecabWordInfo>();

            foreach (var node in tagger.ParseToNodes(sentence))
            {
                // NOTE(review): presumably filters out sentinel nodes such as BOS/EOS - confirm.
                if (node.CharType <= 0)
                {
                    continue;
                }

                var features = node.Feature.Split(',');

                // Garbled input can produce a truncated feature list, so every index
                // past the first is checked before it is read. Split always returns
                // at least one element, so features[0] is safe.
                MecabWordInfo word = new MecabWordInfo
                {
                    Word         = node.Surface,
                    PartOfSpeech = features[0],
                    Description  = features.Length > 1 ? features[1] : string.Empty,
                    Feature      = node.Feature
                };

                // Guard against garbled text entering the tokenizer, in which case the
                // reading field is missing.
                // NOTE(review): assumes an IPADIC-style layout where field 7 is the reading - confirm.
                if (features.Length >= 8)
                {
                    word.Kana = features[7];
                }

                // Clear readings that are not needed: symbols and words already in kana.
                if (word.PartOfSpeech == "記号" ||
                    WanaKana.IsHiragana(node.Surface) ||
                    WanaKana.IsKatakana(node.Surface))
                {
                    word.Kana = "";
                }

                ret.Add(word);
            }

            return ret;
        }
Example #3
0
 /// <summary>
 /// Forwards <paramref name="input"/> to <c>WanaKana.IsKatakana</c> and returns its result.
 /// </summary>
 /// <param name="input">The text to check.</param>
 /// <returns>Whatever <c>WanaKana.IsKatakana</c> reports for the input.</returns>
 public bool IsKatkana(string input)
 {
     return WanaKana.IsKatakana(input);
 }
 /// <summary>
 /// Forwards <paramref name="input"/> to <c>WanaKana.IsKatakana</c> and returns its result.
 /// </summary>
 /// <param name="input">The text to check.</param>
 /// <returns>Whatever <c>WanaKana.IsKatakana</c> reports for the input.</returns>
 public Boolean IsKatkana(String input)
 {
     return WanaKana.IsKatakana(input);
 }
 /// <summary>
 /// Checks that <c>WanaKana.IsKatakana</c> returns the expected result for the given input.
 /// </summary>
 /// <param name="input">The text passed to <c>WanaKana.IsKatakana</c>.</param>
 /// <param name="expectedResult">The value the call is expected to return.</param>
 public void IsKatakanaResponsesMatch(string input, bool expectedResult)
 {
     // Act
     bool actual = WanaKana.IsKatakana(input);

     // Assert
     Assert.Equal(expectedResult, actual);
 }
 /// <summary>
 /// Checks that <c>WanaKana.IsKatakana</c> returns false for the empty string.
 /// </summary>
 public void EmptyReturnsFalse()
 {
     // Act
     bool result = WanaKana.IsKatakana(string.Empty);

     // Assert
     Assert.False(result);
 }