Пример #1
0
 /// <summary>
 /// Constructor. Stores the given text and fills in the token list, the token
 /// table, the token-type table and InfoRate derived from it.
 /// </summary>
 /// <param name="text">Text of the paragraph to analyze.</param>
 public ParagraphData(string text)
 {
     this.Text = text;

     // The token list feeds both the token-type table and InfoRate below.
     var tokens = WordLogic.GetTokenList(text);
     this.TokenList = tokens;

     // The basic token table is built from the raw text, not from the token list.
     this.TokenTbl = WordLogic.GetBasicTokenTbl(text);

     this.TokenTypeTbl = WordLogic.GetTokenTypeTbl(tokens);
     this.InfoRate = AnalyzeLogic.CalcInfoRate(tokens);
 }
Пример #2
0
        public void 品詞を限定したトークンリストを取得できること(string testData_type, int testData_tokenNum)
        {
            // Arrange: fixed Japanese sample sentence used as the tokenizer input.
            const string sampleText = "形態素解析とは、文法的な情報の注記の無い自然言語のテキストデータから、対象言語の文法や、辞書と呼ばれる単語の品詞等の情報にもとづき、形態素の列に分割し、それぞれの形態素の品詞等を判別する作業である。";

            // Act: tokenize the sample, then keep only tokens of the requested type.
            var filtered = AnalyzeLogic.ExtractTokenType(WordLogic.GetTokenList(sampleText), testData_type);

            // Assert: the expected number of tokens came back...
            Assert.AreEqual(testData_tokenNum, filtered.Count);

            // ...and every one of them carries the requested type.
            foreach (var item in filtered)
            {
                Assert.AreEqual(testData_type, item.Type);
            }
        }
Пример #3
0
        /// <summary>
        /// Main menu handler: File > Sentence analysis.
        /// Analyzes the text in mTxtTgtText and writes a report to mTxtResult:
        /// InfoRate and per-type token counts for the whole text, followed by the
        /// same information for each paragraph. Any exception raised along the way
        /// is shown to the user in a message box.
        /// </summary>
        /// <param name="sender">Event source.</param>
        /// <param name="e">Event arguments.</param>
        private void mwnuSentenceAnalyzeToolStripMenuItem_Click(object sender, EventArgs e)
        {
            // "0.#" rather than "#.#": a lone "#" placeholder renders zero as an empty
            // string and drops the leading digit for values below 1 (e.g. ".5").
            const string InfoRateFormat   = "情報量={0:0.#}[%]";
            const string TypeHeaderFormat = "\t=== {0} ===";
            const string TokenCountFormat = "\t\t{0}: {1}";

            try
            {
                // Clear the previous report first so stale output is not left on failure.
                mTxtResult.Text = string.Empty;
                var sb   = new StringBuilder();
                var ctrl = new WordCtrl();

                ctrl.Analyze(mTxtTgtText.Text);

                // Whole-text section: InfoRate as a percentage, then counts per token type.
                sb.AppendLine("====================");
                sb.AppendFormat(InfoRateFormat, ctrl.InfoRate * 100).AppendLine();
                sb.AppendLine("====================");
                foreach (var key in ctrl.TokenTypeTbl.Keys)
                {
                    sb.AppendFormat(TypeHeaderFormat, key).AppendLine();
                    var extractTokenTbl = AnalyzeLogic.ExtractTokenType(ctrl.TokenTbl, key);
                    foreach (var token in extractTokenTbl.Keys)
                    {
                        // Strip embedded NUL characters from the token before display.
                        sb.AppendFormat(TokenCountFormat,
                                        token.Replace("\0", ""),
                                        extractTokenTbl[token].Count()).AppendLine();
                    }
                }

                // Per-paragraph sections, in the same layout as the whole-text section.
                foreach (var paragraph in ctrl.ParagraphList)
                {
                    sb.AppendLine("------------------------------");
                    sb.AppendLine(paragraph.Text);
                    sb.AppendFormat(InfoRateFormat, paragraph.InfoRate * 100).AppendLine();
                    foreach (var key in paragraph.TokenTypeTbl.Keys)
                    {
                        sb.AppendFormat(TypeHeaderFormat, key).AppendLine();
                        var extractTokenTbl = AnalyzeLogic.ExtractTokenType(paragraph.TokenTbl, key);
                        foreach (var token in extractTokenTbl.Keys)
                        {
                            sb.AppendFormat(TokenCountFormat,
                                            token.Replace("\0", ""),
                                            extractTokenTbl[token].Count()).AppendLine();
                        }
                    }
                }

                mTxtResult.Text = sb.ToString();
            }
            catch (Exception ex)
            {
                // Best-effort UI handler: report the failure instead of crashing the form.
                MessageBox.Show(ex.Message);
            }
        }
Пример #4
0
        /// <summary>
        /// Analyzes the given text. Rebuilds ParagraphList with one ParagraphData per
        /// paragraph returned by ParagraphLogic.SplitParagraph, then refreshes
        /// TokenList, TokenTbl, TokenTypeTbl and InfoRate for the text as a whole.
        /// </summary>
        /// <param name="text">Text to analyze.</param>
        public void Analyze(string text)
        {
            // Drop the paragraphs of any previous run before collecting new ones.
            ParagraphList.Clear();
            foreach (var paragraphText in ParagraphLogic.SplitParagraph(text))
            {
                var paragraph = new ParagraphData(paragraphText);
                ParagraphList.Add(paragraph);
            }

            // Whole-text results: the token list feeds the type table and InfoRate,
            // while the basic token table is built from the raw text.
            var tokens = WordLogic.GetTokenList(text);
            TokenList = tokens;
            TokenTbl = WordLogic.GetBasicTokenTbl(text);
            TokenTypeTbl = WordLogic.GetTokenTypeTbl(tokens);
            InfoRate = AnalyzeLogic.CalcInfoRate(tokens);
        }