/// <summary>
/// Constructor. Stores the given text together with the analysis results derived from it:
/// the token list, the basic-form token table, the token-type table and the information rate.
/// </summary>
/// <param name="text">Text of the paragraph.</param>
public ParagraphData(string text)
{
    Text = text;

    // Tokenise once; the resulting list is stored and also feeds the type table and the rate.
    var tokens = WordLogic.GetTokenList(text);
    TokenList = tokens;

    TokenTbl = WordLogic.GetBasicTokenTbl(text);
    TokenTypeTbl = WordLogic.GetTokenTypeTbl(tokens);
    InfoRate = AnalyzeLogic.CalcInfoRate(tokens);
}
/// <summary>
/// Verifies that filtering a token list by token type yields the expected number of tokens
/// and that every returned token has the requested type.
/// </summary>
/// <param name="testData_type">Token type to filter by.</param>
/// <param name="testData_tokenNum">Expected number of tokens of that type in the sample sentence.</param>
public void 品詞を限定したトークンリストを取得できること(string testData_type, int testData_tokenNum)
{
    // Arrange: fixed sample sentence that is tokenised and then filtered.
    const string sentence = "形態素解析とは、文法的な情報の注記の無い自然言語のテキストデータから、対象言語の文法や、辞書と呼ばれる単語の品詞等の情報にもとづき、形態素の列に分割し、それぞれの形態素の品詞等を判別する作業である。";

    // Act
    var filteredTokens = AnalyzeLogic.ExtractTokenType(WordLogic.GetTokenList(sentence), testData_type);

    // Assert: count first, then the type of each individual token.
    Assert.AreEqual(testData_tokenNum, filteredTokens.Count);
    foreach (var filteredToken in filteredTokens)
    {
        Assert.AreEqual(testData_type, filteredToken.Type);
    }
}
/// <summary>
/// Main menu: File &gt; Text analysis.
/// Analyzes the text in the input box and writes a report into the result box:
/// the overall information rate, the token counts grouped by token type, and then
/// the same figures for every paragraph.
/// </summary>
/// <remarks>
/// The result box is cleared before the analysis starts, so it stays empty when the
/// analysis fails; the exception message is shown in a message box instead.
/// </remarks>
/// <param name="sender">Event source.</param>
/// <param name="e">Event arguments.</param>
private void mwnuSentenceAnalyzeToolStripMenuItem_Click(object sender, EventArgs e)
{
    try
    {
        mTxtResult.Text = string.Empty;

        var sb = new StringBuilder();
        var ctrl = new WordCtrl();
        ctrl.Analyze(mTxtTgtText.Text);

        // Whole-text section.
        sb.AppendLine("====================");
        // "0.#" instead of "#.#": the '#' placeholder never emits a non-significant zero,
        // so a rate of 0 was rendered as "情報量=[%]" and 0.5 as ".5".
        sb.AppendFormat("情報量={0:0.#}[%]", ctrl.InfoRate * 100).AppendLine();
        sb.AppendLine("====================");
        foreach (var key in ctrl.TokenTypeTbl.Keys)
        {
            sb.AppendLine(string.Format("\t=== {0} ===", key));
            var extractTokenTbl = AnalyzeLogic.ExtractTokenType(ctrl.TokenTbl, key);
            foreach (var token in extractTokenTbl.Keys)
            {
                // NUL characters are stripped from the token text before display
                // (presumably padding left by the tokenizer - confirm).
                sb.AppendLine(string.Format("\t\t{0}: {1}", token.Replace("\0", ""), extractTokenTbl[token].Count()));
            }
        }

        // Per-paragraph sections, same layout as the whole-text section.
        foreach (var paragraph in ctrl.ParagraphList)
        {
            sb.AppendLine("------------------------------");
            sb.AppendLine(paragraph.Text);
            sb.AppendFormat("情報量={0:0.#}[%]", paragraph.InfoRate * 100).AppendLine();
            foreach (var key in paragraph.TokenTypeTbl.Keys)
            {
                sb.AppendLine(string.Format("\t=== {0} ===", key));
                var extractTokenTbl = AnalyzeLogic.ExtractTokenType(paragraph.TokenTbl, key);
                foreach (var token in extractTokenTbl.Keys)
                {
                    // Only the occurrence count per token is reported, not the individual words.
                    sb.AppendLine(string.Format("\t\t{0}: {1}", token.Replace("\0", ""), extractTokenTbl[token].Count()));
                }
            }
        }

        mTxtResult.Text = sb.ToString();
    }
    catch (Exception ex)
    {
        // Top-level UI handler: report the failure to the user rather than crashing.
        MessageBox.Show(ex.Message);
    }
}
/// <summary>
/// Runs the analysis. Rebuilds the paragraph list from the given text (one
/// <c>ParagraphData</c> per paragraph returned by the splitter) and then stores the
/// token list, basic-form token table, token-type table and information rate
/// computed for the text as a whole.
/// </summary>
/// <param name="text">Text to analyze.</param>
public void Analyze(string text)
{
    // Discard the paragraphs of any previous run before adding the new ones.
    ParagraphList.Clear();
    foreach (var paragraphText in ParagraphLogic.SplitParagraph(text))
    {
        ParagraphList.Add(new ParagraphData(paragraphText));
    }

    // Whole-text results; the token list is computed once and reused below.
    var tokens = WordLogic.GetTokenList(text);
    TokenList = tokens;

    TokenTbl = WordLogic.GetBasicTokenTbl(text);
    TokenTypeTbl = WordLogic.GetTokenTypeTbl(tokens);
    InfoRate = AnalyzeLogic.CalcInfoRate(tokens);
}