/// <summary>
/// Runs a sweep of <c>lda.exe</c> training runs, varying either the topic count
/// (when <c>rbK</c> is checked) or the beta value (otherwise), records the value
/// returned by <c>LDA.GetPerplexity</c> for each run, binds the results to the chart
/// and writes them to "evaluate.txt" in the directory of the file named in <c>textBox2</c>.
/// Any exception is reported to the user in a message box.
/// </summary>
/// <param name="sender">The control that raised the event.</param>
/// <param name="e">Event data (unused).</param>
private void button2_Click(object sender, EventArgs e)
{
    try
    {
        // Paths
        var fullPath = Path.GetFullPath(textBox2.Text);
        var dir = Path.GetDirectoryName(fullPath);
        // The document path is handed to lda.exe relative to the start-up directory, with forward slashes.
        var trainDoc = FileUtils.RelativePath(Application.StartupPath, fullPath).Replace('\\', '/');
        var finalData = Path.Combine(dir, "evaluate.txt");
        // Remove the result file of any previous sweep before starting a new one.
        File.Delete(finalData);

        // Sweep parameters: number of runs, start value and step.
        int cnt = Convert.ToInt32(txtCnt.Text);
        double init = Convert.ToDouble(txtIni.Text);
        double inc = Convert.ToDouble(txtInc.Text);

        // Initialise the data table
        InitTable();

        // Command-line arguments are machine-readable, so numbers must not follow the
        // user's regional settings (a comma decimal separator would yield e.g. "0,1").
        var invariant = System.Globalization.CultureInfo.InvariantCulture;

        // Run the evaluation
        for (int i = 0; i < cnt; i++)
        {
            double current = init + i * inc;

            if (rbK.Checked)
            {
                // NOTE(review): "-dir" is followed directly by "-beta" with no directory value
                // in between; confirm how lda.exe parses this before changing the string.
                args = string.Format(invariant, "-est -dir -beta {0} -ntopics {1} -niters 1000 -savestep 500 -dfile {2}", 0.1, (int)current, trainDoc);
            }
            else
            {
                args = string.Format(invariant, "-est -beta {0} -ntopics {1} -niters 1000 -savestep 500 -dfile {2}", current, 12, trainDoc);
            }

            ProcessUtils.StartProcess("lda.exe", args);

            // Read the three model output files from the document's directory.
            var tt = FileUtils.ReadAsList(Path.Combine(dir, "model-final.theta"));
            var pp = FileUtils.ReadAsList(Path.Combine(dir, "model-final.phi"));
            var aa = FileUtils.ReadAsList(Path.Combine(dir, "model-final.tassign"));
            var d = LDA.GetPerplexity(pp, tt, aa);

            table.Rows.Add(current, d);
            label6.Text = string.Format("已完成:{0}/{1}", i + 1, cnt);
        }

        // Configure the chart
        chartControl1.Series[0].DataSource = table;
        chartControl1.Series[0].SetDataMembers("x", "y");
        var xyDiagram = (DevExpress.XtraCharts.XYDiagram)(chartControl1.Diagram);
        xyDiagram.AxisX.Title.Text = rbK.Checked ? "主题个数K" : "超参数β";
        xyDiagram.EnableAxisYScrolling = true;
        xyDiagram.EnableAxisYZooming = true;
        chartControl1.Titles[0].Text = rbK.Checked ? "主题个数K对聚类的影响" : "超参数β对聚类的影响";

        // Save the evaluation data; "using" closes the writer even if a write fails.
        using (var sw = File.AppendText(finalData))
        {
            for (int i = 0; i < table.Rows.Count; i++)
            {
                sw.WriteLine(table.Rows[i]["x"] + "\t" + table.Rows[i]["y"]);
            }
        }
    }
    catch (Exception ex)
    {
        MessageBox.Show(ex.Message);
    }
}
/// <summary>
/// Builds the <c>lda.exe</c> estimation command line from the option text boxes, runs it on
/// the file named in <c>textBox2</c>, then loads "model-final.twords" from that file's
/// directory and passes the result to <c>ShowWords</c>.
/// The directory is stored in <c>dir</c>. Any exception is reported in a message box
/// instead of escaping the event handler.
/// </summary>
/// <param name="sender">The control that raised the event.</param>
/// <param name="e">Event data (unused).</param>
private void button2_Click(object sender, EventArgs e)
{
    try
    {
        dir = Path.GetDirectoryName(Path.GetFullPath(textBox2.Text));

        StringBuilder sb = new StringBuilder();
        sb.Append("-est ");
        AppendLdaOption(sb, "-beta", txtB.Text);
        AppendLdaOption(sb, "-ntopics", txtK.Text);
        AppendLdaOption(sb, "-niters", txtI.Text);
        AppendLdaOption(sb, "-alpha", txtA.Text);
        sb.AppendFormat(" -savestep 300 -twords 15 -dfile {0} ", textBox2.Text);

        ProcessUtils.StartProcess("lda.exe", sb.ToString());

        var data = LDA.ReadTopicWordsWeight(Path.Combine(dir, "model-final.twords"));
        ShowWords(data);
        MessageBox.Show("模型训练完成,接下来请根据高频词填写对应的主题标签,并记得保存");
    }
    catch (Exception ex)
    {
        MessageBox.Show(ex.Message);
    }
}

/// <summary>
/// Appends " flag value " to the command line when the value is not blank.
/// Blank or whitespace-only values are skipped so a flag is never emitted without its value.
/// </summary>
private static void AppendLdaOption(StringBuilder commandLine, string flag, string value)
{
    string trimmed = (value ?? string.Empty).Trim();
    if (trimmed.Length == 0)
    {
        return;
    }

    commandLine.AppendFormat(" {0} {1} ", flag, trimmed);
}
/// <summary>
/// Infers the topic of the text entered in <c>textBox2</c> using the model file named in
/// <c>textBox1</c>: the text is preprocessed and written to "ldaTestDoc.txt" in the model's
/// directory, <c>lda.exe</c> is run in inference mode, and the label of the first document's
/// selected topic is shown in <c>label3</c>. Any exception is shown in a message box.
/// </summary>
/// <param name="sender">The control that raised the event.</param>
/// <param name="e">Event data (unused).</param>
private void button3_Click(object sender, EventArgs e)
{
    try
    {
        string modelPath = textBox1.Text;
        string modelDir = Path.GetDirectoryName(Path.GetFullPath(modelPath));
        string testFileName = "ldaTestDoc.txt";
        string modelName = Path.GetFileNameWithoutExtension(textBox1.Text);

        // Preprocess the single input text and write it out as the document file to infer.
        var segmented = Preprocessor.SegmentRemoveStopWords(new[] { textBox2.Text }.ToList(), false);
        string testFilePath = Path.Combine(modelDir, testFileName);
        LDA.WriteLdaDocToFile(segmented, testFilePath);

        string commandLine = string.Format("-inf -dir {0} -model {1} -dfile {2} -niters 20", modelDir, modelName, testFileName);
        ProcessUtils.StartProcess("lda.exe", commandLine);

        // Load the prediction result into memory.
        string thetaPath = Path.Combine(modelDir, "ldaTestDoc.txt.theta");
        var docTopics = LDA.ReadDocumentTopicData(thetaPath);

        Console.WriteLine("[{0}]读取主题映射...", DateTime.Now);
        // The label file must have been filled in by hand beforehand, matching
        // the topics listed in model-final.twords one by one.
        string labelPath = Path.Combine(modelDir, "ldaTopicLabel.txt");
        var labels = LDA.GetTopicLabel(labelPath);

        Console.WriteLine("[{0}]开始预测主题...", DateTime.Now);
        // Pick, for each document, the index of its most likely topic.
        var bestTopics = LDA.GetDocmentTopicIndexList(docTopics);

        // Only one document was submitted, so its result is the first entry.
        var firstTopic = bestTopics[0];
        label3.Text = labels[firstTopic];
    }
    catch (Exception ex)
    {
        MessageBox.Show(ex.Message);
    }
}
/// <summary>
/// Fetches documents via <c>LDA.GetLDATrainDoc</c> using the integer entered in
/// <c>textBox1</c>, preprocesses them with <c>Preprocessor.SegmentRemoveStopWords</c>,
/// writes the result to the path in <c>textBox2</c> and notifies the user.
/// </summary>
/// <param name="sender">The control that raised the event.</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    int parsedValue = Convert.ToInt32(textBox1.Text);

    // Fetch, preprocess and write in one pipeline; the output path is read last.
    LDA.WriteLdaDocToFile(
        Preprocessor.SegmentRemoveStopWords(LDA.GetLDATrainDoc(parsedValue)),
        textBox2.Text);

    MessageBox.Show("已经生成验证集,接下来可以验证了");
}
/// <summary>
/// Collects the text of column index 2 from every row of <c>dataGridView1</c> and saves it
/// through <c>LDA.SetTopicLabel</c> to "ldaTopicLabel.txt" inside <c>dir</c>.
/// Empty cells are saved as empty labels; any failure is reported in a message box and
/// the success message is shown only when saving completed.
/// </summary>
/// <param name="sender">The control that raised the event.</param>
/// <param name="e">Event data (unused).</param>
private void button4_Click(object sender, EventArgs e)
{
    try
    {
        int rowCount = dataGridView1.Rows.Count;
        string[] labels = new string[rowCount];

        for (int i = 0; i < rowCount; i++)
        {
            // A cell that was never filled in has a null Value; Convert.ToString(object)
            // maps that to an empty string instead of throwing NullReferenceException.
            labels[i] = Convert.ToString(dataGridView1[2, i].Value);
        }

        // Path.Combine throws if dir has not been set yet; the catch below reports it.
        LDA.SetTopicLabel(labels, Path.Combine(dir, "ldaTopicLabel.txt"));
        MessageBox.Show("模型保存成功");
    }
    catch (Exception ex)
    {
        MessageBox.Show(ex.Message);
    }
}
/// <summary>
/// Batch topic inference: loads documents via <c>LDA.GetLDAInferDoc</c>, preprocesses them,
/// writes them to "ldaInferDoc.txt" in the directory of the file named in <c>textBox1</c>,
/// runs <c>lda.exe</c> in inference mode against "model-final", reads the resulting theta
/// file and the topic label file, prints the per-document topics and stores them through
/// <c>LDA.SaveTopicToDB</c>. Progress is logged to the console; success and failure are
/// reported in message boxes.
/// </summary>
/// <param name="sender">The control that raised the event.</param>
/// <param name="e">Event data (unused).</param>
private void button2_Click(object sender, EventArgs e)
{
    try
    {
        // Resolve every file location up front, before any work is logged.
        var modelFullPath = Path.GetFullPath(textBox1.Text);
        var workDir = Path.GetDirectoryName(modelFullPath);
        var inferFileName = "ldaInferDoc.txt";
        var thetaFileName = inferFileName + ".theta";
        var labelFileName = "ldaTopicLabel.txt";
        var inferFilePath = Path.Combine(workDir, inferFileName);
        var thetaFilePath = Path.Combine(workDir, thetaFileName);
        var labelFilePath = Path.Combine(workDir, labelFileName);

        LogStep("[{0}]开始预测主题...");

        List<string> weiboIds;
        LogStep("[{0}]获取微博...");
        var originalDocs = LDA.GetLDAInferDoc(out weiboIds);

        LogStep("[{0}]预处理...");
        var preparedDocs = Preprocessor.SegmentRemoveStopWords(originalDocs, false);

        LogStep("[{0}]写入文件...");
        LDA.WriteLdaDocToFile(preparedDocs, inferFilePath);

        LogStep("[{0}]吉布斯采样...");
        string commandLine = string.Format("-inf -dir {0} -model {1} -dfile {2} -niters 20", workDir, "model-final", inferFileName);
        ProcessUtils.StartProcess("lda.exe", commandLine);

        // Load the prediction result into memory.
        LogStep("[{0}]读取预测结果...");
        var docTopics = LDA.ReadDocumentTopicData(thetaFilePath);

        // The label file must have been filled in by hand beforehand, matching
        // the topics listed in model-final.twords one by one.
        LogStep("[{0}]读取主题映射...");
        var labels = LDA.GetTopicLabel(labelFilePath);

        // Pick, for each document, the index of its most likely topic.
        LogStep("[{0}]开始预测主题...");
        var bestTopics = LDA.GetDocmentTopicIndexList(docTopics);

        LogStep("[{0}]以下是微博的主题预测情况:");
        LDA.PrintTopics(originalDocs, bestTopics, labels);

        // Persist the results to the database.
        LogStep("[{0}]开始存储主题...");
        LDA.SaveTopicToDB(bestTopics, labels, weiboIds);

        MessageBox.Show("任务已完成!", "提示", MessageBoxButtons.OK, MessageBoxIcon.Information);
    }
    catch (Exception ex)
    {
        MessageBox.Show(ex.Message, "出错啦", MessageBoxButtons.OK, MessageBoxIcon.Error);
    }
}

/// <summary>
/// Writes a progress line to the console, substituting the current local time for {0}.
/// </summary>
private static void LogStep(string format)
{
    Console.WriteLine(format, DateTime.Now);
}