/// <summary>
/// Infers the topic of the single text entered in <c>textBox2</c> using the model file
/// named in <c>textBox1</c>, and shows the resulting topic label in <c>label3</c>.
/// </summary>
/// <remarks>
/// The work files (test document, theta output, topic-label map) all live in the directory
/// of the model file. Every failure is reported to the user in a message box instead of
/// propagating out of the handler.
/// </remarks>
/// <param name="sender">Control that raised the click event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button3_Click(object sender, EventArgs e)
{
    try
    {
        string dir = Path.GetDirectoryName(Path.GetFullPath(textBox1.Text));
        string model = Path.GetFileNameWithoutExtension(textBox1.Text);

        // File names are kept relative because lda.exe receives the directory separately via -dir.
        string testPath = "ldaTestDoc.txt";
        // Derived from testPath so the two names cannot drift apart.
        string thetaPath = testPath + ".theta";

        // Pre-process the single input text and write it out as the inference document.
        var segmented = Preprocessor.SegmentRemoveStopWords(new string[] { textBox2.Text }.ToList(), false);
        LDA.WriteLdaDocToFile(segmented, Path.Combine(dir, testPath));

        // NOTE(review): the arguments are passed unquoted, so a directory or model name that
        // contains spaces would presumably break the command line - confirm against
        // ProcessUtils.StartProcess / lda.exe before quoting.
        string args = string.Format("-inf -dir {0} -model {1} -dfile {2} -niters 20", dir, model, testPath);
        // Per the original note, the prediction output goes to the console.
        // NOTE(review): the theta file is read right after this call, which assumes
        // StartProcess blocks until lda.exe has exited - confirm.
        ProcessUtils.StartProcess("lda.exe", args);

        // Load the prediction result into memory.
        var docTopicArray = LDA.ReadDocumentTopicData(Path.Combine(dir, thetaPath));

        Console.WriteLine("[{0}]读取主题映射...", DateTime.Now);
        // The topic labels must have been assigned by hand beforehand, matched one by one
        // against the model-final.twords file.
        var topicLabel = LDA.GetTopicLabel(Path.Combine(dir, "ldaTopicLabel.txt"));

        Console.WriteLine("[{0}]开始预测主题...", DateTime.Now);
        // Pick, for each document, the index of its most likely topic.
        var topicIndex = LDA.GetDocmentTopicIndexList(docTopicArray);

        // Only one document was submitted; fail with a clear message when inference produced
        // no row for it instead of a generic index-out-of-range error.
        if (topicIndex == null || !topicIndex.Any())
        {
            throw new InvalidOperationException("未得到任何主题预测结果,请检查输入文本和模型文件。");
        }

        label3.Text = topicLabel[topicIndex[0]];
    }
    catch (Exception ex)
    {
        MessageBox.Show(ex.Message, "出错啦", MessageBoxButtons.OK, MessageBoxIcon.Error);
    }
}
/// <summary>
/// Fetches LDA training documents, segments them and removes stop words, and writes the
/// result to the file path entered in <c>textBox2</c>.
/// </summary>
/// <remarks>
/// The integer entered in <c>textBox1</c> is passed to <c>LDA.GetLDATrainDoc</c>
/// (presumably the number of documents to fetch - confirm against that method).
/// Invalid input and runtime failures are reported in a message box rather than escaping
/// the handler as unhandled exceptions.
/// </remarks>
/// <param name="sender">Control that raised the click event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    // Validate the user input up front: Convert.ToInt32 would throw on empty or
    // non-numeric text.
    int requested;
    if (!int.TryParse(textBox1.Text, out requested))
    {
        MessageBox.Show("请输入一个有效的整数。", "出错啦", MessageBoxButtons.OK, MessageBoxIcon.Error);
        return;
    }

    try
    {
        var docs = LDA.GetLDATrainDoc(requested);
        var bow = Preprocessor.SegmentRemoveStopWords(docs);
        LDA.WriteLdaDocToFile(bow, textBox2.Text);
        MessageBox.Show("已经生成验证集,接下来可以验证了");
    }
    catch (Exception ex)
    {
        // Same catch-and-report pattern as the other click handlers in this form.
        MessageBox.Show(ex.Message, "出错啦", MessageBoxButtons.OK, MessageBoxIcon.Error);
    }
}
/// <summary>
/// Runs batch topic inference: obtains the documents to classify, pre-processes them,
/// invokes <c>lda.exe</c> against the "model-final" model located in the directory of the
/// path entered in <c>textBox1</c>, prints the predicted topics and stores them.
/// </summary>
/// <remarks>
/// Progress is written to the console with a timestamp before each step. Completion and
/// any exception are reported to the user through a message box.
/// </remarks>
/// <param name="sender">Control that raised the click event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button2_Click(object sender, EventArgs e)
{
    const string inferDocName = "ldaInferDoc.txt";
    const string topicLabelName = "ldaTopicLabel.txt";
    const string modelName = "model-final";

    try
    {
        // All work files sit next to the model file.
        string workDir = Path.GetDirectoryName(Path.GetFullPath(textBox1.Text));
        string inferDocPath = Path.Combine(workDir, inferDocName);
        string thetaPath = Path.Combine(workDir, inferDocName + ".theta");
        string labelPath = Path.Combine(workDir, topicLabelName);

        Console.WriteLine("[{0}]开始预测主题...", DateTime.Now);

        // Step 1: obtain the raw documents together with their ids.
        Console.WriteLine("[{0}]获取微博...", DateTime.Now);
        List<string> tidList;
        var rawDocs = LDA.GetLDAInferDoc(out tidList);

        // Step 2: segment the text and strip stop words.
        Console.WriteLine("[{0}]预处理...", DateTime.Now);
        var preparedDocs = Preprocessor.SegmentRemoveStopWords(rawDocs, false);

        // Step 3: write the documents out as the inference input file.
        Console.WriteLine("[{0}]写入文件...", DateTime.Now);
        LDA.WriteLdaDocToFile(preparedDocs, inferDocPath);

        // Step 4: run Gibbs sampling through the external tool; per the original note,
        // its prediction output goes to the console.
        Console.WriteLine("[{0}]吉布斯采样...", DateTime.Now);
        string commandLine = string.Format(
            "-inf -dir {0} -model {1} -dfile {2} -niters 20",
            workDir,
            modelName,
            inferDocName);
        ProcessUtils.StartProcess("lda.exe", commandLine);

        // Step 5: load the prediction result into memory.
        Console.WriteLine("[{0}]读取预测结果...", DateTime.Now);
        var docTopics = LDA.ReadDocumentTopicData(thetaPath);

        // Step 6: load the topic labels. They must have been assigned by hand beforehand,
        // matched one by one against the model-final.twords file.
        Console.WriteLine("[{0}]读取主题映射...", DateTime.Now);
        var labels = LDA.GetTopicLabel(labelPath);

        // Step 7: pick, for each document, the index of its most likely topic.
        Console.WriteLine("[{0}]开始预测主题...", DateTime.Now);
        var bestTopics = LDA.GetDocmentTopicIndexList(docTopics);

        Console.WriteLine("[{0}]以下是微博的主题预测情况:", DateTime.Now);
        LDA.PrintTopics(rawDocs, bestTopics, labels);

        // Step 8: save the predictions to the database.
        Console.WriteLine("[{0}]开始存储主题...", DateTime.Now);
        LDA.SaveTopicToDB(bestTopics, labels, tidList);

        MessageBox.Show("任务已完成!", "提示", MessageBoxButtons.OK, MessageBoxIcon.Information);
    }
    catch (Exception ex)
    {
        MessageBox.Show(ex.Message, "出错啦", MessageBoxButtons.OK, MessageBoxIcon.Error);
    }
}