Пример #1
0
        /// <summary>
        /// Runs a sweep of lda.exe training runs over either the topic count or the
        /// beta hyper-parameter (selected by <c>rbK</c>), computes a perplexity value
        /// for each run, plots the results in <c>chartControl1</c> and writes them to
        /// "evaluate.txt" next to the training document.
        /// </summary>
        /// <param name="sender">The control that raised the event.</param>
        /// <param name="e">Event data (unused).</param>
        private void button2_Click(object sender, EventArgs e)
        {
            try
            {
                // Paths
                var fullPath  = Path.GetFullPath(textBox2.Text);
                var dir       = Path.GetDirectoryName(fullPath);
                var trainDoc  = FileUtils.RelativePath(Application.StartupPath, fullPath).Replace('\\', '/');
                var finalData = Path.Combine(dir, "evaluate.txt");
                // Remove any result file left over from a previous sweep.
                File.Delete(finalData);

                // Initial parameters
                int    cnt  = Convert.ToInt32(txtCnt.Text);
                double init = Convert.ToDouble(txtIni.Text);
                double inc  = Convert.ToDouble(txtInc.Text);

                // The command line is machine-readable: format numbers with the invariant
                // culture so a comma-decimal UI culture does not turn 0.1 into "0,1".
                var invariant = System.Globalization.CultureInfo.InvariantCulture;

                // Initialise the data table
                InitTable();

                // Start the evaluation
                for (int i = 0; i < cnt; i++)
                {
                    double current = init + i * inc;
                    if (rbK.Checked)
                    {
                        // Sweep the number of topics with beta fixed at 0.1.
                        // NOTE(review): "-dir" is passed here without a value — confirm against lda.exe usage.
                        args = string.Format(invariant, "-est -dir -beta {0} -ntopics {1} -niters 1000 -savestep 500 -dfile {2}", 0.1, (int)current, trainDoc);
                    }
                    else
                    {
                        // Sweep beta with the number of topics fixed at 12.
                        args = string.Format(invariant, "-est -beta {0} -ntopics {1} -niters 1000 -savestep 500 -dfile {2}", current, 12, trainDoc);
                    }
                    // Presumably blocks until lda.exe exits, since its output files are read next — TODO confirm.
                    ProcessUtils.StartProcess("lda.exe", args);
                    var tt = FileUtils.ReadAsList(Path.Combine(dir, "model-final.theta"));
                    var pp = FileUtils.ReadAsList(Path.Combine(dir, "model-final.phi"));
                    var aa = FileUtils.ReadAsList(Path.Combine(dir, "model-final.tassign"));
                    var d  = LDA.GetPerplexity(pp, tt, aa);

                    table.Rows.Add(current, d);
                    label6.Text = string.Format("已完成:{0}/{1}", i + 1, cnt);
                }

                // Configure the chart
                chartControl1.Series[0].DataSource = table;
                chartControl1.Series[0].SetDataMembers("x", "y");
                var xyDiagram = (DevExpress.XtraCharts.XYDiagram)(chartControl1.Diagram);
                xyDiagram.AxisX.Title.Text     = rbK.Checked ? "主题个数K" : "超参数β";
                xyDiagram.EnableAxisYScrolling = true;
                xyDiagram.EnableAxisYZooming   = true;
                chartControl1.Titles[0].Text   = rbK.Checked ? "主题个数K对聚类的影响" : "超参数β对聚类的影响";

                // Save the evaluation data; the using block closes the writer even if a write fails.
                using (var sw = File.AppendText(finalData))
                {
                    for (int i = 0; i < table.Rows.Count; i++)
                    {
                        sw.WriteLine(table.Rows[i]["x"] + "\t" + table.Rows[i]["y"]);
                    }
                }
            }
            catch (Exception ex)
            {
                // Report any failure (bad input, missing files, process errors) to the user.
                MessageBox.Show(ex.Message);
            }
        }
Пример #2
0
        /// <summary>
        /// Builds an lda.exe estimation command line from the optional beta, topic
        /// count, iteration count and alpha text boxes, runs it against the document
        /// named in <c>textBox2</c>, then loads "model-final.twords" from that
        /// document's directory and displays it via <c>ShowWords</c>.
        /// </summary>
        /// <param name="sender">The control that raised the event.</param>
        /// <param name="e">Event data (unused).</param>
        private void button2_Click(object sender, EventArgs e)
        {
            try
            {
                // Stored in the dir member rather than a local, so it outlives this handler.
                dir = Path.GetDirectoryName(Path.GetFullPath(textBox2.Text));
                StringBuilder sb = new StringBuilder();

                sb.Append("-est ");
                // Each option is only emitted when the user supplied a value.
                if (!string.IsNullOrEmpty(txtB.Text))
                {
                    sb.AppendFormat(" -beta {0} ", txtB.Text);
                }
                if (!string.IsNullOrEmpty(txtK.Text))
                {
                    sb.AppendFormat(" -ntopics {0} ", txtK.Text);
                }
                if (!string.IsNullOrEmpty(txtI.Text))
                {
                    sb.AppendFormat(" -niters {0} ", txtI.Text);
                }
                if (!string.IsNullOrEmpty(txtA.Text))
                {
                    sb.AppendFormat(" -alpha {0} ", txtA.Text);
                }
                sb.AppendFormat(" -savestep 300 -twords 15 -dfile {0} ", textBox2.Text);
                ProcessUtils.StartProcess("lda.exe", sb.ToString());
                var data = LDA.ReadTopicWordsWeight(Path.Combine(dir, "model-final.twords"));

                ShowWords(data);
                MessageBox.Show("模型训练完成,接下来请根据高频词填写对应的主题标签,并记得保存");
            }
            catch (Exception ex)
            {
                // An invalid path, a failed process launch or a missing output file is
                // reported to the user instead of escaping the event handler.
                MessageBox.Show(ex.Message);
            }
        }
Пример #3
0
        /// <summary>
        /// Infers the topic of the single text typed into <c>textBox2</c> using the
        /// model referenced by <c>textBox1</c>, and shows the resulting label in
        /// <c>label3</c>. Any exception is reported in a message box.
        /// </summary>
        /// <param name="sender">The control that raised the event.</param>
        /// <param name="e">Event data (unused).</param>
        private void button3_Click(object sender, EventArgs e)
        {
            try
            {
                const string inferFileName = "ldaTestDoc.txt";

                string modelDir  = Path.GetDirectoryName(Path.GetFullPath(textBox1.Text));
                string modelName = Path.GetFileNameWithoutExtension(textBox1.Text);

                // Pre-process the single input text and write it where lda.exe will look for it.
                var segmented = Preprocessor.SegmentRemoveStopWords(new[] { textBox2.Text }.ToList(), false);
                LDA.WriteLdaDocToFile(segmented, Path.Combine(modelDir, inferFileName));

                string commandLine = string.Format("-inf  -dir {0} -model {1} -dfile {2} -niters 20", modelDir, modelName, inferFileName);
                ProcessUtils.StartProcess("lda.exe", commandLine);

                // Load the inference result into memory.
                string thetaPath = Path.Combine(modelDir, inferFileName + ".theta");
                var docTopics = LDA.ReadDocumentTopicData(thetaPath);

                Console.WriteLine("[{0}]读取主题映射...", DateTime.Now);
                // The label file has to be prepared by hand beforehand, by matching
                // labels one-to-one against model-final.twords.
                string labelPath = Path.Combine(modelDir, "ldaTopicLabel.txt");
                var labels = LDA.GetTopicLabel(labelPath);

                Console.WriteLine("[{0}]开始预测主题...", DateTime.Now);
                // Index of the most likely topic for each document.
                var bestTopics = LDA.GetDocmentTopicIndexList(docTopics);

                // Only one document was submitted, so only the first entry is shown.
                var firstDocTopic = bestTopics[0];
                label3.Text = labels[firstDocTopic];
            }
            catch (Exception ex)
            {
                MessageBox.Show(ex.Message);
            }
        }
Пример #4
0
        /// <summary>
        /// Fetches the number of training documents given in <c>textBox1</c>,
        /// runs them through <c>Preprocessor.SegmentRemoveStopWords</c> and writes
        /// the result to the file named in <c>textBox2</c>.
        /// </summary>
        /// <param name="sender">The control that raised the event.</param>
        /// <param name="e">Event data (unused).</param>
        private void button1_Click(object sender, EventArgs e)
        {
            int requestedCount = Convert.ToInt32(textBox1.Text);
            string outputPath  = textBox2.Text;

            var bagOfWords = Preprocessor.SegmentRemoveStopWords(LDA.GetLDATrainDoc(requestedCount));
            LDA.WriteLdaDocToFile(bagOfWords, outputPath);

            // Tell the user the file has been generated.
            MessageBox.Show("已经生成验证集,接下来可以验证了");
        }
Пример #5
0
 /// <summary>
 /// Collects the value of column index 2 from every row of <c>dataGridView1</c>
 /// and passes them to <c>LDA.SetTopicLabel</c>, targeting "ldaTopicLabel.txt"
 /// in the directory held by <c>dir</c>.
 /// </summary>
 /// <param name="sender">The control that raised the event.</param>
 /// <param name="e">Event data (unused).</param>
 private void button4_Click(object sender, EventArgs e)
 {
     int rowCount = dataGridView1.Rows.Count;
     string[] labels = new string[rowCount];
     for (int i = 0; i < rowCount; i++)
     {
         // A cell that was never filled in has a null Value; Convert.ToString maps
         // that to an empty string instead of throwing NullReferenceException.
         labels[i] = Convert.ToString(dataGridView1[2, i].Value);
     }
     LDA.SetTopicLabel(labels, Path.Combine(dir, "ldaTopicLabel.txt"));
     MessageBox.Show("模型保存成功");
 }
Пример #6
0
        /// <summary>
        /// Batch topic inference: fetches documents via <c>LDA.GetLDAInferDoc</c>,
        /// pre-processes them, runs lda.exe in inference mode against "model-final"
        /// in the directory of the path in <c>textBox1</c>, prints the predicted
        /// topics and stores them through <c>LDA.SaveTopicToDB</c>. Progress is
        /// written to the console; any exception is shown in an error dialog.
        /// </summary>
        /// <param name="sender">The control that raised the event.</param>
        /// <param name="e">Event data (unused).</param>
        private void button2_Click(object sender, EventArgs e)
        {
            try
            {
                const string inferFileName = "ldaInferDoc.txt";
                const string labelFileName = "ldaTopicLabel.txt";

                string workDir   = Path.GetDirectoryName(Path.GetFullPath(textBox1.Text));
                string inferPath = Path.Combine(workDir, inferFileName);
                string thetaPath = Path.Combine(workDir, inferFileName + ".theta");
                string labelPath = Path.Combine(workDir, labelFileName);

                Console.WriteLine("[{0}]开始预测主题...", DateTime.Now);

                // Fetch the source documents together with their ids.
                Console.WriteLine("[{0}]获取微博...", DateTime.Now);
                List<string> tidList;
                var originalDocs = LDA.GetLDAInferDoc(out tidList);

                Console.WriteLine("[{0}]预处理...", DateTime.Now);
                var preparedDocs = Preprocessor.SegmentRemoveStopWords(originalDocs, false);

                Console.WriteLine("[{0}]写入文件...", DateTime.Now);
                LDA.WriteLdaDocToFile(preparedDocs, inferPath);

                Console.WriteLine("[{0}]吉布斯采样...", DateTime.Now);
                string commandLine = string.Format("-inf  -dir {0} -model {1} -dfile {2} -niters 20", workDir, "model-final", inferFileName);
                ProcessUtils.StartProcess("lda.exe", commandLine);

                // Load the inference result into memory.
                Console.WriteLine("[{0}]读取预测结果...", DateTime.Now);
                var docTopics = LDA.ReadDocumentTopicData(thetaPath);

                // The label file has to be prepared by hand beforehand, by matching
                // labels one-to-one against model-final.twords.
                Console.WriteLine("[{0}]读取主题映射...", DateTime.Now);
                var labels = LDA.GetTopicLabel(labelPath);

                // Index of the most likely topic for each document.
                Console.WriteLine("[{0}]开始预测主题...", DateTime.Now);
                var bestTopics = LDA.GetDocmentTopicIndexList(docTopics);

                Console.WriteLine("[{0}]以下是微博的主题预测情况:", DateTime.Now);
                LDA.PrintTopics(originalDocs, bestTopics, labels);

                // Save to the database.
                Console.WriteLine("[{0}]开始存储主题...", DateTime.Now);
                LDA.SaveTopicToDB(bestTopics, labels, tidList);

                MessageBox.Show("任务已完成!", "提示", MessageBoxButtons.OK, MessageBoxIcon.Information);
            }
            catch (Exception ex)
            {
                MessageBox.Show(ex.Message, "出错啦", MessageBoxButtons.OK, MessageBoxIcon.Error);
            }
        }