Пример #1
0
 private void btnShowWordCloud_Click(object sender, EventArgs e)
 {
     if (radDictionary.Checked)
     {
         string result = WordDictBLL.ShowKeyWordWeightColl(objKeyWordColl, Convert.ToInt32(numericUpDown1.Value), chkOrderBy.Checked, chkIsOnlyWord.Checked);
         this.richTextBox1.Text = result;
     }
     if (radSegment.Checked)
     {
         using (StringReader sr = new StringReader(this.tbCoreWordList.Text))
         {
             StringBuilder sb = new StringBuilder();
             Dictionary <string, double> dictValue = new Dictionary <string, double>();
             string line = null;
             while ((line = sr.ReadLine()) != null)
             {
                 List <string> objKeyWordList = SegmentBLL.Segment(line, objCharBondColl, objKeyWordColl, 7, false, false);
                 foreach (string keyword in objKeyWordList)
                 {
                     if (!dictValue.ContainsKey(keyword))
                     {
                         dictValue.Add(keyword, !objKeyWordColl.Contains(keyword)?0: -Math.Log(objKeyWordColl[keyword].ValidCount / objKeyWordColl.MinuteOffsetSize));
                     }
                     else
                     {
                         dictValue[keyword] += !objKeyWordColl.Contains(keyword) ? 0 : -Math.Log(objKeyWordColl[keyword].ValidCount / objKeyWordColl.MinuteOffsetSize);
                     }
                 }
                 sb.AppendLine(SegmentBLL.ShowSegment(objKeyWordList));
             }
             sb.AppendLine();
             var buffer = from x in dictValue
                          orderby x.Value descending
                          select x;
             foreach (var x in buffer)
             {
                 sb.AppendLine(String.Format("【{0}】{1}", x.Key, Math.Round(x.Value, 4)));
             }
             this.richTextBox1.Text = sb.ToString();
         }
     }
 }
Пример #2
0
        private void CatchWordIndexColl()
        {
            objCharBondColl.MinuteOffsetSize = Convert.ToInt32(nmbReadSpeed.Value) * 60 * 60 * 24 * 6;
            objKeyWordColl.MinuteOffsetSize  = Convert.ToInt32(nmbReadSpeed.Value) * 60 * 60 * 24 * 6;
            Encoding objEncoding = GetEncoding();

            if (!String.IsNullOrWhiteSpace(this.tbCoreWordList.Text))
            {
                if (radSegment.Checked)
                {
                    AppendText(SegmentBLL.ShowSegment(SegmentBLL.Segment(this.tbCoreWordList.Text, objCharBondColl, objKeyWordColl, 4, true, true)));
                    return;
                }
            }

            DateTime dtStartTime = DateTime.Now;

            if (radFileMode.Checked)
            {
                string sPathFile = this.tbFilePath.Text;
                if (File.Exists(sPathFile))
                {
                    this.AppendText(String.Format("【进行】{0}", sPathFile));

                    FileInfo info            = new FileInfo(sPathFile);
                    double   dFileCharLength = info.Length / 2 + 1;
                    double   dLoadCharCount  = 0;
                    double   dTempCharCount  = objCharBondColl.OffsetTotalCount;

                    DateTime dtUpdateTime = DateTime.Now;
                    this.progressBar1.Maximum = Convert.ToInt32(dFileCharLength);
                    this.progressBar1.Minimum = 0;
                    this.progressBar1.Value   = this.progressBar1.Minimum;
                    using (StreamReader sr = new StreamReader(sPathFile, objEncoding))
                    {
                        string line = null;
                        while ((line = sr.ReadLine()) != null)
                        {
                            Application.DoEvents();

                            if (!String.IsNullOrWhiteSpace(line))
                            {
                                line = Regex.Replace(Regex.Replace(line, @"\p{C}+", ""), "<.*?>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);                                  //去除可能的控制符、Html标签
                            }
                            if (String.IsNullOrWhiteSpace(line))
                            {
                                continue;
                            }


                            dLoadCharCount         += line.Length;
                            dTempCharCount         += line.Length;
                            this.progressBar1.Value = this.progressBar1.Maximum > dLoadCharCount?Convert.ToInt32(dLoadCharCount) : this.progressBar1.Maximum - 1;

                            string text = line; //这里可以再做一些需要特别处理的数据清洗,如多余的空格等

                            if (!String.IsNullOrEmpty(text))
                            {
                                if (this.chkDelayTime.Checked)
                                {
                                    dtUpdateTime = dtPickerUpdateTime.Value.AddSeconds(dLoadCharCount / (double)nmbReadSpeed.Value);
                                }

                                if (radDictionary.Checked)
                                {
                                    //当数据跑过一个周期的数据时清理一次邻键集、词库,避免内存空间不足
                                    if (dTempCharCount > objCharBondColl.MinuteOffsetSize)
                                    {
                                        DictionaryDAL.ClearMemoryBondColl <string>(objCharBondColl);
                                        DictionaryDAL.ClearMemoryItemColl <string>(objKeyWordColl);
                                        dTempCharCount = 0;
                                    }

                                    WordDictBLL.UpdateKeyWordColl(text, objCharBondColl, objKeyWordColl);
                                }

                                if (radSegment.Checked)
                                {
                                    AppendText(SegmentBLL.ShowSegment(SegmentBLL.Segment(text, objCharBondColl, objKeyWordColl, 4, true, true)));
                                }
                            }
                        }
                    }
                    this.progressBar1.Value = this.progressBar1.Maximum;
                    this.progressBar1.Value = this.progressBar1.Minimum;
                    if (this.chkDelayTime.Checked)
                    {
                        dtPickerUpdateTime.Value = dtPickerUpdateTime.Value.AddSeconds(dLoadCharCount / (double)nmbReadSpeed.Value);
                    }
                }
            }

            if (radFloderMode.Checked)
            {
                if (Directory.Exists(this.tbFilePath.Text))
                {
                    double   dTempCharCount = objCharBondColl.OffsetTotalCount;
                    string[] objFileColl    = Directory.GetFiles(this.tbFilePath.Text, "*.txt", SearchOption.AllDirectories);
                    foreach (string sPathFile in objFileColl)
                    {
                        if (File.Exists(sPathFile))
                        {
                            double dLoadCharCount = 0;
                            this.AppendText(String.Format("【进行】{0}", sPathFile));

                            FileInfo info            = new FileInfo(sPathFile);
                            double   dFileCharLength = info.Length / 2 + 1;

                            DateTime dtUpdateTime = DateTime.Now;
                            this.progressBar1.Maximum = Convert.ToInt32(dFileCharLength);
                            this.progressBar1.Minimum = 0;
                            this.progressBar1.Value   = this.progressBar1.Minimum;
                            using (StreamReader sr = new StreamReader(sPathFile, objEncoding))
                            {
                                string line = null;
                                while ((line = sr.ReadLine()) != null)
                                {
                                    Application.DoEvents();

                                    if (!String.IsNullOrWhiteSpace(line))
                                    {
                                        line = Regex.Replace(Regex.Replace(line, @"\p{C}+", ""), "<.*?>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);                                  //去除可能的控制符、Html标签
                                    }
                                    if (String.IsNullOrWhiteSpace(line))
                                    {
                                        continue;
                                    }


                                    dLoadCharCount         += line.Length;
                                    dTempCharCount         += line.Length;
                                    this.progressBar1.Value = this.progressBar1.Maximum > dLoadCharCount?Convert.ToInt32(dLoadCharCount) : this.progressBar1.Maximum - 1;

                                    string text = line; //这里可以再做一些需要特别处理的数据清洗,如多余的空格等

                                    if (!String.IsNullOrEmpty(text))
                                    {
                                        if (this.chkDelayTime.Checked)
                                        {
                                            dtUpdateTime = dtPickerUpdateTime.Value.AddSeconds(dLoadCharCount / (double)nmbReadSpeed.Value);
                                        }

                                        if (radDictionary.Checked)
                                        {
                                            //当数据跑过一个周期的数据时清理一次邻键集、词库,避免内存空间不足
                                            if (dTempCharCount > objCharBondColl.MinuteOffsetSize)
                                            {
                                                DictionaryDAL.ClearMemoryBondColl <string>(objCharBondColl);
                                                DictionaryDAL.ClearMemoryItemColl <string>(objKeyWordColl);
                                                dTempCharCount = 0;
                                            }

                                            WordDictBLL.UpdateKeyWordColl(text, objCharBondColl, objKeyWordColl);
                                        }

                                        if (radSegment.Checked)
                                        {
                                            AppendText(SegmentBLL.ShowSegment(SegmentBLL.Segment(text, objCharBondColl, objKeyWordColl, 4, true, true)));
                                        }
                                    }
                                }
                            }
                            this.progressBar1.Value = this.progressBar1.Maximum;
                            this.progressBar1.Value = this.progressBar1.Minimum;
                            if (this.chkDelayTime.Checked)
                            {
                                dtPickerUpdateTime.Value = dtPickerUpdateTime.Value.AddSeconds(dLoadCharCount / (double)nmbReadSpeed.Value);
                            }
                        }
                    }
                }
            }
            AppendText(String.Format("完成,共用时{0}秒。", (DateTime.Now - dtStartTime).ToString()));
        }