Exemplo n.º 1
0
        /// <summary>
        /// Click handler that saves both in-memory collections to disk.
        /// Each collection is first passed through its <c>DictionaryDAL.ClearMemory*Coll</c> call
        /// and then binary-serialized into the <c>dict</c> folder under the workspace path
        /// (<c>CharBond.coll</c> and <c>KeyWord.coll</c>). Progress is reported via <c>AppendText</c>.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void btnSaveDictionary_Click(object sender, EventArgs e)
        {
            // Resolve the target folder once; Path.Combine supplies the separator.
            string folder = Path.GetFullPath(Path.Combine(CachePathDAL.GetWorkSpacePath(), "dict"));

            // CreateDirectory does nothing when the directory already exists,
            // so no separate existence check is needed.
            Directory.CreateDirectory(folder);

            string filename1 = Path.Combine(folder, "CharBond.coll");

            AppendText(String.Format("请稍候,正在保存文件:{0}", filename1));
            DictionaryDAL.ClearMemoryBondColl <string>(objCharBondColl);
            SerialLib.SerializeBinary <MemoryBondColl <string> >(objCharBondColl, filename1);

            string filename2 = Path.Combine(folder, "KeyWord.coll");

            AppendText(String.Format("请稍候,正在保存文件:{0}", filename2));
            DictionaryDAL.ClearMemoryItemColl <string>(objKeyWordColl);
            SerialLib.SerializeBinary <MemoryItemColl <string> >(objKeyWordColl, filename2);

            AppendText("字典集保存完毕!");
        }
Exemplo n.º 2
0
        /// <summary>
        /// Runs the processing pass selected in the UI.
        /// <para>
        /// If <c>tbCoreWordList</c> contains text and segment mode is checked, that text is segmented,
        /// the result is appended to the output, and the method returns.
        /// Otherwise the path in <c>tbFilePath</c> is processed: a single file in file mode, or every
        /// <c>*.txt</c> file (recursively) in folder mode. The elapsed time is reported at the end.
        /// </para>
        /// </summary>
        private void CatchWordIndexColl()
        {
            // Same window for both collections: read speed multiplied by the number of seconds in six days.
            // NOTE(review): this is 32-bit arithmetic and can overflow Int32 for read speeds above
            // roughly 4142 — confirm the upper bound configured on nmbReadSpeed.
            int offsetSize = Convert.ToInt32(nmbReadSpeed.Value) * 60 * 60 * 24 * 6;
            objCharBondColl.MinuteOffsetSize = offsetSize;
            objKeyWordColl.MinuteOffsetSize  = offsetSize;
            Encoding objEncoding = GetEncoding();

            // Ad-hoc text in the text box takes precedence over file/folder processing in segment mode.
            if (!String.IsNullOrWhiteSpace(this.tbCoreWordList.Text) && radSegment.Checked)
            {
                AppendText(SegmentBLL.ShowSegment(SegmentBLL.Segment(this.tbCoreWordList.Text, objCharBondColl, objKeyWordColl, 4, true, true)));
                return;
            }

            // UTC so the measured duration is not affected by daylight-saving transitions.
            DateTime dtStartTime = DateTime.UtcNow;

            if (radFileMode.Checked)
            {
                string sPathFile = this.tbFilePath.Text;
                if (File.Exists(sPathFile))
                {
                    double dTempCharCount = objCharBondColl.OffsetTotalCount;
                    CatchWordIndexFromFile(sPathFile, objEncoding, ref dTempCharCount);
                }
            }

            if (radFloderMode.Checked)
            {
                if (Directory.Exists(this.tbFilePath.Text))
                {
                    // The running count is shared by all files so the periodic clean-up
                    // threshold applies to the folder as a whole.
                    double   dTempCharCount = objCharBondColl.OffsetTotalCount;
                    string[] objFileColl    = Directory.GetFiles(this.tbFilePath.Text, "*.txt", SearchOption.AllDirectories);
                    foreach (string sPathFile in objFileColl)
                    {
                        // A file may have disappeared since the directory was listed.
                        if (File.Exists(sPathFile))
                        {
                            CatchWordIndexFromFile(sPathFile, objEncoding, ref dTempCharCount);
                        }
                    }
                }
            }

            // The message is worded in seconds, so print the total seconds rather than an hh:mm:ss TimeSpan.
            double dElapsedSeconds = (DateTime.UtcNow - dtStartTime).TotalSeconds;
            AppendText(String.Format("完成,共用时{0}秒。", dElapsedSeconds.ToString("F2")));
        }

        /// <summary>
        /// Reads one text file line by line, strips control characters and HTML-like tags from each
        /// line, and passes every non-blank line to dictionary learning and/or segmentation
        /// depending on which mode radio buttons are checked. Drives <c>progressBar1</c> while reading.
        /// </summary>
        /// <param name="sPathFile">Path of the file to read.</param>
        /// <param name="objEncoding">Encoding used to decode the file.</param>
        /// <param name="dTempCharCount">
        /// Running number of characters processed since the last collection clean-up. It is increased
        /// by the length of every processed line and reset to 0 whenever a clean-up is triggered.
        /// </param>
        private void CatchWordIndexFromFile(string sPathFile, Encoding objEncoding, ref double dTempCharCount)
        {
            this.AppendText(String.Format("【进行】{0}", sPathFile));

            // Progress scale is an estimate: half the file's byte length plus one.
            // Clamp it so files whose estimate exceeds Int32 do not throw when sizing the bar.
            long   lEstimatedChars = new FileInfo(sPathFile).Length / 2 + 1;
            double dLoadCharCount  = 0;

            this.progressBar1.Maximum = (int)Math.Min(lEstimatedChars, int.MaxValue);
            this.progressBar1.Minimum = 0;
            this.progressBar1.Value   = this.progressBar1.Minimum;

            using (StreamReader sr = new StreamReader(sPathFile, objEncoding))
            {
                string line;
                while ((line = sr.ReadLine()) != null)
                {
                    // Pump pending window messages so the form keeps repainting during the loop.
                    Application.DoEvents();

                    if (String.IsNullOrWhiteSpace(line))
                    {
                        continue;
                    }

                    // Remove possible control characters and HTML tags.
                    line = Regex.Replace(Regex.Replace(line, @"\p{C}+", ""), "<.*?>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);
                    if (String.IsNullOrWhiteSpace(line))
                    {
                        continue;
                    }

                    dLoadCharCount += line.Length;
                    dTempCharCount += line.Length;

                    // Stay one step below Maximum until the file is finished, even if the estimate was too low.
                    this.progressBar1.Value = this.progressBar1.Maximum > dLoadCharCount
                        ? Convert.ToInt32(dLoadCharCount)
                        : this.progressBar1.Maximum - 1;

                    // Further data cleaning (e.g. collapsing redundant spaces) could be applied here.
                    string text = line;

                    if (radDictionary.Checked)
                    {
                        // Once a full window of data has been processed, clean the bond collection and
                        // the keyword collection to keep memory usage bounded.
                        if (dTempCharCount > objCharBondColl.MinuteOffsetSize)
                        {
                            DictionaryDAL.ClearMemoryBondColl <string>(objCharBondColl);
                            DictionaryDAL.ClearMemoryItemColl <string>(objKeyWordColl);
                            dTempCharCount = 0;
                        }

                        WordDictBLL.UpdateKeyWordColl(text, objCharBondColl, objKeyWordColl);
                    }

                    if (radSegment.Checked)
                    {
                        AppendText(SegmentBLL.ShowSegment(SegmentBLL.Segment(text, objCharBondColl, objKeyWordColl, 4, true, true)));
                    }
                }
            }

            // Show the bar as complete, then reset it for the next file.
            this.progressBar1.Value = this.progressBar1.Maximum;
            this.progressBar1.Value = this.progressBar1.Minimum;

            // Advance the picker by (characters read / read speed) seconds.
            // A read speed of zero would yield a non-finite number of seconds, which AddSeconds rejects.
            double dReadSpeed = (double)nmbReadSpeed.Value;
            if (this.chkDelayTime.Checked && dReadSpeed > 0)
            {
                dtPickerUpdateTime.Value = dtPickerUpdateTime.Value.AddSeconds(dLoadCharCount / dReadSpeed);
            }
        }