/// <summary>
/// Click handler that serializes the two in-memory dictionary collections
/// (objCharBondColl and objKeyWordColl) into the "dict" folder under the
/// workspace path, logging progress through AppendText.
/// </summary>
/// <remarks>
/// Each collection is passed through its DictionaryDAL.ClearMemory* routine right
/// before it is written (presumably to prune entries so the file stays small;
/// confirm against DictionaryDAL). I/O exceptions are not caught here and
/// propagate to the caller.
/// </remarks>
/// <param name="sender">Control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void btnSaveDictionary_Click(object sender, EventArgs e)
{
    // Resolve the target folder once. Path.Combine inserts the platform separator
    // instead of relying on a hard-coded backslash inside a format string.
    string dictFolder = Path.GetFullPath(Path.Combine(CachePathDAL.GetWorkSpacePath(), "dict"));

    // CreateDirectory does nothing when the folder already exists, so no Exists check is needed.
    Directory.CreateDirectory(dictFolder);

    // Adjacent-character (bond) collection.
    string charBondFile = Path.Combine(dictFolder, "CharBond.coll");
    AppendText(String.Format("请稍候,正在保存文件:{0}", charBondFile));
    DictionaryDAL.ClearMemoryBondColl<string>(objCharBondColl);
    SerialLib.SerializeBinary<MemoryBondColl<string>>(objCharBondColl, charBondFile);

    // Keyword collection.
    string keyWordFile = Path.Combine(dictFolder, "KeyWord.coll");
    AppendText(String.Format("请稍候,正在保存文件:{0}", keyWordFile));
    DictionaryDAL.ClearMemoryItemColl<string>(objKeyWordColl);
    SerialLib.SerializeBinary<MemoryItemColl<string>>(objKeyWordColl, keyWordFile);

    AppendText("字典集保存完毕!");
}
/// <summary>
/// Feeds text into dictionary learning and/or segmentation, depending on the
/// radio buttons selected in the form.
/// </summary>
/// <remarks>
/// Flow:
/// 1. Both collections get their MinuteOffsetSize set from nmbReadSpeed.
/// 2. If tbCoreWordList contains text and segment mode is checked, only that text
///    is segmented and shown, and the method returns.
/// 3. Otherwise tbFilePath is treated as a single file (radFileMode) or as a folder
///    whose *.txt files are processed recursively (radFloderMode).
/// 4. The elapsed time is appended to the log, even when nothing was processed.
/// </remarks>
private void CatchWordIndexColl()
{
    // NOTE(review): this product is evaluated in int arithmetic and wraps silently
    // for read speeds above ~4142; the type of MinuteOffsetSize is not visible here,
    // so the expression is left as it was. Confirm the control's Maximum.
    int offsetSize = Convert.ToInt32(nmbReadSpeed.Value) * 60 * 60 * 24 * 6;
    objCharBondColl.MinuteOffsetSize = offsetSize;
    objKeyWordColl.MinuteOffsetSize = offsetSize;

    Encoding objEncoding = GetEncoding();

    // Ad-hoc segmentation of the text box content takes priority over file input.
    if (!String.IsNullOrWhiteSpace(this.tbCoreWordList.Text) && radSegment.Checked)
    {
        AppendText(SegmentBLL.ShowSegment(SegmentBLL.Segment(this.tbCoreWordList.Text, objCharBondColl, objKeyWordColl, 4, true, true)));
        return;
    }

    DateTime dtStartTime = DateTime.Now;

    if (radFileMode.Checked)
    {
        string sPathFile = this.tbFilePath.Text;
        if (File.Exists(sPathFile))
        {
            double dTempCharCount = objCharBondColl.OffsetTotalCount;
            CatchWordIndexFromFile(sPathFile, objEncoding, ref dTempCharCount);
        }
    }

    if (radFloderMode.Checked)
    {
        if (Directory.Exists(this.tbFilePath.Text))
        {
            // The purge counter is shared by every file in the folder.
            double dTempCharCount = objCharBondColl.OffsetTotalCount;
            string[] objFileColl = Directory.GetFiles(this.tbFilePath.Text, "*.txt", SearchOption.AllDirectories);
            foreach (string sPathFile in objFileColl)
            {
                // A file may have disappeared since the directory was enumerated.
                if (File.Exists(sPathFile))
                {
                    CatchWordIndexFromFile(sPathFile, objEncoding, ref dTempCharCount);
                }
            }
        }
    }

    AppendText(String.Format("完成,共用时{0}秒。", (DateTime.Now - dtStartTime).ToString()));
}

/// <summary>
/// Reads one text file line by line, cleans each line, and passes it to dictionary
/// learning and/or segmentation while driving progressBar1.
/// </summary>
/// <param name="sPathFile">Path of the file to read; the caller has checked that it exists.</param>
/// <param name="objEncoding">Encoding used to decode the file.</param>
/// <param name="dTempCharCount">
/// Running count of characters processed since the last purge. It is reset to 0 whenever
/// it exceeds objCharBondColl.MinuteOffsetSize in dictionary mode, and is passed by
/// reference so the count carries over from one file to the next.
/// </param>
private void CatchWordIndexFromFile(string sPathFile, Encoding objEncoding, ref double dTempCharCount)
{
    this.AppendText(String.Format("【进行】{0}", sPathFile));

    // Progress range is an estimate: half the byte length, +1 so Maximum is never 0.
    // Clamp to int.MaxValue so files larger than ~4 GB do not overflow the int property.
    long estimatedCharCount = new FileInfo(sPathFile).Length / 2 + 1;
    this.progressBar1.Maximum = (int)Math.Min(estimatedCharCount, (long)int.MaxValue);
    this.progressBar1.Minimum = 0;
    this.progressBar1.Value = this.progressBar1.Minimum;

    double dLoadCharCount = 0;

    using (StreamReader sr = new StreamReader(sPathFile, objEncoding))
    {
        string line;
        while ((line = sr.ReadLine()) != null)
        {
            // Pump the message queue so the form keeps repainting during long files.
            // NOTE(review): DoEvents also lets other handlers run re-entrantly.
            Application.DoEvents();

            if (!String.IsNullOrWhiteSpace(line))
            {
                // Strip Unicode "Other" category characters (control, format, ...) and HTML tags.
                line = Regex.Replace(Regex.Replace(line, @"\p{C}+", ""), "<.*?>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);
            }

            if (String.IsNullOrWhiteSpace(line))
            {
                continue;
            }

            dLoadCharCount += line.Length;
            dTempCharCount += line.Length;

            // The estimate can be too low; hold the bar one step short of full until the file ends.
            this.progressBar1.Value = this.progressBar1.Maximum > dLoadCharCount
                ? Convert.ToInt32(dLoadCharCount)
                : this.progressBar1.Maximum - 1;

            // Any additional data cleaning (e.g. collapsing redundant spaces) can be added here.
            string text = line;

            if (radDictionary.Checked)
            {
                // After one full cycle's worth of data, purge the bond collection and
                // the lexicon once so memory does not run out.
                if (dTempCharCount > objCharBondColl.MinuteOffsetSize)
                {
                    DictionaryDAL.ClearMemoryBondColl<string>(objCharBondColl);
                    DictionaryDAL.ClearMemoryItemColl<string>(objKeyWordColl);
                    dTempCharCount = 0;
                }

                WordDictBLL.UpdateKeyWordColl(text, objCharBondColl, objKeyWordColl);
            }

            if (radSegment.Checked)
            {
                AppendText(SegmentBLL.ShowSegment(SegmentBLL.Segment(text, objCharBondColl, objKeyWordColl, 4, true, true)));
            }
        }
    }

    // Show a full bar, then reset it for the next file.
    this.progressBar1.Value = this.progressBar1.Maximum;
    this.progressBar1.Value = this.progressBar1.Minimum;

    // Advance the picker by the time this file represents at the configured read speed.
    if (this.chkDelayTime.Checked)
    {
        dtPickerUpdateTime.Value = dtPickerUpdateTime.Value.AddSeconds(dLoadCharCount / (double)nmbReadSpeed.Value);
    }
}