/// <summary>
/// Splits <paramref name="text"/> into tokens and returns them as a single string in which
/// every token is followed by the delimiter.
/// </summary>
/// <remarks>
/// A run that starts with a digit or an English letter is emitted as one token. Any other
/// position is resolved by taking the longest window allowed by <c>dict.WordMaxLength</c> and
/// shrinking it from the right until <c>dict.Find</c> accepts it; if nothing longer than one
/// character is accepted, the single character becomes the token.
/// This overload always resets the shared <c>Delimiter</c> to "/" and stores the elapsed time
/// of the call in the shared <c>timespan</c> member.
/// </remarks>
/// <param name="dict">Dictionary queried through <c>Find</c> and <c>WordMaxLength</c>.</param>
/// <param name="text">Text to segment.</param>
/// <returns>The segmented text; an empty string when <paramref name="text"/> is empty.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="dict"/> or <paramref name="text"/> is null.
/// </exception>
public static string Separate(ChineseDictionary dict, string text)
{
    if (dict == null)
    {
        throw new ArgumentNullException("dict");
    }
    if (text == null)
    {
        throw new ArgumentNullException("text");
    }

    Delimiter = "/";
    StringBuilder sprText = new StringBuilder(text.Length + 20);

    // UtcNow is not affected by time-zone or daylight-saving adjustments during the measurement.
    DateTime start = DateTime.UtcNow;

    // A limit below 1 would produce an empty window and an out-of-range index further down.
    int maxWordLength = dict.WordMaxLength;
    if (maxWordLength < 1)
    {
        maxWordLength = 1;
    }

    int i = 0;
    while (i < text.Length)
    {
        // Numbers and English words are not looked up in the dictionary.
        if (Match.isNumberic(text[i]) || Match.isEnglish(text[i]))
        {
            // Measured against the whole text, not the dictionary window, so a long
            // number or word is no longer chopped into WordMaxLength-sized pieces.
            int runLength = MeasureAlphanumericRun(text, i);
            sprText.Append(text, i, runLength).Append(Delimiter);
            i += runLength;
            continue;
        }

        int windowLength = Math.Min(maxWordLength, text.Length - i);
        string candidate = text.Substring(i, windowLength);
        bool isSeparated = false;

        // Try the longest candidate first and drop one trailing character per miss.
        while (candidate.Length > 1)
        {
            if (dict.Find(candidate))
            {
                sprText.Append(candidate).Append(Delimiter);
                i += candidate.Length;
                isSeparated = true;
                break;
            }
            candidate = candidate.Substring(0, candidate.Length - 1);
        }

        if (!isSeparated)
        {
            // No multi-character entry matched: emit the single character on its own.
            sprText.Append(candidate[0]).Append(Delimiter);
            i++;
        }
    }

    timespan = DateTime.UtcNow - start;
    return sprText.ToString();
}

/// <summary>
/// Returns the length of the number/English token that begins at <paramref name="start"/>.
/// </summary>
private static int MeasureAlphanumericRun(string text, int start)
{
    int end = start + 1;
    bool sawDigit = false;

    while (end < text.Length && Match.isNumberic(text[end]))
    {
        end++;
        sawDigit = true;
    }

    // Letters are consumed only when no digit directly followed the first character.
    while (!sawDigit && end < text.Length && Match.isEnglish(text[end]))
    {
        end++;
    }

    return end - start;
}
/// <summary>
/// Lets the user choose a text file, builds the dictionary from it and stores the
/// serialized dictionary as "Default.dict" in the application's base directory.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void btnOpenDict_Click(object sender, RoutedEventArgs e)
{
    OpenFileDialog ofd = new OpenFileDialog();
    ofd.Filter = "文本文件(*.txt)|*.txt";
    if (ofd.ShowDialog() != true)
    {
        // Dialog was cancelled or closed: keep the current dictionary untouched.
        return;
    }

    chinesedict = ChineseDictionary.InitializeFormFile(ofd.FileName);

    // NOTE(review): BinaryFormatter is obsolete and unsafe for untrusted data; it is kept
    // here only so the cache format stays unchanged. Consider migrating to another serializer.
    // The using block guarantees the file handle is released even if Serialize throws.
    using (FileStream fs = new FileStream(AppDomain.CurrentDomain.BaseDirectory + "Default.dict", FileMode.Create))
    {
        BinaryFormatter formatter = new BinaryFormatter();
        formatter.Serialize(fs, chinesedict);
    }
}
/// <summary>
/// Loads the serialized dictionary cache, if present, when the window has loaded and shows
/// the elapsed time in milliseconds in <c>tbTime</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
void Window_Loaded(object sender, RoutedEventArgs e)
{
    DateTime start = DateTime.UtcNow;

    // The dictionary-import handler writes the cache under the application's base directory.
    // A bare relative "Default.dict" resolves against the current working directory instead,
    // which can point elsewhere, so the same base-directory path is used for reading.
    string cachePath = AppDomain.CurrentDomain.BaseDirectory + "Default.dict";

    if (File.Exists(cachePath))
    {
        // Read-only access is enough and also works when the install folder is not writable.
        using (FileStream fs = new FileStream(cachePath, FileMode.Open, FileAccess.Read, FileShare.Read))
        {
            // NOTE(review): BinaryFormatter deserialization is unsafe for untrusted files;
            // kept so existing caches remain readable.
            BinaryFormatter formatter = new BinaryFormatter();
            chinesedict = (ChineseDictionary)formatter.Deserialize(fs);
        }
    }

    TimeSpan ts = DateTime.UtcNow - start;
    tbTime.Text = ts.TotalMilliseconds.ToString();
}
/// <summary>
/// Builds a dictionary from a UTF-8 text file by adding every line of the file as one entry.
/// </summary>
/// <param name="filePath">Path of the text file to read.</param>
/// <returns>A new dictionary containing one entry per line of the file.</returns>
/// <exception cref="FileNotFoundException">The file does not exist.</exception>
public static ChineseDictionary InitializeFormFile(string filePath)
{
    if (!File.Exists(filePath))
    {
        // More specific than a bare Exception; callers that catch Exception still catch it.
        throw new FileNotFoundException("File does not exist", filePath);
    }

    ChineseDictionary dict = new ChineseDictionary();

    // The using block closes the reader even when Add or ReadLine throws.
    using (StreamReader stream = new StreamReader(filePath, System.Text.Encoding.UTF8))
    {
        string line;
        while ((line = stream.ReadLine()) != null)
        {
            dict.Add(line);
        }
    }

    return dict;
}
/// <summary>
/// Segments unsegmented text into words and counts how often each word occurs.
/// </summary>
/// <param name="text">The text to segment and count.</param>
/// <param name="dict">The dictionary the segmentation is based on.</param>
/// <returns>
/// The word-frequency dictionary produced by the single-argument overload for the segmented text.
/// </returns>
public static Dictionary<string, int> GetWordFrequency(string text, ChineseDictionary dict)
{
    // Segment first, then hand the delimited string to the counting overload.
    return GetWordFrequency(SeparateWord.Separate(dict, text));
}
/// <summary>
/// Stores <paramref name="delimiter"/> in the shared <c>Delimiter</c> member and then segments
/// <paramref name="text"/> through the two-argument overload.
/// </summary>
/// <remarks>
/// NOTE(review): the two-argument overload assigns <c>Delimiter = "/"</c> as its first
/// statement, so the value stored here is overwritten before any token is written and the
/// result is still "/"-separated. Confirm whether the custom delimiter is meant to be honored.
/// </remarks>
/// <param name="dict">Dictionary used for segmentation.</param>
/// <param name="text">Text to segment.</param>
/// <param name="delimiter">Requested token delimiter.</param>
/// <returns>The segmented text returned by the two-argument overload.</returns>
public static string Separate(ChineseDictionary dict, string text, string delimiter)
{
    Delimiter = delimiter;
    string separated = Separate(dict, text);
    return separated;
}