/// <summary>
/// Reads the input file at <c>pathIn</c> line by line and feeds every line to
/// <paramref name="datanumber"/> so it can update its statistics.
/// </summary>
/// <param name="datanumber">Accumulator whose <c>Calculate</c> method is called once per line read.</param>
/// <param name="wtrie">Word trie handed through to <c>Calculate</c> together with each line.</param>
/// <returns>The same <paramref name="datanumber"/> instance that was passed in.</returns>
/// <remarks>
/// This method is best-effort: any exception raised while opening, reading or
/// processing the file is caught, "wrong!" is printed to standard output, and
/// whatever has been accumulated so far is returned.
/// </remarks>
public WordCalculate Input(WordCalculate datanumber, WordTrie wtrie)
{
    try
    {
        // Request read-only access explicitly: the (path, mode) constructor asks for
        // read/write access, which fails on read-only files even though we only read.
        using (FileStream fs = new FileStream(this.pathIn, FileMode.Open, FileAccess.Read))
        using (StreamReader sr = new StreamReader(fs))
        {
            string dataline;
            while ((dataline = sr.ReadLine()) != null)
            {
                // Update the statistics with this line.
                datanumber.Calculate(dataline, wtrie);
            }
        }
    }
    catch (Exception ex)
    {
        // Keep the original message on stdout; put the actual cause on stderr so
        // the failure can be diagnosed without changing the normal output.
        Console.WriteLine("wrong!");
        Console.Error.WriteLine(ex.Message);
    }

    return datanumber;
}
/// <summary>
/// Runs the read / count / write sequence against the fixed paths
/// <c>F:\Demo.txt</c> (input) and <c>F:\Result.txt</c> (output) and returns the totals.
/// </summary>
/// <returns>
/// A <c>Result</c> whose character, word and line counters are copied from the
/// accumulator after the input has been processed and the report written.
/// </returns>
public static Result Maintest()
{
    var fileIo = new WordIO
    {
        pathIn = "F:\\Demo.txt",
        pathOut = "F:\\Result.txt"
    };
    var stats = new WordCalculate();
    var trie = new WordTrie();
    var summary = new Result();

    // Read the input file line by line and accumulate the statistics.
    stats = fileIo.Input(stats, trie);
    fileIo.Output(stats, trie);

    summary.charactersnumber = stats.charactersnumber;
    summary.wordsnumber = stats.wordsnumber;
    summary.linesnumber = stats.linesnumber;
    return summary;
}
public long linesnumber = 0; // Statistic: number of lines counted so far.

/// <summary>
/// Updates the running statistics with one line of text and inserts the words
/// found in it into <paramref name="wtrie"/>.
/// </summary>
/// <param name="dataline">One line of input. A null or empty line is ignored entirely.</param>
/// <param name="wtrie">Trie that receives each word; its <c>CountSum</c> is read back as the word total.</param>
/// <remarks>
/// A word is a maximal run of ASCII letters and digits whose first character is
/// a letter; runs that start with a digit are dropped. Words are inserted in
/// lower case. After the scan, <c>linesnumber</c> is incremented,
/// <c>wordsnumber</c> is set to <c>wtrie.CountSum</c>, and <c>charactersnumber</c>
/// grows by <c>dataline.Length</c>.
/// </remarks>
public void Calculate(string dataline, WordTrie wtrie)
{
    // Null or empty lines contribute nothing: no line, word or character is counted.
    if (string.IsNullOrEmpty(dataline))
    {
        return;
    }

    int len = dataline.Length;
    int wordStart = -1;             // Index where the current alphanumeric run began, or -1 if none.
    bool startsWithLetter = false;  // Whether that run began with a letter.

    // Walk one position past the end so the last run is flushed by the same
    // code path as every other run ('\0' acts as a separator).
    for (int i = 0; i <= len; i++)
    {
        char unit = i < len ? dataline[i] : '\0';
        bool isLetter = (unit >= 'a' && unit <= 'z') || (unit >= 'A' && unit <= 'Z');
        bool isDigit = unit >= '0' && unit <= '9';

        if (isLetter || isDigit)
        {
            if (wordStart < 0)
            {
                wordStart = i;
                startsWithLetter = isLetter;
            }
            continue;
        }

        // A separator: close the run that just ended, if there was one.
        if (wordStart >= 0)
        {
            if (startsWithLetter)
            {
                // The run holds only ASCII letters and digits, so the invariant
                // lower-casing only maps A-Z to a-z.
                wtrie.Insert(dataline.Substring(wordStart, i - wordStart).ToLowerInvariant());
            }
            wordStart = -1;
        }
    }

    this.linesnumber++;                        // Count this line.
    this.wordsnumber = wtrie.CountSum;         // Word total as reported by the trie.
    this.charactersnumber += dataline.Length;  // Count this line's characters.
}
/// <summary>
/// Writes the totals and up to ten word entries both to the file at
/// <c>pathOut</c> and to the console.
/// </summary>
/// <param name="datanumber">Source of the character, word and line totals that are printed.</param>
/// <param name="wtrie">Trie whose <c>Sort()</c> result supplies the word entries, printed in the order returned.</param>
/// <remarks>
/// The output file is created (or overwritten) before <c>Sort()</c> is called.
/// This method is best-effort: any exception, including one raised while the
/// writer is flushed on disposal, is caught and reported as "文档写入失败!" on
/// standard output.
/// </remarks>
public void Output(WordCalculate datanumber, WordTrie wtrie)
{
    const int MaxEntries = 10; // Upper bound on the number of word entries listed.

    try
    {
        // Dispose inside the try block so a failure while flushing or closing is
        // reported by the catch below instead of escaping the method.
        using (FileStream fs = new FileStream(this.pathOut, FileMode.Create))
        using (StreamWriter sw = new StreamWriter(fs))
        {
            List<WordTrie.ListUnit> wordList = wtrie.Sort();

            sw.WriteLine("字符总数为:{0}", datanumber.charactersnumber);
            sw.WriteLine("单词总数为:{0}", datanumber.wordsnumber);
            sw.WriteLine("有效行数为:{0}", datanumber.linesnumber);
            sw.WriteLine("\n词频\t单词\n");

            Console.WriteLine("字符总数为:{0}", datanumber.charactersnumber);
            Console.WriteLine("单词总数为:{0}", datanumber.wordsnumber);
            Console.WriteLine("有效行数为:{0}", datanumber.linesnumber);
            Console.WriteLine("\n词频\t单词\n");

            // Each entry goes to the file first and then to the console.
            for (int i = 0; i < MaxEntries && i < wordList.Count; i++)
            {
                sw.WriteLine("{0}\t{1}", wordList[i].WordNum, wordList[i].Word);
                Console.WriteLine("{0}\t{1}", wordList[i].WordNum, wordList[i].Word);
            }
        }
    }
    catch (Exception ex)
    {
        // Keep the original message on stdout; put the actual cause on stderr.
        Console.WriteLine("文档写入失败!");
        Console.Error.WriteLine(ex.Message);
    }
}