/// <summary>
/// Segments <paramref name="str"/> into tagged words and counts how often each retained word occurs.
/// </summary>
/// <param name="str">Text handed to <c>WordCutTool.cut</c> with <c>CutTool.nlpir</c>.</param>
/// <param name="all">
/// When <c>true</c> (the default), only words whose tag starts with "N" or "V" (nouns and verbs) are counted.
/// When <c>false</c>, every word is counted except those whose tag starts with "W".
/// NOTE(review): the parameter name reads as the opposite of this behavior; it is kept unchanged so
/// existing callers are not affected.
/// </param>
/// <returns>
/// A map from word text to a <c>WordPair</c> copy of the first occurrence, whose <c>Num</c> holds the
/// number of occurrences of that word in <paramref name="str"/>.
/// </returns>
public static Dictionary<string, WordPair> getTF(string str, bool all = true)
{
    // Tags are machine identifiers, so compare them ordinally and case-insensitively
    // rather than through the current culture (ToUpper + culture-sensitive StartsWith).
    const System.StringComparison IgnoreCase = System.StringComparison.OrdinalIgnoreCase;

    List<WordPair> words = WordCutTool.cut(str, CutTool.nlpir);
    Dictionary<string, WordPair> res = new Dictionary<string, WordPair>();

    foreach (var w in words)
    {
        string flag = w.Flag;
        bool isNounOrVerb = flag.StartsWith("N", IgnoreCase) || flag.StartsWith("V", IgnoreCase);

        if (all && !isNounOrVerb)
        {
            continue;
        }

        // "W" tags are always skipped (presumably punctuation in the tagger's tag set -- TODO confirm).
        if (flag.StartsWith("W", IgnoreCase))
        {
            continue;
        }

        // Single lookup instead of ContainsKey followed by repeated indexer access.
        WordPair counted;
        if (res.TryGetValue(w.Word, out counted))
        {
            counted.Num += 1;
        }
        else
        {
            counted = new WordPair(w);
            counted.Num = 1;
            res[w.Word] = counted;
        }
    }

    return res;
}
/// <summary>
/// Updates per-word document-frequency statistics with a new batch of text files.
/// </summary>
/// <param name="dfs">
/// Statistics accumulated by earlier calls; updated in place. A new dictionary is created when null.
/// </param>
/// <param name="files">
/// Paths of the files to add (for example the result of <c>Directory.GetFiles(dir, "*.txt")</c>).
/// </param>
/// <returns>
/// The updated dictionary. For every entry, <c>n</c> has been incremented once per new file that
/// contains the word as a noun or verb, and <c>dn</c> is set to the previous document count (the
/// largest <c>dn</c> found in <paramref name="dfs"/>) plus the number of new files. When
/// <paramref name="files"/> is null or empty, <paramref name="dfs"/> is returned unchanged (an empty
/// dictionary if it was null) instead of discarding previously accumulated statistics.
/// </returns>
public static Dictionary<string, WordDF> getIDF(Dictionary<string, WordDF> dfs = null, string[] files = null)
{
    // Tags are machine identifiers: compare ordinally and case-insensitively, not via the current culture.
    const System.StringComparison IgnoreCase = System.StringComparison.OrdinalIgnoreCase;

    if (dfs == null)
    {
        dfs = new Dictionary<string, WordDF>();
    }

    // Nothing new to add: keep whatever the caller already accumulated.
    if (files == null || files.Length == 0)
    {
        return dfs;
    }

    // The number of documents seen so far is recovered as the largest dn stored in the existing entries.
    int previousFileCount = 0;
    foreach (var existing in dfs.Values)
    {
        if (existing.dn > previousFileCount)
        {
            previousFileCount = existing.dn;
        }
    }

    foreach (string file in files)
    {
        string content = File.ReadAllText(file, TxtIOController.getEncoding2(file));

        // Collapse repeated words so one file contributes at most 1 to a word's count.
        // When a word occurs with several tags, the tag of its last occurrence is the one used.
        Dictionary<string, WordPair> uniqueWords = new Dictionary<string, WordPair>();
        foreach (var p in WordCutTool.cut(content, CutTool.nlpir))
        {
            uniqueWords[p.Word] = p;
        }

        foreach (var entry in uniqueWords)
        {
            string flag = entry.Value.Flag;

            // Only nouns and verbs are analyzed.
            if (!flag.StartsWith("N", IgnoreCase) && !flag.StartsWith("V", IgnoreCase))
            {
                continue;
            }

            // Single lookup instead of ContainsKey followed by repeated indexer access.
            WordDF stat;
            if (!dfs.TryGetValue(entry.Key, out stat))
            {
                stat = new WordDF(entry.Key, 0, 0);
                dfs[entry.Key] = stat;
            }
            stat.n += 1;
        }
    }

    // Every entry, old or new, records the same total document count.
    int totalFileCount = previousFileCount + files.Length;
    foreach (var stat in dfs.Values)
    {
        stat.dn = totalFileCount;
    }

    return dfs;
}