Пример #1
0
        /// <summary>
        /// Segments <paramref name="str"/> with <c>WordCutTool.cut</c> (using <c>CutTool.nlpir</c>)
        /// and counts how often each retained word occurs.
        /// </summary>
        /// <param name="str">Text to segment and count.</param>
        /// <param name="all">
        /// When <c>true</c> (the default), only words whose flag starts with "N" or "V"
        /// (case-insensitive) are counted. When <c>false</c>, every word is counted except
        /// those whose flag starts with "W".
        /// NOTE(review): the parameter name reads as the opposite of what it does; the
        /// behavior is kept unchanged for existing callers.
        /// </param>
        /// <returns>
        /// A dictionary keyed by word text. Each value is a <c>WordPair</c> built from the first
        /// occurrence of the word, with <c>Num</c> set to the number of counted occurrences.
        /// </returns>
        public static Dictionary <string, WordPair> getTF(string str, bool all = true)
        {
            List <WordPair> words             = WordCutTool.cut(str, CutTool.nlpir);
            Dictionary <string, WordPair> res = new Dictionary <string, WordPair>();

            foreach (var w in words)
            {
                string flag = w.Flag;

                // Flags are short tag codes, not linguistic text, so compare ordinally
                // (case-insensitive) instead of upper-casing with the current culture.
                bool isNounOrVerb = flag.StartsWith("N", System.StringComparison.OrdinalIgnoreCase) ||
                                    flag.StartsWith("V", System.StringComparison.OrdinalIgnoreCase);
                if (all && !isNounOrVerb)
                {
                    continue;
                }

                // "W" flags are always dropped (presumably punctuation tags - TODO confirm).
                if (flag.StartsWith("W", System.StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                // Single lookup: bump the existing entry, or create one starting at 1.
                WordPair counted;
                if (res.TryGetValue(w.Word, out counted))
                {
                    counted.Num += 1;
                }
                else
                {
                    counted     = new WordPair(w);
                    counted.Num = 1;
                    res[w.Word] = counted;
                }
            }
            return(res);
        }
Пример #2
0
        /// <summary>
        /// Accumulates per-word document counts over <paramref name="files"/>. For every file, each
        /// distinct word whose flag starts with "N" or "V" (case-insensitive) has its <c>n</c>
        /// incremented by one. Afterwards <c>dn</c> of every entry is set to the largest <c>dn</c>
        /// found in the incoming <paramref name="dfs"/> plus the number of files processed.
        /// No IDF value is computed here; only the counts are maintained.
        /// </summary>
        /// <param name="dfs">
        /// Existing statistics to update in place, or <c>null</c> to start from an empty dictionary.
        /// </param>
        /// <param name="files">Paths of the text files to read and segment.</param>
        /// <returns>
        /// The updated dictionary (the same instance as <paramref name="dfs"/> when one was supplied).
        /// When <paramref name="files"/> is <c>null</c> or empty, a new empty dictionary is returned.
        /// </returns>
        public static Dictionary <string, WordDF> getIDF(Dictionary <string, WordDF> dfs = null, string[] files = null)
        {
            if (files == null || files.Length <= 0)
            {
                // NOTE(review): a supplied dfs is discarded on this path; confirm callers
                // do not expect it to be returned unchanged.
                return(new Dictionary <string, WordDF>());
            }
            int filenum = files.Length;

            if (dfs == null)
            {
                dfs = new Dictionary <string, WordDF>();
            }

            // The largest dn already stored is taken as the number of documents counted so far.
            int oldfilenum = 0;

            foreach (var existing in dfs.Values)
            {
                if (existing.dn > oldfilenum)
                {
                    oldfilenum = existing.dn;
                }
            }

            foreach (string file in files)
            {
                string filecontent = File.ReadAllText(file, TxtIOController.getEncoding2(file));
                var    pairs       = WordCutTool.cut(filecontent, CutTool.nlpir);

                // Collapse to one entry per distinct word so a file contributes at most 1 to n.
                // The last occurrence of a word decides which flag is kept for it.
                Dictionary <string, WordPair> wordpairs = new Dictionary <string, WordPair>();
                foreach (var p in pairs)
                {
                    wordpairs[p.Word] = p;
                }

                foreach (var word in wordpairs)
                {
                    // Only nouns and verbs are analyzed. Flags are tag codes, so compare
                    // ordinally (case-insensitive) rather than via culture-sensitive ToUpper().
                    string flag         = word.Value.Flag;
                    bool   isNounOrVerb = flag.StartsWith("N", System.StringComparison.OrdinalIgnoreCase) ||
                                          flag.StartsWith("V", System.StringComparison.OrdinalIgnoreCase);
                    if (!isNounOrVerb)
                    {
                        continue;
                    }

                    WordDF entry;
                    if (!dfs.TryGetValue(word.Key, out entry))
                    {
                        entry         = new WordDF(word.Key, 0, 0);
                        dfs[word.Key] = entry;
                    }
                    entry.n += 1;
                }
            }

            // Every entry, old and new, gets the same updated document total.
            int totalfilenum = oldfilenum + filenum;
            foreach (var entry in dfs.Values)
            {
                entry.dn = totalfilenum;
            }
            return(dfs);
        }