예제 #1
0
        /// <summary>
        /// Classifies the given text into one of four categories.
        /// </summary>
        /// <param name="text">The text to classify.</param>
        /// <returns>
        /// A 1-based category number (1-4), taken from the last entry of the result list
        /// after it has been sorted with <c>ClassifyResult.Compare</c>; or 0 when the
        /// evidence check fails and at least one remaining term is not a keyword.
        /// </returns>
        public static Int16 Classify(String text)
        {
            // Categories are indexed 0..classCount-1 internally and reported 1-based.
            const Int16 classCount = 4;

            // Chinese word segmentation; the output may still contain stop words.
            var terms = BaseNLP.CutWord(text);
            // Strip stop words so they do not influence the classification.
            List<String> term = DropStopWords(terms);
            List<ClassifyResult> crs = new List<ClassifyResult>(classCount);
            Int32 validWordNum = 0;

            for (Int16 i = 0; i < classCount; i++)
            {
                int Ci = i; // index of the category being scored
                // Conditional probabilities of the term vector under category Ci.
                List<float> probility = CalcProd(term, Ci);
                crs.Add(new ClassifyResult(probility, i));
                // Overwritten on every pass, so only the last category's count survives the loop.
                validWordNum = probility.Count - 1;
            }

            // The test is deliberately phrased as "not (ratio > 0.8 and count > 5)":
            // a NaN ratio (0 / 0 when no terms remain) compares false and therefore
            // still takes the keyword fallback below, exactly like a low ratio.
            double validRatio = Convert.ToDouble(validWordNum) / term.Count;
            bool enoughEvidence = validRatio > 0.8 && validWordNum > 5;
            if (!enoughEvidence && term.Exists(word => !IsKeyWord(word)))
            {
                // Weak evidence and a non-keyword term present: report "no category".
                return 0;
            }

            // Sort the per-category results; the winner is read from the last position.
            crs.Sort(ClassifyResult.Compare);

            // Convert the 0-based category index to the 1-based value callers expect,
            // without mutating the stored result object.
            return (Int16)(crs[crs.Count - 1].classification + 1);
        }
예제 #2
0
        /// <summary>
        /// Accumulates word frequencies over every entry of <c>trainingSet[casetpye]</c>.
        /// </summary>
        /// <param name="casetpye">Selector used to index <c>trainingSet</c>.</param>
        /// <returns>
        /// The dictionary obtained by starting from an empty table and folding in the
        /// <c>BaseNLP.SentanceTF</c> result of each entry through <c>MergeDict</c>.
        /// </returns>
        private Dictionary<String, int> CalcuCaseWordsFrequen(int casetpye)
        {
            Dictionary<String, int> merged = new Dictionary<String, int>();

            foreach (var sentence in trainingSet[casetpye])
            {
                Dictionary<String, int> sentenceCounts = BaseNLP.SentanceTF(sentence);
                // MergeDict's return value becomes the accumulator for the next entry.
                merged = MergeDict(merged, sentenceCounts);
            }

            return merged;
        }
예제 #3
0
 /// <summary>
 /// Delegates to <c>BaseNLP.TF</c> for the given word-segmentation result.
 /// </summary>
 /// <param name="cutWordResult">The list of <c>Pair</c> items to pass through.</param>
 /// <returns>
 /// Whatever <c>BaseNLP.TF</c> returns (presumably word-to-count frequencies; confirm against BaseNLP).
 /// </returns>
 protected Dictionary<String, int> NormalTF(List<Pair> cutWordResult)
 {
     Dictionary<String, int> frequencies = BaseNLP.TF(cutWordResult);
     return frequencies;
 }
예제 #4
0
 /// <summary>
 /// Removes stop words by delegating to <c>BaseNLP.DropStopWords</c>.
 /// </summary>
 /// <param name="oldWords">The word list to filter.</param>
 /// <returns>The result with stop words removed, as returned by <c>BaseNLP.DropStopWords</c>.</returns>
 static List<String> DropStopWords(List<Pair> oldWords)
 {
     List<String> filtered = BaseNLP.DropStopWords(oldWords);
     return filtered;
 }
예제 #5
0
 /// <summary>
 /// Delegates to <c>BaseNLP.CutWord</c> for the given string.
 /// </summary>
 /// <param name="file">
 /// The string handed to <c>BaseNLP.CutWord</c>.
 /// NOTE(review): the name suggests a file, but the value is forwarded as-is; confirm whether it is a path or the text itself.
 /// </param>
 /// <returns>The list of <c>Pair</c> items returned by <c>BaseNLP.CutWord</c>.</returns>
 protected List<Pair> CutWord(String file)
 {
     List<Pair> segments = BaseNLP.CutWord(file);
     return segments;
 }