예제 #1
0
        /// <summary>
        /// Tokenizes the positive and negative training texts line by line, adds every
        /// resulting word to the "pos" / "neg" entry of <paramref name="d"/>, calls
        /// <c>CalculatRate</c> on every entry, and finally passes each tokenized sample
        /// through <c>CoreCalculations.Sensor</c> (the whole sample set is replayed twice).
        /// </summary>
        /// <param name="negWords">Negative training text; split on line breaks, one sample per non-blank line.</param>
        /// <param name="negFilters">Value assigned to the tokenizer's <c>NegWords</c> property (presumably the negative word lexicon; confirm against <c>Jieba</c>).</param>
        /// <param name="posWords">Positive training text; split on line breaks, one sample per non-blank line.</param>
        /// <param name="posFilters">Value assigned to the tokenizer's <c>PosWords</c> property (presumably the positive word lexicon; confirm against <c>Jieba</c>).</param>
        /// <param name="d">
        /// Dictionary that is indexed with the keys "pos" and "neg"; both entries must already
        /// exist. The entries are updated in place; the reference itself is never reassigned here.
        /// </param>
        /// <param name="stopwords">Value assigned to the tokenizer's <c>stopwords</c> property before each sample is tokenized.</param>
        public static void Train_data(string negWords, string negFilters, string posWords, string posFilters,
                                      ref Dictionary <string, AddOneProb> d, string stopwords)
        {
            // Number of times the collected samples are replayed through Sensor.
            const int trainingPasses = 2;

            var data = new List<Tuple<List<string>, string>>();
            var sent_cut = new Jieba();

            sent_cut.NegWords = negFilters;
            sent_cut.PosWords = posFilters;

            foreach (var sent in posWords.Replace("\r", "").Split('\n'))
            {
                if (string.IsNullOrWhiteSpace(sent))
                {
                    continue;
                }
                sent_cut.doc       = FilterSymbol(sent);
                sent_cut.stopwords = stopwords;
                // NOTE(review): the positive pass passes `false` while the negative pass below
                // relies on the default argument; kept as-is, confirm this asymmetry is intended.
                var words = sent_cut.handle_sentiment(false);

                // Guard BEFORE enumerating: a null token list used to throw here,
                // which made the later null check unreachable.
                if (words == null || words.Count == 0)
                {
                    continue;
                }
                foreach (var word in words)
                {
                    d["pos"].Add(word, CoreCalculations.IDCount, 1);
                }
                data.Add(new Tuple<List<string>, string>(words, "pos"));
            }
            Console.WriteLine("正面词库导入完毕");

            foreach (var sent in negWords.Replace("\r", "").Split('\n'))
            {
                if (string.IsNullOrWhiteSpace(sent))
                {
                    continue;
                }
                sent_cut.doc       = FilterSymbol(sent);
                sent_cut.stopwords = stopwords;
                var words = sent_cut.handle_sentiment();

                // Same guard as in the positive pass.
                if (words == null || words.Count == 0)
                {
                    continue;
                }
                foreach (var word in words)
                {
                    d["neg"].Add(word, CoreCalculations.IDCount, 1);
                }
                data.Add(new Tuple<List<string>, string>(words, "neg"));
            }
            Console.WriteLine("负面词库导入完毕");

            // Compute the frequencies for every entry once all words have been added.
            foreach (var k in d)
            {
                k.Value.CalculatRate();
            }

            for (int pass = 0; pass < trainingPasses; pass++)
            {
                foreach (var sample in data)
                {
                    // Train on each collected sample; Item2 is the "pos"/"neg" label.
                    CoreCalculations.Sensor(d, sample.Item1, sample.Item2);
                }
            }
        }
예제 #2
0
        /// <summary>
        /// Classifies a single piece of text: the text is passed through
        /// <c>Train.FilterSymbol</c>, tokenized with <c>Jieba.JiebaCut</c>, and the
        /// tokens are handed to <c>Sensor</c> together with <paramref name="d"/>.
        /// </summary>
        /// <param name="sent">Text to classify.</param>
        /// <param name="d">Dictionary forwarded unchanged to <c>Sensor</c>.</param>
        /// <param name="stopwords">Value assigned to the tokenizer's <c>stopwords</c> property.</param>
        /// <returns>The dictionary returned by <c>Sensor</c> for the tokenized text.</returns>
        public static Dictionary <string, double> Classify_(string sent, Dictionary <string, AddOneProb> d, string stopwords)
        {
            var segmenter = new Jieba
            {
                doc       = Train.FilterSymbol(sent),
                stopwords = stopwords
            };

            var tokens = segmenter.JiebaCut();
            return Sensor(d, tokens);
        }