Пример #1
0
        /// <summary>
        /// Trains the sentiment model from positive and negative sample text.
        /// Every non-blank line of <paramref name="posWords"/> and <paramref name="negWords"/> is
        /// cleaned with FilterSymbol and segmented by a Jieba instance; each resulting word is added
        /// to the "pos" or "neg" entry of <paramref name="d"/>. Afterwards CalculatRate is invoked on
        /// every entry of <paramref name="d"/> and each segmented sample is passed to
        /// CoreCalculations.Sensor twice.
        /// </summary>
        /// <param name="negWords">Negative training text, one sample per line ("\r" is stripped, split on "\n").</param>
        /// <param name="negFilters">Negative word list text, assigned to the segmenter's NegWords.</param>
        /// <param name="posWords">Positive training text, one sample per line ("\r" is stripped, split on "\n").</param>
        /// <param name="posFilters">Positive word list text, assigned to the segmenter's PosWords.</param>
        /// <param name="d">
        /// Model dictionary, updated in place. It must already contain the keys "pos" and "neg";
        /// otherwise the indexer throws as soon as the first word is added.
        /// </param>
        /// <param name="stopwords">Stop-word text handed to the segmenter before each sample is processed.</param>
        public static void Train_data(string negWords, string negFilters, string posWords, string posFilters,
                                      ref Dictionary<string, AddOneProb> d, string stopwords)
        {
            // Number of times the whole sample set is replayed through CoreCalculations.Sensor.
            const int trainingPasses = 2;

            // Segmented samples paired with their label ("pos" / "neg"), kept for the Sensor passes.
            var data = new List<Tuple<List<string>, string>>();
            var sent_cut = new Jieba();

            sent_cut.NegWords = negFilters;
            sent_cut.PosWords = posFilters;

            // NOTE(review): positive samples are segmented with handle_sentiment(false) while negative
            // samples use the parameterless call; the meaning of the flag is not visible here - confirm.
            foreach (var sent in posWords.Replace("\r", "").Split('\n'))
            {
                if (string.IsNullOrWhiteSpace(sent))
                {
                    continue;
                }
                sent_cut.doc       = FilterSymbol(sent);
                sent_cut.stopwords = stopwords;
                var words = sent_cut.handle_sentiment(false);

                // Validate BEFORE enumerating: iterating a null result would throw
                // NullReferenceException, and an empty result contributes nothing.
                if (words == null || words.Count == 0)
                {
                    continue;
                }
                foreach (var word in words)
                {
                    d["pos"].Add(word, CoreCalculations.IDCount, 1);
                }
                data.Add(new Tuple<List<string>, string>(words, "pos"));
            }
            Console.WriteLine("正面词库导入完毕");

            foreach (var sent in negWords.Replace("\r", "").Split('\n'))
            {
                if (string.IsNullOrWhiteSpace(sent))
                {
                    continue;
                }
                sent_cut.doc       = FilterSymbol(sent);
                sent_cut.stopwords = stopwords;
                var words = sent_cut.handle_sentiment();

                // Same guard as for the positive samples: skip null or empty segmentations.
                if (words == null || words.Count == 0)
                {
                    continue;
                }
                foreach (var word in words)
                {
                    d["neg"].Add(word, CoreCalculations.IDCount, 1);
                }
                data.Add(new Tuple<List<string>, string>(words, "neg"));
            }
            Console.WriteLine("负面词库导入完毕");

            // Recompute the rates of every entry now that all words have been counted.
            foreach (var k in d)
            {
                k.Value.CalculatRate();
            }

            // Feed every collected sample, together with its label, through the trainer.
            for (int pass = 0; pass < trainingPasses; pass++)
            {
                foreach (var sample in data)
                {
                    CoreCalculations.Sensor(d, sample.Item1, sample.Item2);
                }
            }
        }
Пример #2
0
        /// <summary>
        /// Console entry point: loads the stored model and word lists, trains on the positive and
        /// negative training sets, classifies every non-blank line of the "testpos" file and prints
        /// the ambiguous / negative / positive rates, followed by the negative / positive rates with
        /// ambiguous samples removed. Waits for a line of console input before exiting.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // A sample whose |pos - neg| score gap is at most this value is counted as ambiguous.
            const double ambiguityThreshold = 0.2;

            // Resources are read in the same order as before so any I/O failure surfaces identically.
            var storedModel = FileHandle.ReadTxtToEnd(SentimentFilepath + "sentiment_json.txt");
            var model       = Train.Load(storedModel);
            var posFilters  = FileHandle.ReadTxtToEnd(SentimentFilepath + "pos");         // positive words
            var posTraining = FileHandle.ReadTxtToEnd(SentimentFilepath + "pos_train");   // positive training set
            var negFilters  = FileHandle.ReadTxtToEnd(SentimentFilepath + "neg");         // negative words
            var negTraining = FileHandle.ReadTxtToEnd(SentimentFilepath + "neg_train");   // negative training set
            var stopWords   = FileHandle.ReadTxtToEnd(SentimentFilepath + "stopwords");   // words excluded from scoring

            // Import the training sets (replace the files to train on different data).
            Train.Train_data(negTraining, negFilters, posTraining, posFilters, ref model, stopWords);

            var testCorpus = FileHandle.ReadTxtToEnd(SentimentFilepath + "testpos");      // test set

            // Counters are doubles so the rate divisions below are floating-point.
            double total        = 0;
            double negTotal     = 0;
            double posTotal     = 0;
            double ambiguousNeg = 0;
            double ambiguousPos = 0;

            foreach (var line in testCorpus.Replace("\r", "").Split('\n'))
            {
                if (string.IsNullOrWhiteSpace(line))
                {
                    continue;
                }

                total++;
                var scores   = CoreCalculations.Classify_(line, model, stopWords);
                var posScore = scores["pos"];
                var negScore = scores["neg"];
                double gap   = Math.Abs(posScore - negScore);
                bool isAmbiguous = gap <= ambiguityThreshold;

                // Ties (neg == pos) fall into the positive bucket.
                if (negScore > posScore)
                {
                    negTotal++;
                    if (isAmbiguous)
                    {
                        ambiguousNeg++;
                    }
                }
                else
                {
                    posTotal++;
                    if (isAmbiguous)
                    {
                        ambiguousPos++;
                    }
                }
            }

            double ambiguousTotal = ambiguousNeg + ambiguousPos;
            Console.WriteLine("模糊率:" + ambiguousTotal / total);
            Console.WriteLine("负面率:" + negTotal / total);
            Console.WriteLine("正面率:" + posTotal / total);

            // Rates recomputed over the unambiguous samples only.
            double clearTotal = total - ambiguousTotal;
            double clearNeg   = negTotal - ambiguousNeg;
            double clearPos   = posTotal - ambiguousPos;
            Console.WriteLine("去模糊后负面率:" + clearNeg / clearTotal);
            Console.WriteLine("去模糊后正面率:" + clearPos / clearTotal);
            Console.ReadLine();
        }