/// <summary>
/// Trains on the existing labelled samples, filling <paramref name="d"/> and <paramref name="total"/>.
/// </summary>
/// <remarks>
/// Both corpora are read completely, split into one sentence per line (carriage returns are
/// stripped first), segmented with <c>Jieba.handle_sentiment</c>, and every resulting word is
/// added with a count of 1 to the model stored under the sentence's label.
/// A corpus that ends with a newline yields a final empty sentence, which is still segmented.
/// </remarks>
/// <param name="negFilePath">Path of the negative training set; read with <c>Encoding.Default</c>.</param>
/// <param name="negWords">Negative lexicon text, handed to the segmenter as <c>NegWords</c>.</param>
/// <param name="posFilePath">Path of the positive training set; read with <c>Encoding.Default</c>.</param>
/// <param name="posWords">Positive lexicon text, handed to the segmenter as <c>PosWords</c>.</param>
/// <param name="d">
/// Dictionary holding the positive and negative word models. It is indexed with the keys
/// "pos" and "neg" and is not initialised here, so the caller must supply those entries.
/// </param>
/// <param name="total">Overwritten with the sum of <c>total</c> over every model in <paramref name="d"/>.</param>
/// <param name="stopwordFilepath">Path of the stop-word file, loaded through <c>ReadTxtToEnd</c>.</param>
public static void Train_data(string negFilePath, string negWords, string posFilePath, string posWords, ref Dictionary<string, AddOneProb> d, ref double total, string stopwordFilepath)
{
    // Load the raw inputs: negative corpus, positive corpus, then the stop-word list.
    string negativeCorpus;
    using (var negativeReader = new StreamReader(negFilePath, Encoding.Default))
    {
        negativeCorpus = negativeReader.ReadToEnd();
    }

    string positiveCorpus;
    using (var positiveReader = new StreamReader(posFilePath, Encoding.Default))
    {
        positiveCorpus = positiveReader.ReadToEnd();
    }

    string stopwordText = ReadTxtToEnd(stopwordFilepath);

    // Each entry pairs the segmented words of one sentence with its label.
    var labelledSamples = new List<Tuple<List<string>, string>>();
    var segmenter = new Jieba { NegWords = negWords, PosWords = posWords };

    string[] positiveLines = positiveCorpus.Replace("\r", "").Split('\n');
    foreach (string line in positiveLines)
    {
        segmenter.doc = line;
        segmenter.stopwords = stopwordText;

        // Tuple<T1, T2> has no parameterless constructor, so both items are supplied at once.
        List<string> positiveTokens = segmenter.handle_sentiment();
        labelledSamples.Add(new Tuple<List<string>, string>(positiveTokens, "pos"));
    }
    Console.WriteLine("正面词库导入完毕");

    string[] negativeLines = negativeCorpus.Replace("\r", "").Split('\n');
    foreach (string line in negativeLines)
    {
        segmenter.doc = line;
        segmenter.stopwords = stopwordText;

        // Negative sentences are segmented with the flag explicitly set to false.
        List<string> negativeTokens = segmenter.handle_sentiment(false);
        labelledSamples.Add(new Tuple<List<string>, string>(negativeTokens, "neg"));
    }
    Console.WriteLine("负面词库导入完毕");

    // Feed every word into the model registered under the sample's label.
    foreach (var sample in labelledSamples)
    {
        List<string> words = sample.Item1;
        if (words != null)
        {
            string label = sample.Item2;
            foreach (string word in words)
            {
                d[label].add(word, 1);
            }
        }
    }

    // Sum the per-label totals of all models in the dictionary.
    total = 0;
    foreach (AddOneProb model in d.Values)
    {
        total += model.total;
    }
}
/// <summary>
/// Segments a sentence and classifies it against the trained models, returning a score
/// expressed relative to the "pos" label.
/// </summary>
/// <param name="sent">Sentence to classify.</param>
/// <param name="d">Trained models, passed through unchanged to <c>Classify</c>.</param>
/// <param name="total">Overall total, passed through unchanged to <c>Classify</c>.</param>
/// <param name="stopwordFilepath">Path of the stop-word file, loaded through <c>ReadTxtToEnd</c>.</param>
/// <returns>
/// The second item of the <c>Classify</c> result when its label is "pos"; otherwise one minus
/// that value. (Presumably the second item is the probability of the winning label - confirm
/// against <c>Classify</c>.)
/// </returns>
public static double classify_(string sent, Dictionary<string, AddOneProb> d, double total, string stopwordFilepath)
{
    // The stop-word file is re-read on every call before segmentation.
    var segmenter = new Jieba
    {
        doc = sent,
        stopwords = ReadTxtToEnd(stopwordFilepath)
    };

    var verdict = Classify(segmenter.JiebaCut(), d, total);

    // A non-"pos" verdict is flipped so the result is always stated for the positive side.
    return verdict.Item1 == "pos" ? verdict.Item2 : 1 - verdict.Item2;
}