/// <summary>
/// Scores the first <paramref name="dataSize"/> lines of the happy file with the
/// classifier loaded from "senti.csv", prints one line for every sample scored below
/// zero, and finally prints the average of the strictly positive scores.
/// </summary>
/// <param name="sadFilePath">Not used by this routine; kept so existing callers still compile.</param>
/// <param name="happyFilePath">Text file with one sample per line; each line is passed to posDist unchanged.</param>
/// <param name="dataSize">Maximum number of lines to score; clamped to the number of lines in the file.</param>
static void Anlyze(string sadFilePath, string happyFilePath, int dataSize)
{
    Classifier classifier = new Classifier();
    classifier.load("senti.csv");
    initPOSIndex();

    string[] joys = File.ReadAllLines(happyFilePath);

    // Never index past the end of the file when the caller asks for more lines than exist.
    if (joys.Length < dataSize)
    {
        dataSize = joys.Length;
    }

    int errorCount = 0;
    double positiveTotal = 0;
    int positiveCount = 0;

    for (int i = 0; i < dataSize; i++)
    {
        double[] P = convert(posDist(joys[i]));
        double sent = classifier.BinClassify(P);

        if (sent < 0)
        {
            // A line from the happy file scored negative: count it and report its index.
            errorCount++;
            Console.WriteLine(errorCount + " at C " + i);
        }
        else if (sent > 0)
        {
            positiveTotal += sent;
            positiveCount++;
        }
        // A score of exactly 0 is neither counted as an error nor averaged.
    }

    // Floating-point division: prints NaN when no line scored above zero.
    Console.WriteLine("avg = " + (positiveTotal / positiveCount));
}
/// <summary>
/// Evaluates the combined n-gram ("NGC.csv") and POS-distribution ("sentiobj.csv")
/// classifiers against a tagged test file, writes one CSV row per scored line to
/// <paramref name="reportFilePath"/>, and prints the error counts to the console.
/// </summary>
/// <param name="testFilePath">
/// Input file. Each line is split on space/tab into at most four fields; field 0 carries the
/// expected tag (text before the first '_') and field 3 is the tagged tweet.
/// </param>
/// <param name="reportFilePath">CSV report path; an existing file is overwritten.</param>
/// <param name="dataSize">Upper bound (exclusive) on the line index to score; clamped to the file length.</param>
static void BothTest(string testFilePath, string reportFilePath, int dataSize)
{
    NGramClassifier NGC = new NGramClassifier();
    NGC.Load("NGC.csv");
    Classifier POSC = new Classifier();
    POSC.load("sentiobj.csv");
    initPOSIndex();

    char[] fieldSeparators = { ' ', '\t' };
    char[] underScore = { '_' };
    char[] blank = { ' ' };

    // Read the input before opening the report so a missing input does not truncate an old report.
    string[] tData = File.ReadAllLines(testFilePath);
    if (tData.Length < dataSize)
    {
        dataSize = tData.Length;
    }

    int err = 0;   // rows whose predicted marker differs from the tag
    int btot = 0;  // rows where both tag and marker are non-neutral
    int berr = 0;  // mismatches among those rows

    // using guarantees the report is flushed and closed even if scoring throws.
    using (StreamWriter fs = new StreamWriter(reportFilePath, false))
    {
        // Header columns now match the eight values written per row.
        fs.WriteLine("i,tag,score,marker,ngcScore,posScore,sens,tweet");

        // Index 0 is never scored — presumably a header row in the test file; confirm with the data.
        for (int i = 1; i < dataSize; i++)
        {
            string[] lineParts = tData[i].Split(fieldSeparators, 4);
            string taggedTweet = lineParts[3];
            string tag = lineParts[0].Split(underScore, 2)[0];

            // Rebuild the plain tweet: keep the text before the first '_' of every token.
            string tweet = "";
            foreach (string token in taggedTweet.Split(blank, StringSplitOptions.RemoveEmptyEntries))
            {
                tweet += token.Split(underScore)[0] + " ";
            }

            double[] P = convert(posDist(taggedTweet));
            double ngcScore = NGC.score(tweet);
            double posScore = POSC.BinClassify(P);
            string sens = POSC.Classify(P);

            // Halve and offset the POS score. Values beyond +/-1 are replaced by fixed
            // constants rather than clamped to the bound — presumably tuned; confirm.
            double NPosScore = (posScore / 2) + .23;
            if (NPosScore > +1) { NPosScore = +.71; }
            if (NPosScore < -1) { NPosScore = -.6; }

            double score = NPosScore + ngcScore;

            string marker = "neutral";
            if (score < -0.5)
            {
                marker = "negative";
            }
            else if (score > +0.5)
            {
                marker = "positive";
            }

            if (tag != marker)
            {
                err++;
            }

            if (tag != "neutral" && marker != "neutral")
            {
                btot++;
                if (tag != marker)
                {
                    berr++;
                }
            }

            fs.WriteLine(i + "," + tag + "," + score + "," + marker + "," + ngcScore + "," + posScore + "," + sens + "," + tweet);
        }
    }

    Console.WriteLine("err: " + err);
    Console.WriteLine("real biased: " + btot);
    Console.WriteLine("err on biased: " + berr);
    // Floating-point division: prints NaN when no row was non-neutral on both sides.
    Console.WriteLine("err on biased ratio: " + ((double)berr / btot));
}
/// <summary>
/// Scores a tagged test file with two POS-distribution classifiers ("senti.csv" and
/// "sentiobj.csv"), echoes each result to the console, writes one CSV row per scored line
/// to <paramref name="reportFilePath"/>, and prints three error counts at the end.
/// </summary>
/// <param name="testFilePath">
/// Input file. Each line is split on spaces into at most four fields; field 0 carries the
/// expected tag (text before the first '_') and field 3 is the tweet passed to posDist.
/// </param>
/// <param name="reportFilePath">CSV report path; an existing file is overwritten. No header row is written.</param>
/// <param name="dataSize">Upper bound (exclusive) on the line index to score; clamped to the file length.</param>
static void Test(string testFilePath, string reportFilePath, int dataSize)
{
    Classifier classifier = new Classifier();
    classifier.load("senti.csv");
    Classifier objOpinCl = new Classifier();
    objOpinCl.load("sentiobj.csv");
    initPOSIndex();

    char[] space = { ' ' };
    char[] underScore = { '_' };

    // Read the input before opening the report so a missing input does not truncate an old report.
    string[] tData = File.ReadAllLines(testFilePath);
    if (tData.Length < dataSize)
    {
        dataSize = tData.Length;
    }

    int ers = 0;  // tag vs. class label from the "sentiobj.csv" classifier
    int err = 0;  // tag vs. marker thresholded from the "senti.csv" score
    int ero = 0;  // tag vs. marker thresholded from the "sentiobj.csv" score

    // using guarantees the report is flushed and closed even if scoring throws.
    using (StreamWriter fs = new StreamWriter(reportFilePath, false))
    {
        // Index 0 is never scored — presumably a header row in the test file; confirm with the data.
        for (int i = 1; i < dataSize; i++)
        {
            string[] lineParts = tData[i].Split(space, 4);
            string tweet = lineParts[3];
            string tag = lineParts[0].Split(underScore, 2)[0];

            double[] P = convert(posDist(tweet));
            double sent = classifier.BinClassify(P);
            string sens = objOpinCl.Classify(P);
            double objSent = objOpinCl.BinClassify(P);

            // The console shows the raw label, before "objective" is folded into "neutral".
            Console.WriteLine(tag + " " + sent + " " + sens);

            if (sens == "objective")
            {
                sens = "neutral";
            }
            if (tag != sens)
            {
                ers++;
            }

            // Both numeric scores are bucketed with the same asymmetric thresholds.
            string marker = sent < -1.2 ? "negative" : (sent > 1.01 ? "positive" : "neutral");
            string ObSeMarker = objSent < -1.2 ? "negative" : (objSent > 1.01 ? "positive" : "neutral");

            fs.WriteLine(i + "," + tag + "," + sent + "," + objSent + "," + sens + "," + tweet);

            if (tag != marker)
            {
                err++;
            }
            if (tag != ObSeMarker)
            {
                ero++;
            }
        }
    }

    Console.WriteLine("s err: " + ers + " b err:" + err + " obse err:" + ero);
}
/// <summary>
/// Tweet / Entity / keyword / Polarity report. For every input line this extracts the
/// entities and keywords, computes a combined polarity score from the n-gram classifier
/// ("NGC.csv") and the POS-distribution classifier ("sentiobj.csv"), and writes the result
/// to both <c>reportFilePath + ".csv"</c> and <c>reportFilePath + ".xml"</c>.
/// The XML file is finally reloaded and re-saved through <see cref="XmlDocument"/>.
/// </summary>
/// <param name="tweetFilePath">Input file, one tweet per line, tokens separated by spaces in the form word_pos.</param>
/// <param name="reportFilePath">Output path without extension; existing .csv/.xml files are overwritten.</param>
/// <param name="dataSize">Maximum number of lines to process; -1 or a value above the file length means all lines.</param>
public void TEkwP(string tweetFilePath, string reportFilePath, int dataSize)
{
    // UTC avoids a wrong elapsed time if the local clock shifts (e.g. DST) during the run.
    DateTime startTime = DateTime.UtcNow;

    NGramClassifier NGC = new NGramClassifier();
    NGC.Load("NGC.csv");
    Classifier POSC = new Classifier();
    POSC.load("sentiobj.csv");
    Program.initPOSIndex();

    char[] blank = { ' ' };
    char[] underScore = { '_' };

    // Read the input before opening the reports so a missing input does not truncate old reports.
    string[] tData = File.ReadAllLines(tweetFilePath);
    if (tData.Length < dataSize || dataSize == -1)
    {
        dataSize = tData.Length;
    }

    string csvPath = reportFilePath + ".csv";
    string xmlPath = reportFilePath + ".xml";

    // using guarantees both reports are flushed and closed even if processing throws.
    using (StreamWriter fs = new StreamWriter(csvPath, false))
    using (StreamWriter xfs = new StreamWriter(xmlPath, false))
    {
        fs.WriteLine("i" + "," + "Tweet" + "," + "E" + "," + "kw" + "," + "Polarity Score" + "," + "marker" + "," + "ngcScore" + "," + "posScore" + "," + "sens");
        xfs.WriteLine("<TEkwPs>");
        Console.WriteLine("TEkwP");
        Console.WriteLine("take up: lyzing tweets");

        for (int i = 0; i < dataSize; i++)
        {
            string line = tData[i];
            string E = "";
            string kw = "";

            if (i % 100 == 0)
            {
                Console.Write("\r" + i); // in-place progress counter
            }

            // Non-ASCII characters are stripped only for token extraction;
            // the POS distribution further down is computed from the unmodified line.
            string asciiLine = System.Text.RegularExpressions.Regex.Replace(line, @"[^\u0000-\u007F]", string.Empty);
            string[] tweetParts = asciiLine.Split(blank, StringSplitOptions.RemoveEmptyEntries);

            string tweet = "";
            int ti = 0;
            int prevEntityIndex = -999;
            string prevEntity = "";

            foreach (string twpart in tweetParts)
            {
                string[] pieces = twpart.Split(underScore);
                string word = pieces[0];
                tweet += word + " ";

                // A token without '_' has no POS part: keep it in the tweet text but skip
                // keyword/entity detection instead of failing on pieces[1].
                if (pieces.Length > 1)
                {
                    string pos = pieces[1];
                    word = CleanWord(word);

                    if (isKeyWord(pos, word))
                    {
                        kw += keyWordiFy(word) + ";";
                    }

                    if (isEntity(pos, word))
                    {
                        // Mark an entity that directly follows another one so that the two
                        // are merged into a single multi-word entity after the loop.
                        string bs = "";
                        if ((prevEntityIndex + 1 == ti) && !ContainFilter(prevEntity))
                        {
                            bs = "<:< ";
                        }
                        E += bs + EntitiFy(word) + ";";
                        prevEntityIndex = ti;
                        prevEntity = word;
                    }
                }

                ti++;
            }

            // ";<:< " is the seam between two adjacent entities: join them with a space.
            E = E.Replace(";<:< ", " ");

            double[] P = Program.convert(Program.posDist(line));
            double ngcScore = NGC.score(tweet);
            double posScore = POSC.BinClassify(P);
            string sens = POSC.Classify(P);

            // Halve and offset the POS score. Values beyond +/-1 are replaced by +/-0.85
            // rather than clamped to the bound — presumably tuned; confirm.
            double NPosScore = (posScore / 2) + 0.20;
            if (NPosScore > +1) { NPosScore = +.85; }
            if (NPosScore < -1) { NPosScore = -.85; }

            double pScore = NPosScore + ngcScore;

            // The marker is decided on the un-halved sum; the reported score is halved afterwards.
            string marker = "neutral";
            if (pScore <= -0.45)
            {
                marker = "negative";
            }
            else if (pScore >= +0.45)
            {
                marker = "positive";
            }
            pScore /= 2;

            fs.WriteLine(i + ",\"" + tweet.Replace(",", "_CM_") + "\"," + E + "," + kw + "," + pScore + "," + marker + "," + ngcScore + "," + posScore + "," + sens);

            xfs.WriteLine("<TEkwP i='" + i + "' pScore='" + pScore + "' marker='" + marker + "'>");
            xfs.WriteLine(" <T>" + WebUtility.HtmlEncode(tweet.Replace("_CM_", ",")) + "</T>");
            xfs.WriteLine(" <E>" + WebUtility.HtmlEncode(E.Replace(";", ",")) + "</E>");
            xfs.WriteLine(" <kw>" + WebUtility.HtmlEncode(kw.Replace(";", ",")) + "</kw>");
            xfs.WriteLine("</TEkwP>");
        }

        xfs.WriteLine("</TEkwPs>");
    }

    TimeSpan ts = DateTime.UtcNow - startTime;
    Console.WriteLine("\r" + dataSize + "\ndone! in " + ts + "\nStored TEkwP file at " + reportFilePath + ".csv \n\tand at " + reportFilePath + ".xml");

    // Round-trip the XML through XmlDocument; Load throws if the file is not well-formed.
    XmlDocument xDoc = new XmlDocument();
    xDoc.Load(xmlPath);
    xDoc.Normalize();
    xDoc.Save(xmlPath);
}
/// <summary>
/// Scores every tweet in <paramref name="tweetFilePath"/> with the combined n-gram
/// ("NGC.csv") and POS-distribution ("sentiobj.csv") classifiers, writes a per-tweet CSV
/// report, prints aggregate figures to the console, and writes a small XML summary.
/// </summary>
/// <param name="TERM">Value written to the <c>entity</c> attribute of the XML summary (XML-escaped).</param>
/// <param name="tweetFilePath">Input file, one tagged tweet per line, tokens separated by spaces.</param>
/// <param name="reportFilePath">CSV report path; an existing file is overwritten.</param>
/// <param name="reportXMLpath">XML summary path; an existing file is overwritten.</param>
/// <param name="dataSize">Maximum number of lines to process; clamped to the file length.</param>
static void OpinionGram(string TERM, string tweetFilePath, string reportFilePath, string reportXMLpath, int dataSize)
{
    NGramClassifier NGC = new NGramClassifier();
    NGC.Load("NGC.csv");
    Classifier POSC = new Classifier();
    POSC.load("sentiobj.csv");
    initPOSIndex();

    char[] blank = { ' ' };
    char[] underScore = { '_' };

    // Read the input before opening the report so a missing input does not truncate an old report.
    string[] tData = File.ReadAllLines(tweetFilePath);
    if (tData.Length < dataSize)
    {
        dataSize = tData.Length;
    }

    double totalScore = 0;
    int posCnt = 0, negCnt = 0;
    int i; // declared outside the loop: after it, i is the number of tweets processed

    // using guarantees the report is flushed and closed even if scoring throws.
    using (StreamWriter fs = new StreamWriter(reportFilePath, false))
    {
        fs.WriteLine("i" + "," + "score" + "," + "marker" + "," + "ngcScore" + "," + "posScore" + "," + "sens" + "," + "tweet");

        for (i = 0; i < dataSize; i++)
        {
            string taggedTweet = tData[i];

            // Rebuild the plain tweet: keep the text before the first '_' of every token.
            string tweet = "";
            foreach (string token in taggedTweet.Split(blank, StringSplitOptions.RemoveEmptyEntries))
            {
                tweet += token.Split(underScore)[0] + " ";
            }

            double[] P = convert(posDist(taggedTweet));
            double ngcScore = NGC.score(tweet);
            double posScore = POSC.BinClassify(P);
            string sens = POSC.Classify(P);

            // Halve the POS score. Values beyond +/-1 are replaced by +/-0.85 rather than
            // clamped to the bound — presumably tuned; confirm.
            double NPosScore = (posScore / 2) + 0.0;
            if (NPosScore > +1) { NPosScore = +.85; }
            if (NPosScore < -1) { NPosScore = -.85; }

            double score = NPosScore + ngcScore;

            string marker = "neutral";
            if (score < -0.25)
            {
                marker = "negative";
                negCnt++;
            }
            else if (score > +0.25)
            {
                marker = "positive";
                posCnt++;
            }

            totalScore += score;
            fs.WriteLine(i + "," + score + "," + marker + "," + ngcScore + "," + posScore + "," + sens + "," + tweet);
        }
    }

    // All ratios use floating-point division, so an empty input yields NaN rather than throwing.
    Console.WriteLine("avg score = " + (totalScore / i));
    Console.WriteLine("neg pct = " + (((double)negCnt) / i));
    Console.WriteLine("pos pct = " + (((double)posCnt) / i));
    Console.WriteLine("post count= " + i);

    using (StreamWriter XMLfs = new StreamWriter(reportXMLpath, false))
    {
        // TERM is caller-supplied text inside a single-quoted attribute: escape ', ", <, > and &
        // so the summary stays well-formed. Terms without those characters are written unchanged.
        XMLfs.WriteLine("<opinion entity='" + System.Security.SecurityElement.Escape(TERM) + "'>");
        XMLfs.WriteLine(" <score>" + (totalScore / i).ToString("F2") + "</score>");
        XMLfs.WriteLine(" <analysis ");
        XMLfs.WriteLine(" post-count='" + i + "'");
        XMLfs.WriteLine(" percent-positive='" + (((double)posCnt) * 100 / i).ToString("F2") + "'");
        XMLfs.WriteLine(" percent-negative='" + (((double)negCnt) * 100 / i).ToString("F2") + "'" + " />");
        XMLfs.WriteLine("</opinion>");
    }
}