/// <summary>
/// Scores a tokenized text against the positive and negative n-gram count
/// dictionaries and decides which sentiment is more likely.
/// </summary>
/// <remarks>
/// For every token position the method looks up the two-token phrase, the
/// three-token phrase and the single token (in that order, where enough
/// tokens remain) in the positive and negative dictionaries. A phrase that
/// is missing from a dictionary is counted as 1 so that its logarithm stays
/// finite. Each score is the sum of log(count / sentimentTotal) over all
/// looked-up phrases plus the log of that sentiment's share of the combined
/// positive and negative totals.
/// </remarks>
/// <param name="input">Tokens of the text to classify, in their original order.</param>
/// <returns>
/// 1 for a positive prediction, 0 for a negative prediction, and -1 if the
/// two scores are too close to each other to make a confident prediction.
/// </returns>
public int makeLogPrediction(List<string> input)
{
    // Minimum gap between the two log scores for a prediction to count as certain.
    const double certaintyThreshold = 2;

    List<string> tokenized = input;
    List<int> posInstances = new List<int>();
    List<int> negInstances = new List<int>();
    int count;

    // Change the starting value of 'i' if the text to analyze does not begin at the first token.
    for (int i = 0; i < tokenized.Count; i++)
    {
        // A two-token phrase exists whenever there is at least one following token.
        if (i < tokenized.Count - 1)
        {
            string tempTuple = tokenized[i] + " " + tokenized[i + 1];
            posInstances.Add(positiveTuple.TryGetValue(tempTuple, out count) ? count : 1);
            negInstances.Add(negativeTuple.TryGetValue(tempTuple, out count) ? count : 1);
        }

        // A three-token phrase exists whenever there are at least two following tokens.
        if (i < tokenized.Count - 2)
        {
            string tempTriple = tokenized[i] + " " + tokenized[i + 1] + " " + tokenized[i + 2];
            posInstances.Add(positiveTriple.TryGetValue(tempTriple, out count) ? count : 1);
            negInstances.Add(negativeTriple.TryGetValue(tempTriple, out count) ? count : 1);
        }

        // The single token is always looked up.
        posInstances.Add(positiveSingle.TryGetValue(tokenized[i], out count) ? count : 1);
        negInstances.Add(negativeSingle.TryGetValue(tokenized[i], out count) ? count : 1);
    }

    // Sum of log(count / total positive sentiment) over every positive lookup.
    double positiveNumerator = 0;
    for (int i = 0; i < posInstances.Count; i++)
    {
        positiveNumerator += Math.Log((double)posInstances[i] / (double)positiveSentimentTotal);
    }

    // Sum of log(count / total negative sentiment) over every negative lookup.
    double negativeNumerator = 0;
    for (int i = 0; i < negInstances.Count; i++)
    {
        negativeNumerator += Math.Log((double)negInstances[i] / (double)negativeSentimentTotal);
    }

    // Add the log of each sentiment's fraction of the total data points.
    double sentimentTotal = (double)positiveSentimentTotal + (double)negativeSentimentTotal;
    positiveNumerator += Math.Log((double)positiveSentimentTotal / sentimentTotal);
    negativeNumerator += Math.Log((double)negativeSentimentTotal / sentimentTotal);

    // Scores that are too close together do not give enough certainty.
    if (Math.Abs(positiveNumerator - negativeNumerator) < certaintyThreshold)
    {
        return -1;
    }

    return positiveNumerator > negativeNumerator ? 1 : 0;
}
/// <summary>
/// Predicts the sentiment of every tweet in <paramref name="toPredict"/>.
/// </summary>
/// <remarks>
/// Each tweet is tokenized and scored with <c>makeLogPrediction</c>. Tweets
/// for which that method returns -1 (not enough certainty) are written to a
/// temporary "badtweets.txt" file and handed to fastText; the labels
/// fastText returns are substituted, in order, for the -1 results.
/// </remarks>
/// <param name="toPredict">The tweets to classify.</param>
/// <returns>
/// One entry per input tweet, in input order: true when the final label is 1,
/// false otherwise.
/// </returns>
/// <exception cref="InvalidOperationException">
/// fastText returned fewer labels than the number of tweets it was given.
/// </exception>
public List<bool> runPrediction(List<string> toPredict)
{
    tokenizer t = new tokenizer();
    List<int> retVals = new List<int>();
    List<string> badTweets = new List<string>();

    foreach (var tweet in toPredict)
    {
        int retValue = makeLogPrediction(t.tokenize(tweet));
        retVals.Add(retValue);

        // -1 means there was not sufficient evidence to make a prediction;
        // collect those tweets so fastText can decide them.
        if (retValue == -1)
        {
            badTweets.Add(tweet);
        }
    }

    if (badTweets.Count > 0)
    {
        // Create the file for fastText to read from (only needed when there is work for it).
        var badTweetsPath = ResourceHelper.WriteToTemp("badtweets.txt", badTweets);

        Console.WriteLine("Running fastText on: " + badTweets.Count.ToString() + " Tweets");
        runFastText.runPrediction(badTweetsPath);
        List<int> badLabels = runFastText.getLabels();

        // Fail fast with a clear message instead of hitting an index error
        // halfway through the substitution below.
        int labelCount = badLabels == null ? 0 : badLabels.Count;
        if (labelCount < badTweets.Count)
        {
            throw new InvalidOperationException(
                $"fastText returned {labelCount} labels for {badTweets.Count} uncertain tweets (retVals.Count={retVals.Count}).");
        }

        // Replace every uncertain result with the next fastText label, in order.
        int listCount = 0;
        for (int i = 0; i < retVals.Count; i++)
        {
            if (retVals[i] == -1)
            {
                retVals[i] = badLabels[listCount];
                listCount++;
            }
        }
    }

    // Convert the labels to booleans for every tweet, regardless of whether
    // fastText had to be consulted.
    List<bool> ToRet = new List<bool>(retVals.Count);
    foreach (int label in retVals)
    {
        ToRet.Add(label == 1);
    }

    return ToRet;
}