Пример #1
0
        /// <summary>
        /// Scores a tokenized text against the positive and negative n-gram dictionaries
        /// (single tokens, two-token tuples and three-token triples) using summed
        /// logarithms, and turns the two scores into a prediction.
        /// </summary>
        /// <param name="input">Tokens of the text to classify, in their original order.</param>
        /// <returns>
        /// 1 for a positive prediction, 0 for a negative prediction, and -1 when the two
        /// log scores differ by less than 2 (not enough certainty).
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
        public int makeLogPrediction(List <string> input)
        {
            if (input == null)
            {
                throw new ArgumentNullException(nameof(input));
            }

            List <string> tokenized = input;
            List <int>    posInstances = new List <int>(), negInstances = new List <int>();
            int           count;

            //for every n-gram starting at token i, record the instance count stored in each
            //dictionary; an n-gram that is missing from a dictionary counts as 1
            for (int i = 0; i < tokenized.Count; i++)
            {
                //a tuple exists whenever at least one token follows the current one
                if (i < tokenized.Count - 1)
                {
                    string tuple = tokenized[i] + " " + tokenized[i + 1];
                    posInstances.Add(positiveTuple.TryGetValue(tuple, out count) ? count : 1);
                    negInstances.Add(negativeTuple.TryGetValue(tuple, out count) ? count : 1);
                }

                //a triple exists whenever at least two tokens follow the current one
                if (i < tokenized.Count - 2)
                {
                    string triple = tokenized[i] + " " + tokenized[i + 1] + " " + tokenized[i + 2];
                    posInstances.Add(positiveTriple.TryGetValue(triple, out count) ? count : 1);
                    negInstances.Add(negativeTriple.TryGetValue(triple, out count) ? count : 1);
                }

                //the single token itself is always scored
                string single = tokenized[i];
                posInstances.Add(positiveSingle.TryGetValue(single, out count) ? count : 1);
                negInstances.Add(negativeSingle.TryGetValue(single, out count) ? count : 1);
            }

            //sum of log(instances / total positive sentiment) over every positive lookup
            double positiveNumerator = 0;

            foreach (int instances in posInstances)
            {
                positiveNumerator += Math.Log((double)instances / (double)positiveSentimentTotal);
            }

            //sum of log(instances / total negative sentiment) over every negative lookup
            double negativeNumerator = 0;

            foreach (int instances in negInstances)
            {
                negativeNumerator += Math.Log((double)instances / (double)negativeSentimentTotal);
            }

            //add the log of each class's share of the combined sentiment total
            double sentimentTotal = (double)positiveSentimentTotal + (double)negativeSentimentTotal;
            positiveNumerator += Math.Log((double)positiveSentimentTotal / sentimentTotal);
            negativeNumerator += Math.Log((double)negativeSentimentTotal / sentimentTotal);

            //scores that are too close together are reported as "not certain enough"
            if (Math.Abs(positiveNumerator - negativeNumerator) < 2)
            {
                return(-1);
            }

            //1 for a positive prediction, 0 for a negative prediction
            return(positiveNumerator > negativeNumerator ? 1 : 0);
        }
Пример #2
0
        /// <summary>
        /// Predicts the sentiment of every tweet. Each tweet is tokenized and scored with
        /// <c>makeLogPrediction</c>; tweets it reports as uncertain (-1) are written to a
        /// temp file and labelled by fastText instead.
        /// </summary>
        /// <param name="toPredict">The tweets to classify.</param>
        /// <returns>
        /// One entry per tweet, in input order: true when the final label is 1, false otherwise.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="toPredict"/> is null.</exception>
        /// <exception cref="InvalidOperationException">
        /// fastText returned fewer labels than there were uncertain tweets.
        /// </exception>
        public List <bool> runPrediction(List <string> toPredict)
        {
            if (toPredict == null)
            {
                throw new ArgumentNullException(nameof(toPredict));
            }

            tokenizer     t         = new tokenizer();
            List <int>    retVals   = new List <int>(toPredict.Count);
            List <string> badTweets = new List <string>();

            foreach (var tweet in toPredict)
            {
                int retValue = makeLogPrediction(t.tokenize(tweet));
                retVals.Add(retValue);

                //if the returned value for the tweet says that we did not have sufficient evidence to make a prediction, add it to this list
                if (retValue == -1)
                {
                    badTweets.Add(tweet);
                }
            }

            //create file for fastText to read from (written even when the list is empty)
            var badTweetsPath = ResourceHelper.WriteToTemp("badtweets.txt", badTweets);

            if (badTweets.Count > 0)
            {
                Console.WriteLine("Running fastText on: " + badTweets.Count.ToString() + " Tweets");
                runFastText.runPrediction(badTweetsPath);
                List <int> badLabels = runFastText.getLabels();

                //fail with a clear message up front instead of an index error halfway through the merge
                int labelCount = badLabels == null ? 0 : badLabels.Count;

                if (labelCount < badTweets.Count)
                {
                    throw new InvalidOperationException(
                              "fastText returned " + labelCount + " labels for " + badTweets.Count + " uncertain tweets");
                }

                //substitute the fastText labels, in order, for every uncertain prediction
                int listCount = 0;

                for (int i = 0; i < retVals.Count; i++)
                {
                    if (retVals[i] == -1)
                    {
                        retVals[i] = badLabels[listCount];
                        listCount++;
                    }
                }
            }

            //convert every label, not only when fastText ran, so that confidently
            //classified input still produces one result per tweet
            List <bool> ToRet = new List <bool>(retVals.Count);

            foreach (int label in retVals)
            {
                ToRet.Add(label == 1);
            }

            return(ToRet);
        }