/// <summary>
/// Verifies that a word starting with '#' is created with the hashtag entity
/// and that the entity still reads as hashtag after another value is assigned.
/// </summary>
public void Createhashtag()
 {
     var handler = helper.Handler.Object;
     var extractor = helper.RawTextExractor.Object;
     var inquirer = helper.InquirerManager.Object;

     instance = WordOccurrence.Create(handler, extractor, inquirer, "#word", null, POSTags.Instance.NN);
     Assert.AreEqual(NamedEntities.Hashtag, instance.Entity);

     // Assigning a different entity must not change what the getter reports.
     instance.Entity = NamedEntities.Date;
     Assert.AreEqual(NamedEntities.Hashtag, instance.Entity);
 }
Esempio n. 2
0
        /// <summary>
        /// Updates <paramref name="profile"/> with the words found in <paramref name="content"/>.
        /// The text is split on '.', ',', ' ', '!' and '?', tokens matching the exclusion
        /// pattern are dropped, and for every remaining word the profile's occurrence counter
        /// is created or incremented. Each word is also registered as a follower of the
        /// previously processed word via <c>AddOccurrence</c>.
        /// </summary>
        /// <param name="profile">Profile to train; must not be null.</param>
        /// <param name="content">Content whose <c>Text</c> is tokenized; neither may be null.</param>
        /// <returns>The same <paramref name="profile"/> instance that was passed in.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="profile"/> is null, or <paramref name="content"/> or its text is null.
        /// </exception>
        public TwitterProfile Train(TwitterProfile profile, TextContent content)
        {
            if (profile == null)
            {
                throw new ArgumentNullException(nameof(profile));
            }

            if (content?.Text == null)
            {
                throw new ArgumentNullException(nameof(content));
            }

            //TODO move remove @ and # to tweet generator instead of trainer
            var words = TrainTokenSeparator.Split(content.Text)
                        .Where(word => !string.IsNullOrWhiteSpace(word))
                        .Where(word => !TrainExcludedToken.IsMatch(word))
                        .Select(word => new Word(word)).ToList();

            WordOccurrence lastWordOccurrence = null;

            foreach (var word in words)
            {
                // Resolve the canonical word with a single lookup per source:
                // profile vocabulary first, then the repository, and only then create it.
                Word knownWord = profile.Vocabulary.SingleOrDefault(w => w.Equals(word))
                                 ?? _wordRepository.Get(word)
                                 ?? _wordRepository.Add(word);

                if (knownWord == null)
                {
                    continue;
                }

                var currentWordOccurrence = profile.Words.SingleOrDefault(wo => wo.Word == knownWord);

                if (currentWordOccurrence == null)
                {
                    currentWordOccurrence = profile.AddWord(knownWord);
                }
                else
                {
                    currentWordOccurrence.Occurrence++;
                }

                // Link the previous word to the current one (skipped for the first word).
                lastWordOccurrence?.AddOccurrence(currentWordOccurrence.Word);

                lastWordOccurrence = currentWordOccurrence;
            }

            return profile;
        }

        // Separators used to tokenize the text. The capture group makes Regex.Split return
        // the separators as tokens too; whitespace-only ones are filtered out afterwards.
        private static readonly Regex TrainTokenSeparator = new Regex(@"(\.|,| |!|\?)");

        // Tokens that are skipped during training. Built once instead of on every call.
        private static readonly Regex TrainExcludedToken = new Regex(@"(^@|^#|^;|^:|^http|^HTTP|\/|\\|…$|&\S+|^\""$|^\"".*[^\""]$|^[^\""].*\""$|^RT$)");
 /// <summary>
 /// Verifies the argument validation of <c>WordOccurrence.Create</c>: each missing
 /// collaborator, a missing text, a missing tag and the SBAR tag must be rejected.
 /// </summary>
 public void CreateArguments()
 {
     var handler = helper.Handler.Object;
     var extractor = helper.RawTextExractor.Object;
     var inquirer = helper.InquirerManager.Object;
     var noun = POSTags.Instance.NN;

     // Missing collaborators.
     Assert.Throws <ArgumentNullException>(() => WordOccurrence.Create(null, extractor, inquirer, "Test", null, noun));
     Assert.Throws <ArgumentNullException>(() => WordOccurrence.Create(handler, null, inquirer, "Test", null, noun));
     Assert.Throws <ArgumentNullException>(() => WordOccurrence.Create(handler, extractor, null, "Test", null, noun));

     // Missing text.
     Assert.Throws <ArgumentException>(() => WordOccurrence.Create(handler, extractor, inquirer, null, null, noun));

     // Missing tag, then a tag that is not accepted for a word.
     Assert.Throws <ArgumentNullException>(() => WordOccurrence.Create(handler, extractor, inquirer, "Test", null, null));
     Assert.Throws <ArgumentException>(() => WordOccurrence.Create(handler, extractor, inquirer, "Test", null, POSTags.Instance.SBAR));
 }
 /// <summary>
 /// Builds the mocked collaborators and the default "Test" noun instance used by the tests.
 /// </summary>
 public void Setup()
 {
     helper = new WordsHandlerHelper();

     var extractorMock = helper.RawTextExractor;
     var handlerMock = helper.Handler;

     // The raw-text extractor maps "test" to "T".
     extractorMock.Setup(e => e.GetWord("test")).Returns("T");

     // The handler reports a sentiment of 2, stop-word status and a quantifier of 2 for any word.
     handlerMock.Setup(h => h.CheckSentiment(It.IsAny <WordOccurrence>())).Returns(new SentimentValue(new TestWordItem(), 2));
     handlerMock.Setup(h => h.IsStop(It.IsAny <WordOccurrence>())).Returns(true);
     handlerMock.Setup(h => h.MeasureQuantifier(It.IsAny <WordOccurrence>())).Returns(2);

     instance = WordOccurrence.Create(handlerMock.Object, extractorMock.Object, helper.InquirerManager.Object, "Test", null, POSTags.Instance.NN);
 }
Esempio n. 5
0
        /// <summary>
        /// Verifies that <c>Add</c> rejects null and that, after two "Test" words are added,
        /// the instance exposes the combined text, stem and flags asserted below.
        /// </summary>
        public void Add()
        {
            Assert.Throws <ArgumentNullException>(() => instance.Add(null));

            var handler = helper.Handler.Object;
            var extractor = helper.RawTextExractor.Object;
            var inquirer = helper.InquirerManager.Object;

            // Add two separately created "Test" nouns.
            for (var added = 0; added < 2; added++)
            {
                instance.Add(WordOccurrence.Create(handler, extractor, inquirer, "Test", null, POSTags.Instance.NN));
            }

            // Text and stem are the space-joined parts.
            Assert.AreEqual("test test", instance.Text);
            Assert.AreEqual("t t", instance.Stemmed);
            Assert.AreEqual("NN", instance.POS.Tag);
            Assert.IsNotNull(instance.Relationship);

            // Flags and values of the combined instance.
            Assert.IsFalse(instance.IsSentiment);
            Assert.IsFalse(instance.IsTopAttribute);
            Assert.AreEqual(2, instance.QuantValue);
            Assert.IsFalse(instance.IsQuestion);
            Assert.IsFalse(instance.IsFeature);
            Assert.IsFalse(instance.IsFixed);
            Assert.IsFalse(instance.IsStopWord);
            Assert.IsFalse(instance.IsSimple);
            Assert.AreEqual(2, instance.AllWords.Count());
        }
 /// <summary>
 /// Verifies that the word "xxxbad" is created with <c>IsFixed</c> set.
 /// </summary>
 public void CreateFixed()
 {
     var handler = helper.Handler.Object;
     var extractor = helper.RawTextExractor.Object;
     var inquirer = helper.InquirerManager.Object;

     instance = WordOccurrence.Create(handler, extractor, inquirer, "xxxbad", null, POSTags.Instance.NN);

     Assert.IsTrue(instance.IsFixed);
 }
Esempio n. 7
0
        /// <summary>
        /// Interactive console routine that builds the word-frequency dictionary.
        /// Asks how many sentences to parse, optionally resumes from a previously saved
        /// sentence count, feeds every token to a <c>FrequencyResults</c> accumulator and
        /// writes the frequency files and the sentence count to the "frequencies" directory
        /// under <c>Utilities.PathToDownloadDirectory</c>.
        /// </summary>
        /// <remarks>
        /// If the requested sentence count cannot be read as an integer, a message is
        /// printed and the routine returns without touching any file.
        /// </remarks>
        private static void BuildFrequencyDictionary()
        {
            var result = new FrequencyResults();

            Console.WriteLine("How many sentences do you want to parse?");
            int nbOfSentencesToParse;
            // Console.ReadLine() returns null at end of input and arbitrary text otherwise;
            // TryParse handles both without throwing.
            if (!int.TryParse(Console.ReadLine(), out nbOfSentencesToParse))
            {
                Console.WriteLine("Invalid number of sentences; aborting.");
                return;
            }

            var nbOfAlreadyParsedSentences = 0;
            var frequencyDirectory         = Utilities.PathToDownloadDirectory + "frequencies";

            // CreateDirectory is a no-op when the directory already exists.
            Directory.CreateDirectory(frequencyDirectory);

            var frequencyFilePath           = frequencyDirectory + "/frequencies.txt";
            var excludedFrequencyFilePath   = frequencyDirectory + "/excluded-frequencies.txt";
            var nbOfSentencesParsedFilePath = frequencyDirectory + "/nbOfSentencesParsed.txt";
            var parsingResumed = false;

            if (File.Exists(nbOfSentencesParsedFilePath))
            {
                int nbOfSentencesParsed;
                if (int.TryParse(File.ReadAllText(nbOfSentencesParsedFilePath), out nbOfSentencesParsed))
                {
                    Console.WriteLine("{0} sentences have already been parsed. Resume parsing? (y/n)", nbOfSentencesParsed);
                    var resumeParsing = string.Equals(Console.ReadLine(), "Y", StringComparison.InvariantCultureIgnoreCase);
                    if (resumeParsing)
                    {
                        nbOfAlreadyParsedSentences = nbOfSentencesParsed;
                        parsingResumed             = true;
                    }
                }
            }

            var sw = Stopwatch.StartNew();

            Console.WriteLine("Building of frequency dictionary started");

            // Tokenize the sentences and compute the frequencies
            Func <string[], int, bool> extractTokens = (tokens, sentenceCounter) =>
            {
                for (var i = 0; i < tokens.Length; i++)
                {
                    var wordOccurence = new WordOccurrence()
                    {
                        IsFirstTokenInSentence = i == 0,
                        Word = tokens[i]
                    };
                    result.AddOccurence(wordOccurence);
                }
                return(true);
            };

            Utilities.ExtractTokensFromTxtFiles(extractTokens, nbOfSentencesToParse, nbOfAlreadyParsedSentences);

            // Load previous frequency dictionaries that were already computed
            if (parsingResumed)
            {
                result.LoadFrequencyDictionary(frequencyFilePath);
                // NOTE(review): the excluded file is written with SaveExcludedFrequencyDictionary
                // but read back through LoadFrequencyDictionary - confirm this is intended.
                result.LoadFrequencyDictionary(excludedFrequencyFilePath);
            }

            // Save frequency files on disk
            result.SaveFrequencyDictionary(frequencyFilePath);
            result.SaveExcludedFrequencyDictionary(excludedFrequencyFilePath);

            // Save the nb of sentences parsed (for information and being able to relaunch the parsing at this point)
            // NOTE(review): the requested count is stored as-is, even when resuming - confirm it is
            // meant to be a running total rather than an increment.
            File.WriteAllText(nbOfSentencesParsedFilePath, nbOfSentencesToParse.ToString());

            // Stop timing before the final console output so it is not part of the measurement.
            sw.Stop();

            Console.WriteLine("Building of frequency dictionary done");
            Console.WriteLine("=====================================");

            Console.WriteLine("Ellapsed time: {0}", sw.Elapsed.ToString("g"));
        }
Esempio n. 8
0
 /// <summary>
 /// Re-keys the entries of <c>GetWordsAlphabeticallyEnumerator</c> as
 /// <c>WordOccurrence</c> objects in a <see cref="SortedList"/> and returns its enumerator.
 /// </summary>
 /// <returns>
 /// An enumerator whose keys are <c>WordOccurrence</c> instances and whose values are null.
 /// The iteration order is whatever ordering <c>WordOccurrence</c> defines for the sorted list.
 /// </returns>
 public IDictionaryEnumerator GetWordsByOccurrenceEnumerator()
 {
     SortedList byOccurrence = new SortedList();

     for (IDictionaryEnumerator source = GetWordsAlphabeticallyEnumerator(); source.MoveNext();)
     {
         // Source entries are expected to hold the word as key and its count as value.
         int count = (int)source.Value;
         string text = (string)source.Key;
         byOccurrence.Add(new WordOccurrence(count, text), null);
     }

     return byOccurrence.GetEnumerator();
 }
Esempio n. 9
0
        /// <summary>
        /// Calculates the rating from the model vector obtained for the review's cells.
        /// Falls back to the review's raw rating when the model yields no vector. Otherwise:
        /// cells that carry a word item are added as sentiments, the calculated values of the
        /// remaining cells are accumulated into a bias term, review sentiments not yet
        /// contained are added with a scaled-down value, and the bias itself is added as a
        /// sentiment when at least one sentiment exists.
        /// </summary>
        protected override void CalculateRatingLogic()
        {
            TextVectorCell[] reviewCells = Review.Vector.GetCells().ToArray();
            (double Probability, double Normalization, VectorData Vector)prediction = Model.GetVector(reviewCells);
            VectorData weights = prediction.Vector;

            // No usable vector from the model: use the raw rating.
            if (weights == null || weights.Length == 0)
            {
                Rating = Review.CalculateRawRating();
                return;
            }

            var        intercept       = weights.RHO;
            var        unmatchedWeight = 0.1;
            VectorCell starsCell       = default;

            foreach (VectorCell current in weights.Cells)
            {
                var textCell = (TextVectorCell)current.Data;

                // Remember the rating-stars cell; its theta scales the fallback weight below.
                if (textCell.Name == Constants.RATING_STARS)
                {
                    starsCell = current;
                }

                // Cells without a word item only contribute to the bias.
                if (textCell.Item == null)
                {
                    intercept += current.Calculated;
                    continue;
                }

                var owner = (IWordItem)textCell.Item;
                Add(new SentimentValue(owner, owner.Text, new SentimentValueData(current.Calculated, SentimentSource.AdjustedSVM)));
            }

            // Collect the review sentiments that were not added from the vector.
            var pending = new List <SentimentValue>();
            foreach (SentimentValue candidate in Review.GetAllSentiments())
            {
                if (ContainsSentiment(candidate.Owner))
                {
                    continue;
                }

                pending.Add(candidate);
            }

            if (starsCell != null)
            {
                var sentimentCount = Review.GetAllSentiments().Length;
                unmatchedWeight = Math.Abs(starsCell.Theta) / sentimentCount;
            }

            foreach (SentimentValue missing in pending)
            {
                var scaled = new SentimentValueData(missing.DataValue.Value * unmatchedWeight, SentimentSource.AdjustedCalculated);
                Add(new SentimentValue(missing.Owner, missing.Span, scaled));
            }

            if (TotalSentiments > 0)
            {
                Add(new SentimentValue(
                        WordOccurrence.CreateBasic(Constants.BIAS, POSTags.Instance.JJ),
                        "BIAS",
                        new SentimentValueData(intercept, SentimentSource.AdjustedSVM)));
            }

            // Log when the calculated rating is positive but the model's probability is below 0.5.
            if (Rating.HasValue && Rating.IsPositive.Value && prediction.Probability < 0.5)
            {
                log.LogDebug("Mistmatch in sentiment with machine prediction: {0} - {1}", Rating.IsPositive, prediction.Probability);
            }
        }