Пример #1
0
        // Verifies that adding an edge to a tweet makes that tweet the edge's ParentTweet.
        public void AddEdge_EdgeParentTweetMustBeSet()
        {
            // Arrange: one edge between two fresh hashtags, and an empty tweet.
            var edge = new Edge(new HashTag(), new HashTag());
            var tweet = new Tweet();

            // Act
            tweet.AddEdge(edge);

            // Assert: the edge must reference the very same tweet instance.
            Assert.AreSame(tweet, edge.ParentTweet);
        }
Пример #2
0
        // Verifies that a tweet exposes exactly one edge after a single AddEdge call.
        public void AddEdge()
        {
            const int ExpectedEdgeCount = 1;

            // Arrange: one edge between two fresh hashtags, and an empty tweet.
            var edge = new Edge(new HashTag(), new HashTag());
            var tweet = new Tweet();

            // Act
            tweet.AddEdge(edge);

            // Assert
            Assert.AreEqual<int>(ExpectedEdgeCount, tweet.Edges.Length);
        }
Пример #3
0
        // Verifies that when the same edge is added to two tweets in turn, its ParentTweet
        // ends up being the tweet it was added to last.
        public void AddSameEdgeToTwoTweets_EdgeParentTweetMustBeTheLastOne()
        {
            // Arrange
            var sharedEdge = new Edge(new HashTag(), new HashTag());

            // Act: attach the edge to a first tweet, then to a second one.
            var firstTweet = new Tweet();
            firstTweet.AddEdge(sharedEdge);

            var secondTweet = new Tweet();
            secondTweet.AddEdge(sharedEdge);

            // Assert: the most recent owner wins.
            Assert.AreSame(secondTweet, sharedEdge.ParentTweet);
        }
Пример #4
0
        // Verifies that disposing a tweet leaves both endpoints of its edge without siblings.
        public void Dispose_MustRemoveAllItsEdges()
        {
            const int ExpectedSiblingCount = 0;

            // Arrange: a tweet owning a single edge between two fresh hashtags.
            var edge = new Edge(new HashTag(), new HashTag());
            var tweet = new Tweet();
            tweet.AddEdge(edge);

            // Act
            tweet.Dispose();

            // Assert: neither hashtag of the edge has any sibling left.
            Assert.AreEqual<int>(ExpectedSiblingCount, edge.Left.Siblings.Length);
            Assert.AreEqual<int>(ExpectedSiblingCount, edge.Right.Siblings.Length);
        }
Пример #5
0
        /// <summary>
        /// Builds the fixed set of seven sample tweets used as test input, ordered by ascending creation time
        /// (21:40:10 through 21:41:25 on 2015-11-03). All of them share the timestamp_ms value "12".
        /// </summary>
        /// <returns>The sample tweets, in creation order.</returns>
        private Tweet[] GetSampleTweets()
        {
            return new Tweet[]
            {
                new Tweet
                {
                    text = "Hi. This is a #Test of #TweetAnalyzer",
                    timestamp_ms = "12",
                    CreatedAt = new DateTime(2015, 11, 3, 21, 40, 10)
                },
                new Tweet
                {
                    text = "This amazing #Test is called #UnitTest",
                    timestamp_ms = "12",
                    CreatedAt = new DateTime(2015, 11, 3, 21, 40, 15)
                },
                new Tweet
                {
                    text = "#UnitTest is a great practice in development of complex applications.",
                    timestamp_ms = "12",
                    CreatedAt = new DateTime(2015, 11, 3, 21, 40, 35)
                },
                // A tweet without any hashtag.
                new Tweet
                {
                    text = "This tweet has no hashtag.",
                    timestamp_ms = "12",
                    CreatedAt = new DateTime(2015, 11, 3, 21, 40, 37)
                },
                // Same hashtag as above, but with different letter casing.
                new Tweet
                {
                    text = "This #Unittest is very #helpful",
                    timestamp_ms = "12",
                    CreatedAt = new DateTime(2015, 11, 3, 21, 40, 42)
                },
                new Tweet
                {
                    text = "This #Unittest helps analyze the #tweetanalyzer itsetlf",
                    timestamp_ms = "12",
                    CreatedAt = new DateTime(2015, 11, 3, 21, 40, 59)
                },
                // Created 75 seconds after the first sample tweet.
                new Tweet
                {
                    text = "This tweet is outside the #timewindow and is #helpful",
                    timestamp_ms = "12",
                    CreatedAt = new DateTime(2015, 11, 3, 21, 41, 25)
                }
            };
        }
Пример #6
0
        /// <summary>
        /// Runs the feature pipeline for one incoming tweet: updates the sliding time window, cleans the tweet
        /// text, processes its hashtags and, when an output repository is supplied, writes both feature outputs.
        /// </summary>
        /// <param name="tweet">The incoming tweet.</param>
        /// <param name="outpurRepo">Output repository to write the features to; when null nothing is written.</param>
        /// <returns>A task that completes once both outputs (if any) have been written.</returns>
        public static async Task ExtractTweetFeatures(Tweet tweet, Repositories.IOutput outpurRepo)
        {
            const int TimeWindowInSeconds = 60;

            //Maintain the 60 second window of tweets
            MaintainTimeWindow(tweet, TimeWindowInSeconds);

            //Clean the tweet text up
            string strResult = GetCleanedupTweetText(tweet.text);

            // Nothing left after clean-up: no hashtags to process and nothing to write.
            if (string.IsNullOrEmpty(strResult))
            {
                return;
            }

            //Process hashtags and related features (graphs and so on)
            ProcessHashtagData(tweet, strResult);

            if (outpurRepo == null)
            {
                return;
            }

            // The average degree is evaluated only after the first write has completed, as in the original flow.
            await outpurRepo.WriteFeatureOneOutputAsync(strResult, tweet.created_at);
            await outpurRepo.WriteFeatureTwoOutputAsync(CalculateAverageDegree());
        }
Пример #7
0
        /// <summary>
        /// Takes required actions to maintain the desired time window of tweets, by adding new input tweet to the queue and removing
        /// tweets old enough to get outside the window.
        /// </summary>
        /// <remarks>
        /// The tweet is enqueued before it is validated, so a tweet with an empty text or timestamp_ms stays in the
        /// queue but does not trigger any eviction itself.
        /// </remarks>
        /// <param name="newTweet">New received tweet</param>
        /// <param name="timeWindowDurationInSeconds">The desired duration of the window</param>
        private static void MaintainTimeWindow(Tweet newTweet, int timeWindowDurationInSeconds)
        {
            _qOneMinuteWindowsQueue.Enqueue(newTweet);

            if (string.IsNullOrEmpty(newTweet.text) || string.IsNullOrEmpty(newTweet.timestamp_ms))
            {
                return;
            }

            // The cut-off does not change while evicting, so it is computed once.
            // NOTE(review): AddSeconds throws if CreatedAt is closer to the minimum date than the window length
            // (e.g. a never-assigned CreatedAt) - confirm CreatedAt is always populated for valid tweets.
            var windowStart = newTweet.CreatedAt.AddSeconds(-timeWindowDurationInSeconds);

            // The Count check keeps Peek() from throwing on an empty queue, which could otherwise happen when a
            // negative duration makes the just-enqueued tweet itself fall outside the window.
            while (_qOneMinuteWindowsQueue.Count > 0 && _qOneMinuteWindowsQueue.Peek().CreatedAt < windowStart)
            {
                Tweet removedTweet = _qOneMinuteWindowsQueue.Dequeue();

                ClearTweetHashtagObjects(removedTweet);
                removedTweet.Dispose(); //Calling Dispose() on the removed tweet is necessary for all its edges to get correctly removed.
            }
        }
Пример #8
0
        /// <summary>
        /// Process hashtags and their relational graph in a tweet: every hashtag found in the text is looked up
        /// (or created) in the hashtag dictionary, linked by a new edge to each different hashtag already collected
        /// on the tweet, and then added to the tweet's hashtag collection.
        /// </summary>
        /// <param name="tweet">Tweet to be processed</param>
        /// <param name="tweetText">The cleaned-up ASCII string of the tweet.</param>
        private static void ProcessHashtagData(Tweet tweet, string tweetText)
        {
            //Extract hashtags from tweet text
            var hashtags = Regex.Matches(tweetText, @"(\b|\s)?#\w+");

            foreach (Match tag in hashtags)
            {
                // The match may include a leading whitespace character, hence the Trim().
                // Lower-casing must be culture-invariant: the name is a dictionary key, and a culture-sensitive
                // ToLower() maps "I" differently under e.g. Turkish culture, splitting one hashtag into two keys.
                string strHashtagName = tag.Value.Trim().ToLowerInvariant();

                HashTag hashTag;
                if (!_dicHashtagNameMappings.TryGetValue(strHashtagName, out hashTag))
                {
                    hashTag = new HashTag()
                    {
                        Name = strHashtagName
                    };

                    _dicHashtagNameMappings.Add(strHashtagName, hashTag);
                }

                // Connect the current hashtag with every other hashtag seen so far in this tweet.
                foreach (var t in tweet.HashTags)
                {
                    if (t != hashTag)
                    {
                        Edge newEdge = new Edge(t, hashTag);
                        tweet.AddEdge(newEdge);

                        // Add-or-replace: the newest edge with a given id always wins.
                        _dicEdgeIdMappings[newEdge.Id] = newEdge;
                    }
                }

                tweet.HashTags.Add(hashTag);
            }
        }
Пример #9
0
        /// <summary>
        /// Remove the tweet edges and their corresponding hashtags from the edges/hashtags dictionaries.
        /// </summary>
        /// <remarks>
        /// An edge is only processed when the edge currently registered under its id has the removed tweet as its
        /// ParentTweet. For each endpoint of such an edge, the hashtag is removed from the hashtag dictionary when
        /// this edge is its LastParentEdge; otherwise only the opposite endpoint is removed from its siblings.
        /// An endpoint whose name is no longer in the hashtag dictionary is skipped.
        /// </remarks>
        /// <param name="removedTweet">The tweet of which the edges and hashtags should be removed.</param>
        private static void ClearTweetHashtagObjects(Tweet removedTweet)
        {
            foreach (Edge edge in removedTweet.Edges)
            {
                Edge edOut = null;
                if (_dicEdgeIdMappings.TryGetValue(edge.Id, out edOut) && edOut.ParentTweet == removedTweet)
                {
                    HashTag htOut = null;

                    // Left endpoint. Using the looked-up instance avoids a second dictionary access and the
                    // KeyNotFoundException the indexer would raise if the hashtag had already been removed.
                    if (_dicHashtagNameMappings.TryGetValue(edge.Left.Name, out htOut))
                    {
                        if (htOut.LastParentEdge == edge)
                        {
                            _dicHashtagNameMappings.Remove(edge.Left.Name);
                        }
                        else
                        {
                            htOut.RemoveSibling(edge.Right);
                        }
                    }

                    // Right endpoint, handled symmetrically.
                    if (_dicHashtagNameMappings.TryGetValue(edge.Right.Name, out htOut))
                    {
                        if (htOut.LastParentEdge == edge)
                        {
                            _dicHashtagNameMappings.Remove(edge.Right.Name);
                        }
                        else
                        {
                            htOut.RemoveSibling(edge.Left);
                        }
                    }

                    _dicEdgeIdMappings.Remove(edge.Id);
                }
            }
        }