/// <summary>
/// Verifies that adding an edge to a tweet makes that tweet the edge's <c>ParentTweet</c>.
/// </summary>
public void AddEdge_EdgeParentTweetMustBeSet()
{
    // Arrange
    var edge = new Edge(new HashTag(), new HashTag());
    var tweet = new Tweet();

    // Act
    tweet.AddEdge(edge);

    // Assert
    Assert.AreSame(tweet, edge.ParentTweet);
}
/// <summary>
/// Verifies that a tweet reports exactly one edge after a single edge has been added to it.
/// </summary>
public void AddEdge()
{
    // Arrange
    const int expectedEdgeCount = 1;
    var edge = new Edge(new HashTag(), new HashTag());
    var tweet = new Tweet();

    // Act
    tweet.AddEdge(edge);

    // Assert
    Assert.AreEqual<int>(expectedEdgeCount, tweet.Edges.Length);
}
/// <summary>
/// Verifies that when the same edge is added to two tweets in turn, the edge's
/// <c>ParentTweet</c> is the tweet it was added to last.
/// </summary>
public void AddSameEdgeToTwoTweets_EdgeParentTweetMustBeTheLastOne()
{
    // Arrange
    var sharedEdge = new Edge(new HashTag(), new HashTag());

    // Act: attach the edge to a first tweet, then to a second one.
    var firstTweet = new Tweet();
    firstTweet.AddEdge(sharedEdge);

    var secondTweet = new Tweet();
    secondTweet.AddEdge(sharedEdge);

    // Assert
    Assert.AreSame(secondTweet, sharedEdge.ParentTweet);
}
/// <summary>
/// Verifies that disposing a tweet leaves both hashtags of its edge with no siblings.
/// </summary>
public void Dispose_MustRemoveAllItsEdges()
{
    // Arrange
    const int expectedSiblingCount = 0;
    var edge = new Edge(new HashTag(), new HashTag());
    var tweet = new Tweet();
    tweet.AddEdge(edge);

    // Act
    tweet.Dispose();

    // Assert: neither end of the edge still references the other.
    Assert.AreEqual<int>(expectedSiblingCount, edge.Left.Siblings.Length);
    Assert.AreEqual<int>(expectedSiblingCount, edge.Right.Siblings.Length);
}
/// <summary>
/// Builds the fixed set of seven sample tweets used as test input, in chronological order.
/// The first six are created between 21:40:10 and 21:40:59 on 2015-11-03; the seventh is
/// created at 21:41:25.
/// </summary>
/// <returns>A new array containing the sample tweets.</returns>
private Tweet[] GetSampleTweets()
{
    return new Tweet[]
    {
        new Tweet
        {
            text = "Hi. This is a #Test of #TweetAnalyzer",
            timestamp_ms = "12",
            CreatedAt = new DateTime(2015, 11, 3, 21, 40, 10)
        },
        new Tweet
        {
            text = "This amazing #Test is called #UnitTest",
            timestamp_ms = "12",
            CreatedAt = new DateTime(2015, 11, 3, 21, 40, 15)
        },
        new Tweet
        {
            text = "#UnitTest is a great practice in development of complex applications.",
            timestamp_ms = "12",
            CreatedAt = new DateTime(2015, 11, 3, 21, 40, 35)
        },
        new Tweet
        {
            text = "This tweet has no hashtag.",
            timestamp_ms = "12",
            CreatedAt = new DateTime(2015, 11, 3, 21, 40, 37)
        },
        new Tweet
        {
            text = "This #Unittest is very #helpful",
            timestamp_ms = "12",
            CreatedAt = new DateTime(2015, 11, 3, 21, 40, 42)
        },
        new Tweet
        {
            text = "This #Unittest helps analyze the #tweetanalyzer itsetlf",
            timestamp_ms = "12",
            CreatedAt = new DateTime(2015, 11, 3, 21, 40, 59)
        },
        new Tweet
        {
            text = "This tweet is outside the #timewindow and is #helpful",
            // NOTE(review): timestamp_ms is not assigned for this last tweet (the
            // assignment was commented out in the fixture) - confirm this is intended.
            CreatedAt = new DateTime(2015, 11, 3, 21, 41, 25)
        }
    };
}
/// <summary>
/// Processes one incoming tweet: updates the 60-second tweet window, cleans up the tweet
/// text, updates the hashtag data and, when an output repository is supplied, writes the
/// cleaned text (feature one) and the current average degree (feature two).
/// </summary>
/// <param name="tweet">The tweet to process. Must not be null.</param>
/// <param name="outpurRepo">Repository the results are written to; when null, nothing is written.</param>
/// <returns>A task that completes when processing and any output writes have finished.</returns>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="tweet"/> is null. Because this method is async, the exception is reported
/// through the returned task.
/// </exception>
public static async Task ExtractTweetFeatures(Tweet tweet, Repositories.IOutput outpurRepo)
{
    // Reject a null tweet up front: otherwise it would be enqueued into the shared window
    // queue before the first member access fails, leaving a null entry behind.
    if (tweet == null)
    {
        throw new System.ArgumentNullException("tweet");
    }

    //Maintain the 60 second window of tweets
    const int timeWindowInSeconds = 60;
    MaintainTimeWindow(tweet, timeWindowInSeconds);

    //Clean the tweet text up
    string strResult = GetCleanedupTweetText(tweet.text);
    if (string.IsNullOrEmpty(strResult))
    {
        // Nothing left after clean-up: no hashtags to process and nothing to write.
        return;
    }

    //Process hashtags and related features (graphs and so on)
    ProcessHashtagData(tweet, strResult);

    if (outpurRepo != null)
    {
        await outpurRepo.WriteFeatureOneOutputAsync(strResult, tweet.created_at);
        await outpurRepo.WriteFeatureTwoOutputAsync(CalculateAverageDegree());
    }
}
/// <summary>
/// Takes required actions to maintain the desired time window of tweets, by adding the new input tweet to the queue
/// and removing tweets old enough to fall outside the window.
/// </summary>
/// <remarks>
/// The new tweet is always enqueued. Eviction only runs when the new tweet has both text and a
/// <c>timestamp_ms</c> value. Eviction stops as soon as the queue is empty, so a negative
/// duration cannot cause a <c>Peek</c> on an empty queue.
/// </remarks>
/// <param name="newTweet">New received tweet</param>
/// <param name="timeWindowDurationInSeconds">The desired duration of the window</param>
private static void MaintainTimeWindow(Tweet newTweet, int timeWindowDurationInSeconds)
{
    _qOneMinuteWindowsQueue.Enqueue(newTweet);

    // Tweets without text or a timestamp are queued but do not trigger eviction.
    if (string.IsNullOrEmpty(newTweet.text) || string.IsNullOrEmpty(newTweet.timestamp_ms))
    {
        return;
    }

    // Everything created before this instant is outside the window.
    var windowStart = newTweet.CreatedAt.AddSeconds(-timeWindowDurationInSeconds);

    while (_qOneMinuteWindowsQueue.Count > 0
        && _qOneMinuteWindowsQueue.Peek().CreatedAt < windowStart)
    {
        Tweet removedTweet = _qOneMinuteWindowsQueue.Dequeue();
        ClearTweetHashtagObjects(removedTweet);

        //Calling Dispose() on the removed tweet is necessary for all its edges to get correctly removed.
        removedTweet.Dispose();
    }
}
/// <summary>
/// Processes the hashtags of a tweet and their relational graph: every hashtag found in the text is
/// registered in the hashtag dictionary, and an edge is created (and registered in the edge dictionary)
/// between it and each other hashtag already collected for the tweet.
/// </summary>
/// <remarks>
/// Hashtag names are trimmed and lower-cased with the invariant culture so that dictionary keys do not
/// depend on the current thread culture. A hashtag repeated within the same tweet is processed only once.
/// </remarks>
/// <param name="tweet">Tweet to be processed</param>
/// <param name="tweetText">The cleaned-up ASCII string of the tweet.</param>
private static void ProcessHashtagData(Tweet tweet, string tweetText)
{
    //Extract hashtags from tweet text
    var hashtags = Regex.Matches(tweetText, @"(\b|\s)?#\w+");
    foreach (Match tag in hashtags)
    {
        // The pattern may capture a leading whitespace character, hence the Trim().
        string strHashtagName = tag.Value.Trim().ToLowerInvariant();

        HashTag hashTag;
        if (!_dicHashtagNameMappings.TryGetValue(strHashtagName, out hashTag))
        {
            hashTag = new HashTag() { Name = strHashtagName };
            _dicHashtagNameMappings.Add(strHashtagName, hashTag);
        }

        // A hashtag repeated in the same tweet must not be collected twice; otherwise every
        // later hashtag would create a duplicate edge (same Id) to it.
        if (tweet.HashTags.Contains(hashTag))
        {
            continue;
        }

        // Connect the new hashtag with every hashtag collected so far for this tweet.
        foreach (var t in tweet.HashTags)
        {
            if (t != hashTag)
            {
                Edge newEdge = new Edge(t, hashTag);
                tweet.AddEdge(newEdge);

                // Insert or overwrite: the newest edge with a given Id replaces any older one.
                _dicEdgeIdMappings[newEdge.Id] = newEdge;
            }
        }

        tweet.HashTags.Add(hashTag);
    }
}
/// <summary>
/// Removes the tweet's edges and their corresponding hashtags from the edges/hashtags dictionaries.
/// </summary>
/// <remarks>
/// An edge is only cleaned up when the edge currently registered under its Id belongs to
/// <paramref name="removedTweet"/>; otherwise it is left untouched.
/// </remarks>
/// <param name="removedTweet">The tweet of which the edges and hashtags should be removed.</param>
private static void ClearTweetHashtagObjects(Tweet removedTweet)
{
    foreach (Edge edge in removedTweet.Edges)
    {
        Edge registeredEdge;
        if (_dicEdgeIdMappings.TryGetValue(edge.Id, out registeredEdge)
            && registeredEdge.ParentTweet == removedTweet)
        {
            DetachHashtagFromEdge(edge.Left, edge.Right, edge);
            DetachHashtagFromEdge(edge.Right, edge.Left, edge);

            _dicEdgeIdMappings.Remove(edge.Id);
        }
    }
}

/// <summary>
/// Updates the hashtag dictionary for one end of a removed edge: the registered hashtag is removed
/// when the edge is its <c>LastParentEdge</c>; otherwise only the sibling link is removed.
/// Does nothing when no hashtag is registered under the endpoint's name.
/// </summary>
/// <param name="endpoint">The hashtag at the end of the edge being processed.</param>
/// <param name="sibling">The hashtag at the opposite end of the edge.</param>
/// <param name="edge">The edge being removed.</param>
private static void DetachHashtagFromEdge(HashTag endpoint, HashTag sibling, Edge edge)
{
    HashTag registeredHashtag;
    if (!_dicHashtagNameMappings.TryGetValue(endpoint.Name, out registeredHashtag))
    {
        // Not registered (any more): indexing the dictionary here would throw KeyNotFoundException.
        return;
    }

    if (registeredHashtag.LastParentEdge == edge)
    {
        _dicHashtagNameMappings.Remove(endpoint.Name);
    }
    else
    {
        registeredHashtag.RemoveSibling(sibling);
    }
}