/// <summary>
/// Decide whether a tweet must be dropped because it is a reply to somebody else.
/// A tweet is dropped when its in_reply_to_user_id is non-empty and differs from ownId;
/// every dropped tweet increments Count.
/// </summary>
/// <param name="tweet">tweet to inspect</param>
/// <returns>true if the tweet should be filtered out, false if it should be kept</returns>
public bool Exclude(JsonTweet tweet)
{
    // No reply target at all: keep the tweet.
    if (string.IsNullOrEmpty(tweet.in_reply_to_user_id))
    {
        return false;
    }

    // The reply target is ownId: keep the tweet.
    if (tweet.in_reply_to_user_id == ownId)
    {
        return false;
    }

    // Reply to a different user id: count it and filter it out.
    Count++;
    return true;
}
/// <summary>
/// Check tweet to determine whether to filter it out as a retweet.
/// A tweet is filtered out when its "retweeted" flag is set, or when its text starts with
/// the literal prefix "RT " or "MT ". Every filtered tweet increments Count.
/// </summary>
/// <param name="tweet">tweet to check</param>
/// <returns>true if tweet should be filtered out</returns>
public bool Exclude(JsonTweet tweet)
{
    // Checked with archives from 2019-04-14 and 2020-07-03 - the "retweeted" flag is always false,
    // even for retweets, so the text prefix has to be inspected as well.
    //
    // StartsWith with an ordinal comparison is used instead of "IndexOf(...) == 0":
    // it only looks at the beginning of the text rather than searching all of it, and the
    // result does not depend on the current culture's comparison rules.
    bool isRetweet = tweet.retweeted
                     || tweet.full_text.StartsWith("RT ", System.StringComparison.Ordinal)
                     || tweet.full_text.StartsWith("MT ", System.StringComparison.Ordinal);

    if (isRetweet)
    {
        Count++;
        return true;
    }

    return false;
}
/// <summary>
/// Decide whether a tweet falls outside the configured time range.
/// The lower bound is only applied when hasStart is set, the upper bound only when hasEnd is set.
/// Every tweet that is filtered out increments Count.
/// </summary>
/// <param name="tweet">tweet to inspect</param>
/// <returns>true if the tweet should be filtered out, false if it should be kept</returns>
public bool Exclude(JsonTweet tweet)
{
    // Lower bound: start lies after the tweet's creation time.
    if (this.hasStart && (this.start > tweet.created_at_time))
    {
        Count++;
        return true;
    }

    // Upper bound: end lies before the tweet's creation time.
    if (this.hasEnd && (this.end < tweet.created_at_time))
    {
        Count++;
        return true;
    }

    return false;
}
/// <summary>
/// Decide whether a tweet must be dropped because its text contains one of the configured elements.
/// The elements are tried in order; the first one found in full_text causes the tweet to be
/// filtered out and increments Count.
/// </summary>
/// <param name="tweet">tweet to inspect</param>
/// <returns>true if the tweet should be filtered out, false if none of the elements occurs in its text</returns>
public bool Exclude(JsonTweet tweet)
{
    foreach (var candidate in elements)
    {
        // IndexOf yields a negative value when the candidate does not occur in the text.
        if (tweet.full_text.IndexOf(candidate) < 0)
        {
            continue;
        }

        Count++;
        return true;
    }

    return false;
}
/// <summary>
/// Filter given list of tweets.
/// Each tweet is passed through the configured filters in order and dropped as soon as one
/// filter excludes it (later filters are not consulted for that tweet).
/// If corrections are excluded, each tweet that passed the filters is additionally compared to the
/// previously kept tweet; if both are less than 12 hours apart and their texts are within
/// levenshteinDistanceLimit of each other, the previously kept tweet is removed from the result.
/// Resets correctionCount and stores the excludeCorrections setting on the instance.
/// </summary>
/// <param name="tweets">tweets to filter</param>
/// <param name="excludeCorrections">If true, exclude the first of any pair of consecutive tweets that are almost identical</param>
/// <param name="corrections">If corrections are excluded, this will be filled with correction pairs (excluded, kept).
/// May be null, in which case corrections are still removed and counted but the pairs are not recorded.</param>
/// <returns>filtered list of tweets</returns>
public List<JsonTweet> FilterTweets(List<JsonTweet> tweets, bool excludeCorrections, List<Tuple<JsonTweet, JsonTweet>> corrections)
{
    List<JsonTweet> result = new List<JsonTweet>();
    correctionCount = 0;
    this.excludeCorrections = excludeCorrections;

    // Most recently kept tweet; whenever it is non-null it is also the last element of result.
    JsonTweet previous = null;
    TimeSpan correctionWindow = TimeSpan.FromHours(12);

    foreach (JsonTweet tweet in tweets)
    {
        // Apply filters; stop at the first filter that excludes the tweet.
        bool exclude = false;
        foreach (IFilter filter in filters)
        {
            if (filter.Exclude(tweet))
            {
                exclude = true;
                break;
            }
        }

        if (exclude)
        {
            continue;
        }

        // Compare to the previously kept tweet?
        // NOTE(review): the window check subtracts previous from current, so it presumably expects
        // tweets in chronological order; for newest-first input the difference is negative and
        // always passes the check - confirm the ordering guaranteed by callers.
        if (excludeCorrections
            && (previous != null)
            && ((tweet.created_at_time - previous.created_at_time) < correctionWindow))
        {
            int distance = LevenshteinDistance.Compute(tweet.full_text, previous.full_text);
            if (distance <= levenshteinDistanceLimit)
            {
                // Correction, so remove previous from the list. previous is always the last
                // element of result, so remove it by index: this avoids a linear search per
                // correction and cannot hit an earlier, equal entry by mistake.
                result.RemoveAt(result.Count - 1);
                ++correctionCount;

                if (corrections != null)
                {
                    corrections.Add(new Tuple<JsonTweet, JsonTweet>(previous, tweet));
                }
            }
        }

        previous = tweet;
        result.Add(tweet);
    }

    return result;
}