Пример #1
0
 /// <summary>
 /// Decides whether a tweet is a reply directed at an id other than <c>ownId</c>.
 /// Each tweet that gets excluded also increments <c>Count</c>.
 /// </summary>
 /// <param name="tweet">tweet whose <c>in_reply_to_user_id</c> is inspected</param>
 /// <returns>
 /// true when <c>in_reply_to_user_id</c> is non-empty and differs from <c>ownId</c>;
 /// false otherwise
 /// </returns>
 public bool Exclude(JsonTweet tweet)
 {
     // No reply target recorded: nothing to filter on.
     if (string.IsNullOrEmpty(tweet.in_reply_to_user_id))
     {
         return false;
     }

     // Reply target equals ownId: keep the tweet.
     if (tweet.in_reply_to_user_id == ownId)
     {
         return false;
     }

     // Reply to some other id: count it and filter it out.
     Count++;
     return true;
 }
Пример #2
0
 /// <summary>
 /// Check tweet to determine whether to filter it out.
 /// A tweet is excluded when its <c>retweeted</c> flag is set or when its text
 /// starts with "RT " or "MT "; each excluded tweet increments <c>Count</c>.
 /// </summary>
 /// <param name="tweet">tweet to check</param>
 /// <returns>true if tweet should be filtered out</returns>
 public bool Exclude(JsonTweet tweet)
 {
     // Checked with archives from 2019-04-14 and 2020-07-03 - the "retweeted" flag is always false, even for retweets,
     // so the text prefix tests below are what actually catch them.
     // Ordinal StartsWith inspects only the leading characters and does not depend on the current culture.
     if (tweet.retweeted ||
         tweet.full_text.StartsWith("RT ", System.StringComparison.Ordinal) ||
         tweet.full_text.StartsWith("MT ", System.StringComparison.Ordinal))
     {
         Count++;
         return(true);
     }
     return(false);
 }
Пример #3
0
 /// <summary>
 /// Decides whether a tweet falls outside the configured time range.
 /// The lower bound applies only when <c>hasStart</c> is set and the upper bound
 /// only when <c>hasEnd</c> is set. Each excluded tweet increments <c>Count</c>.
 /// </summary>
 /// <param name="tweet">tweet whose <c>created_at_time</c> is compared with the bounds</param>
 /// <returns>
 /// true when the tweet was created before <c>start</c> or after <c>end</c>
 /// (for whichever bounds are active); false otherwise
 /// </returns>
 public bool Exclude(JsonTweet tweet)
 {
     // Both bounds are exclusive tests: a tweet exactly at start or end is kept.
     bool outsideRange =
         (this.hasStart && (this.start > tweet.created_at_time)) ||
         (this.hasEnd && (this.end < tweet.created_at_time));

     if (!outsideRange)
     {
         return false;
     }

     Count++;
     return true;
 }
Пример #4
0
 /// <summary>
 /// Decides whether a tweet's text contains any entry of <c>elements</c>.
 /// The search stops at the first entry found, and that match increments <c>Count</c>.
 /// </summary>
 /// <param name="tweet">tweet whose <c>full_text</c> is searched</param>
 /// <returns>true when at least one entry occurs in the text; false otherwise</returns>
 public bool Exclude(JsonTweet tweet)
 {
     foreach (var needle in elements)
     {
         // IndexOf yields a non-negative position exactly when the entry occurs in the text.
         bool found = tweet.full_text.IndexOf(needle) >= 0;
         if (!found)
         {
             continue;
         }

         Count++;
         return true;
     }

     // None of the entries occurred in the text.
     return false;
 }
Пример #5
0
        /// <summary>
        /// Filter given list of tweets
        /// Each tweet is passed through the registered filters; the first filter that excludes it wins.
        /// If corrections are excluded, each surviving tweet is then compared with the previously kept
        /// tweet, and that earlier tweet is dropped from the result when the two are almost identical.
        /// </summary>
        /// <param name="tweets">tweets to filter</param>
        /// <param name="excludeCorrections">If true, exclude the first of any pair of consecutive tweets that are almost identical</param>
        /// <param name="corrections">If corrections are excluded, this will be filled with correction pairs (excluded, kept); may be null if the pairs are not needed</param>
        /// <returns>filtered list of tweets</returns>
        public List <JsonTweet> FilterTweets(List <JsonTweet> tweets, bool excludeCorrections, List <Tuple <JsonTweet, JsonTweet> > corrections)
        {
            List <JsonTweet> result = new List <JsonTweet>();

            correctionCount         = 0;
            this.excludeCorrections = excludeCorrections;

            JsonTweet previous         = null;
            TimeSpan  correctionWindow = TimeSpan.FromHours(12);

            foreach (JsonTweet tweet in tweets)
            {
                // Apply filters, stopping at the first one that rejects the tweet
                bool exclude = false;
                foreach (IFilter filter in filters)
                {
                    exclude = filter.Exclude(tweet);
                    if (exclude)
                    {
                        break;
                    }
                }
                if (exclude)
                {
                    continue;
                }

                // Compare to previous?
                // NOTE(review): the window test is a signed difference, so it presumably expects
                // tweets in ascending created_at_time order - confirm against callers.
                if (excludeCorrections && (previous != null) &&
                    ((tweet.created_at_time - previous.created_at_time) < correctionWindow))
                {
                    int distance = LevenshteinDistance.Compute(tweet.full_text, previous.full_text);
                    if (distance <= levenshteinDistanceLimit)
                    {
                        // Correction: "previous" is always the most recently added entry of result,
                        // so drop the last element by index. This is O(1) and cannot hit an earlier
                        // entry that merely compares equal to it.
                        result.RemoveAt(result.Count - 1);
                        ++correctionCount;

                        // The pair list is an optional output
                        if (corrections != null)
                        {
                            corrections.Add(new Tuple <JsonTweet, JsonTweet>(previous, tweet));
                        }
                    }
                }
                previous = tweet;
                result.Add(tweet);
            }
            return(result);
        }