Exemplo n.º 1
0
        /// <summary>
        /// Fetches the authenticated user's home timeline and converts each status into a <see cref="Tweet"/>.
        /// </summary>
        /// <remarks>
        /// The timeline request is attempted up to five times while it keeps returning null.
        /// For retweets, the details of the original (retweeted) status are used.
        /// </remarks>
        /// <returns>
        /// The converted tweets; an empty list when every attempt to fetch the timeline returned null.
        /// </returns>
        public IList<Tweet> GetAllTweets()
        {
            const int maxAttempts = 5;
            List<Tweet> tweets = new List<Tweet>();

            TwitterService twitterService = new TwitterService(AuthenticationTokens.TwitterConsumerKey, AuthenticationTokens.TwitterConsumerSecret);
            twitterService.AuthenticateWith(AuthenticationTokens.TwitterConsumerKey, AuthenticationTokens.TwitterConsumerSecret,
                                      _authentication.AccessToken, _authentication.AccessTokenSecret);

            // The returned user is not needed here; the call is kept because the original made it
            // (presumably to validate the credentials up front -- confirm).
            twitterService.VerifyCredentials();

            //ListTweetsOnHomeTimeline only returns 200 (or 800?) results each go. Need to send the requests a few times per hour
            //with the paging/counts set?
            IEnumerable<TwitterStatus> returnedTweets = null;

            // Try a few times. The loop stops as soon as a call succeeds, so a result obtained
            // on the final attempt is used instead of being thrown away.
            for (int attempt = 0; attempt < maxAttempts && returnedTweets == null; attempt++)
            {
                returnedTweets = twitterService.ListTweetsOnHomeTimeline(200);
            }

            // Every attempt failed: give up and return the empty list.
            if (returnedTweets == null)
            {
                return tweets;
            }

            foreach (TwitterStatus returnedTweet in returnedTweets)
            {
                // For a retweet, report the original status rather than the retweet wrapper.
                TwitterStatus statusToExamine = returnedTweet.RetweetedStatus ?? returnedTweet;

                Tweet tweet = new Tweet();
                // The screen name is used for both the linker id and the linker name.
                tweet.Author = new Linker() { Id = statusToExamine.Author.ScreenName, Name = statusToExamine.Author.ScreenName };
                tweet.Content = statusToExamine.Text;
                tweet.DatePosted = statusToExamine.CreatedDate;
                tweet.TweetId = statusToExamine.Id;
                tweet.ReTweetCount = GetRetweetCountFromRawData(statusToExamine.RawSource);

                tweets.Add(tweet);
            }

            return tweets;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Sets the "date indexed" field of the stored document for the given tweet to the current
        /// local time and writes the document back to the tweet index.
        /// </summary>
        /// <param name="tweetToUpdate">The tweet whose indexed document should be stamped; looked up by <c>TweetId</c>.</param>
        public void UpdateDateIndexed(Tweet tweetToUpdate)
        {
            FSDirectory tweetDirectory = FSDirectory.Open(new DirectoryInfo(Settings.TWEET_INDEX_DIR));
            IndexWriter tweetWriter = new IndexWriter(tweetDirectory, _analyzer, IndexWriter.MaxFieldLength.UNLIMITED);

            try
            {
                // Fetch the currently stored document for this tweet.
                // NOTE(review): if the tweet is not in the index this presumably returns null and the
                // lines below throw -- confirm callers only pass tweets that were indexed first.
                Document existingTweet = _searchengine.GetDocumentForTweetId(tweetToUpdate.TweetId);

                // Stamp the document with the time of this update.
                Field dateUpdated = existingTweet.GetField(Settings.FIELD_TWEET_DATE_INDEXED);
                dateUpdated.SetValue(DateTime.Now.ToString());

                // Replace the stored document, keyed by its tweet id term.
                Term tweetIdTerm = new Term(Settings.FIELD_TWEET_ID, existingTweet.GetField(Settings.FIELD_TWEET_ID).StringValue());
                tweetWriter.UpdateDocument(tweetIdTerm, existingTweet);
            }
            finally
            {
                // Always close the writer, even when the lookup or update throws, so it is not
                // left open (an open IndexWriter keeps holding the index write lock).
                tweetWriter.Close();
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Indexes every URL found in the supplied tweet into the URL index, then stamps the tweet
        /// as indexed.
        /// </summary>
        /// <remarks>
        /// A URL that is already indexed is not added again; its existing document is updated with
        /// the supplied indexes and the tweet id, and rewritten only if something changed.
        /// </remarks>
        /// <param name="tweet">The tweet whose URLs should be indexed.</param>
        /// <param name="indexes">The index identifiers to associate with each URL document.</param>
        public void IndexUrlsInTweet(Tweet tweet, IList<string> indexes)
        {
            //find urls to index in the url index
            foreach (Uri uri in tweet.GetUrlsFromTweet())
            {
                Console.WriteLine("URL" + uri);

                //setup index writer (one per URL, as before)
                FSDirectory luceneDirectory = FSDirectory.Open(new DirectoryInfo(Settings.URL_INDEX_DIR));
                IndexWriter writer = new IndexWriter(luceneDirectory, _analyzer, IndexWriter.MaxFieldLength.UNLIMITED);

                try
                {
                    //need to check if its not already indexed
                    //if it is already indexed, then just add a user to the index field in lucene
                    Document existingDoc = _searchengine.GetDocumentForUrl(uri.ToString());
                    if (existingDoc != null)
                    {
                        //document already exists, add a user to it.
                        Console.WriteLine("Already Exists");

                        // Non-short-circuit |= on purpose: both updates must run even if the first
                        // one already reported a change.
                        bool wasUpdated = false;
                        wasUpdated |= UpdateIndexes(existingDoc, indexes);
                        wasUpdated |= UpdateTweets(existingDoc, tweet.TweetId);

                        //only update document if it was changed.
                        if (wasUpdated)
                        {
                            writer.UpdateDocument(new Term(Settings.FIELD_URL_ID, existingDoc.GetField(Settings.FIELD_URL_ID).StringValue()), existingDoc);
                        }

                        // The finally block below closes the writer before the next URL.
                        continue;
                    }

                    // New URL: IndexUrl may return null, in which case nothing is added.
                    Document luceneDocument = IndexUrl(uri, indexes, tweet.TweetId);
                    if (luceneDocument != null)
                    {
                        writer.AddDocument(luceneDocument);
                    }

                    writer.Optimize();
                }
                finally
                {
                    // Close on every path, including exceptions, so the writer is never left open
                    // (an open IndexWriter keeps holding the index write lock).
                    writer.Close();
                }
            }

            //update the date indexed on the tweet
            _tweetIndexer.UpdateDateIndexed(tweet);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Indexes a tweet in the tweet index.
        /// If the tweet is not indexed yet, a new document is built and added.
        /// If it already exists, the existing document is handed to
        /// <c>UpdateIndexForDocument</c> together with <paramref name="indexId"/>.
        /// </summary>
        /// <param name="tweetToIndex">The tweet to index; looked up by <c>TweetId</c>.</param>
        /// <param name="indexId">The identifier of the index this tweet belongs to.</param>
        public void IndexTweet(Tweet tweetToIndex, string indexId)
        {
            //setup index writing
            FSDirectory tweetDirectory = FSDirectory.Open(new DirectoryInfo(Settings.TWEET_INDEX_DIR));
            IndexWriter tweetWriter = new IndexWriter(tweetDirectory, _analyzer, IndexWriter.MaxFieldLength.UNLIMITED);

            try
            {
                //check the tweet is not already indexed.
                Document existingTweet = _searchengine.GetDocumentForTweetId(tweetToIndex.TweetId);

                if (existingTweet == null)
                {
                    //the tweet doesn't exist yet, index it.
                    Document tweetDocument = IndexTweetDetails(tweetToIndex, indexId);
                    tweetWriter.AddDocument(tweetDocument);
                }
                else
                {
                    //update the index list on the existing document
                    // NOTE(review): the modified document is not written back through tweetWriter
                    // here; verify that UpdateIndexForDocument persists the change itself.
                    UpdateIndexForDocument(indexId, existingTweet);
                }

                tweetWriter.Optimize();
            }
            finally
            {
                // Always close the writer, even on failure, so it is never left open
                // (an open IndexWriter keeps holding the index write lock).
                tweetWriter.Close();
            }
        }
Exemplo n.º 5
0
        /// <summary>
        /// Builds the Lucene document that represents the supplied tweet.
        /// </summary>
        /// <remarks>
        /// All fields are stored. Only the tweet text is analyzed; every other field is indexed
        /// verbatim (NOT_ANALYZED). The date-indexed field starts out as "0"
        /// (presumably meaning "not processed yet" -- confirm against the readers of that field).
        /// </remarks>
        /// <param name="tweet">The tweet to convert.</param>
        /// <param name="indexId">The identifier of the index the tweet belongs to.</param>
        /// <returns>A new document containing the tweet's fields.</returns>
        private Document IndexTweetDetails(Tweet tweet, string indexId)
        {
            Document luceneDocument = new Document();
            Field textField = new Field(Settings.FIELD_TWEET_TEXT, tweet.Content, Field.Store.YES, Field.Index.ANALYZED);
            Field idField = new Field(Settings.FIELD_TWEET_ID, tweet.TweetId.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED);
            // The author's id goes into the LINKER_ID field and the name into LINKER_NAME.
            // These were previously crossed, which made GetTweetFromDocument return a tweet
            // whose author Id and Name were swapped.
            Field linkerIdField = new Field(Settings.FIELD_TWEET_LINKER_ID, tweet.Author.Id, Field.Store.YES, Field.Index.NOT_ANALYZED);
            Field linkerNameField = new Field(Settings.FIELD_TWEET_LINKER_NAME, tweet.Author.Name, Field.Store.YES, Field.Index.NOT_ANALYZED);
            Field linkerRepField = new Field(Settings.FIELD_TWEET_LINKER_REP, tweet.Author.ReputationScore.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED);
            Field datePostedField = new Field(Settings.FIELD_TWEET_DATE_POSTED, tweet.DatePosted.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED);
            Field dateUpdatedField = new Field(Settings.FIELD_TWEET_DATE_INDEXED, "0", Field.Store.YES, Field.Index.NOT_ANALYZED);
            Field indexField = new Field(Settings.FIELD_TWEET_INDEXES, indexId, Field.Store.YES, Field.Index.NOT_ANALYZED);

            luceneDocument.Add(textField);
            luceneDocument.Add(idField);
            luceneDocument.Add(linkerIdField);
            luceneDocument.Add(linkerNameField);
            luceneDocument.Add(linkerRepField);
            luceneDocument.Add(datePostedField);
            luceneDocument.Add(dateUpdatedField);
            luceneDocument.Add(indexField);

            return luceneDocument;
        }
        /// <summary>
        /// Parses a raw JSON message and, when it is a status update that contains at least one
        /// URL, converts it to a <see cref="Tweet"/> and passes it to the tweet indexer.
        /// </summary>
        /// <param name="p">The raw JSON text of the message.</param>
        /// <param name="indexIdentifier">The identifier of the index the resulting tweet is indexed under.</param>
        private void ParseMessage(string p, string indexIdentifier )
        {
            JObject obj = (JObject)JsonConvert.DeserializeObject(p);

            //check its an update: only messages that carry a "user" object are processed
            var status = obj.SelectToken("user", false);

            if (status != null)
            {
                // SelectToken returns null when the path is absent (errorWhenNoMatch is false),
                // so guard against a missing "entities.urls" before reading HasValues.
                var urls = obj.SelectToken("entities.urls", false);
                if (urls != null && urls.HasValues)
                {
                    //it has a URL
                    Console.WriteLine(urls[0]["url"]);

                    Tweet tweet = new Tweet();
                    tweet.Author = new Linker()
                                    {
                                        Id = obj.SelectToken("user.id", false).ToString(),
                                        Name = (string)obj.SelectToken("user.screen_name", false)
                                    };
                    tweet.Content = (string)obj.SelectToken("text", false);

                    tweet.TweetId = (long) obj.SelectToken("id", false);
                    tweet.DatePosted = Tweet.GetDateTimeFromTwitterFormat((string)obj["created_at"]);
                    tweet.ReTweetCount = (int)obj.SelectToken("retweet_count", false);
                    System.Diagnostics.Debug.WriteLine(tweet.Content);

                    //get the index
                    //TODO: when site streaming activated updated it for this
                    //string indexIdentifier = (string) obj.SelectToken("for_user", false);

                    _tweetIndexer.IndexTweet(tweet, indexIdentifier);
                }
            }

            // Every message is traced, whether or not it was indexed.
            System.Diagnostics.Debug.WriteLine("Message: {0}", new object[] { obj.ToString() });
        }
        /// <summary>
        /// Rebuilds a <see cref="Tweet"/> from the stored fields of a Lucene tweet document.
        /// </summary>
        /// <param name="tweetDoc">The document to read the tweet fields from.</param>
        /// <returns>A tweet populated with content, id, date posted and author details.</returns>
        private Tweet GetTweetFromDocument(Document tweetDoc)
        {
            // Tweet-level fields, read in the same order as before.
            string storedText = tweetDoc.GetField(Settings.FIELD_TWEET_TEXT).StringValue();
            string storedId = tweetDoc.GetField(Settings.FIELD_TWEET_ID).StringValue();
            string storedDatePosted = tweetDoc.GetField(Settings.FIELD_TWEET_DATE_POSTED).StringValue();

            Tweet result = new Tweet
            {
                Content = storedText,
                TweetId = long.Parse(storedId),
                DatePosted = DateTime.Parse(storedDatePosted),
                Author = new Linker()
            };

            // Author (linker) fields.
            string storedLinkerId = tweetDoc.GetField(Settings.FIELD_TWEET_LINKER_ID).StringValue();
            result.Author.Id = storedLinkerId;

            string storedLinkerName = tweetDoc.GetField(Settings.FIELD_TWEET_LINKER_NAME).StringValue();
            result.Author.Name = storedLinkerName;

            string storedReputation = tweetDoc.GetField(Settings.FIELD_TWEET_LINKER_REP).StringValue();
            result.Author.ReputationScore = double.Parse(storedReputation);

            return result;
        }