/// <summary>
/// Gets the tweets currently on the authenticated user's home timeline.
/// </summary>
/// <returns>
/// One <c>Tweet</c> per returned status. When a status is a retweet, the details of the
/// original (retweeted) status are reported instead. The list is empty if the timeline
/// could not be retrieved after five attempts.
/// </returns>
public IList<Tweet> GetAllTweets()
{
    // Number of times the timeline request is attempted before giving up.
    const int maxAttempts = 5;

    List<Tweet> tweets = new List<Tweet>();

    TwitterService twitterService = new TwitterService(AuthenticationTokens.TwitterConsumerKey, AuthenticationTokens.TwitterConsumerSecret);
    twitterService.AuthenticateWith(AuthenticationTokens.TwitterConsumerKey, AuthenticationTokens.TwitterConsumerSecret, _authentication.AccessToken, _authentication.AccessTokenSecret);

    // The returned user was never used; the call itself is kept so the request sequence is unchanged.
    twitterService.VerifyCredentials();

    // ListTweetsOnHomeTimeline only returns a limited number of results per call (200 requested here).
    // TODO: page through the timeline / poll periodically to collect more.
    IEnumerable<TwitterStatus> returnedTweets = null;

    // A null result is treated as a failed request and retried. The loop stops as soon as a
    // request succeeds, so a success on the final attempt is no longer discarded.
    for (int attempt = 0; attempt < maxAttempts && returnedTweets == null; attempt++)
    {
        returnedTweets = twitterService.ListTweetsOnHomeTimeline(200);
    }

    if (returnedTweets == null)
    {
        // Every attempt failed: give up and report no tweets.
        return tweets;
    }

    foreach (TwitterStatus returnedTweet in returnedTweets)
    {
        // For retweets, examine the original status rather than the retweet wrapper.
        TwitterStatus statusToExamine = returnedTweet.RetweetedStatus ?? returnedTweet;

        Tweet tweet = new Tweet();
        // The screen name is used for both the linker id and the linker name.
        tweet.Author = new Linker() { Id = statusToExamine.Author.ScreenName, Name = statusToExamine.Author.ScreenName };
        tweet.Content = statusToExamine.Text;
        tweet.DatePosted = statusToExamine.CreatedDate;
        tweet.TweetId = statusToExamine.Id;
        tweet.ReTweetCount = GetRetweetCountFromRawData(statusToExamine.RawSource);
        tweets.Add(tweet);
    }

    return tweets;
}
/// <summary>
/// Sets the "date indexed" field of an already-indexed tweet to the current local time
/// and writes the updated document back to the tweet index.
/// </summary>
/// <param name="tweetToUpdate">The tweet whose index document should be updated; looked up by its TweetId.</param>
/// <remarks>
/// If no document is found for the tweet id, nothing is written.
/// </remarks>
public void UpdateDateIndexed(Tweet tweetToUpdate)
{
    FSDirectory tweetDirectory = FSDirectory.Open(new DirectoryInfo(Settings.TWEET_INDEX_DIR));
    IndexWriter tweetWriter = new IndexWriter(tweetDirectory, _analyzer, IndexWriter.MaxFieldLength.UNLIMITED);
    try
    {
        // Fetch the document currently stored for this tweet.
        Document existingTweet = _searchengine.GetDocumentForTweetId(tweetToUpdate.TweetId);
        if (existingTweet == null)
        {
            // Tweet is not in the index: there is nothing to update.
            return;
        }

        // Stamp the document with the time it was processed.
        // NOTE(review): DateTime.Now.ToString() is culture- and time-zone-dependent; confirm readers of this field expect that format.
        Field dateUpdated = existingTweet.GetField(Settings.FIELD_TWEET_DATE_INDEXED);
        dateUpdated.SetValue(DateTime.Now.ToString());

        // Replace the stored document, matching on the tweet id term.
        Term tweetIdTerm = new Term(Settings.FIELD_TWEET_ID, existingTweet.GetField(Settings.FIELD_TWEET_ID).StringValue());
        tweetWriter.UpdateDocument(tweetIdTerm, existingTweet);
    }
    finally
    {
        // Always close the writer, even on failure, so it is not left open when an exception escapes.
        tweetWriter.Close();
    }
}
/// <summary>
/// Indexes every URL found in the supplied tweet into the URL index, then marks the
/// tweet as processed by updating its "date indexed" value.
/// </summary>
/// <param name="tweet">The tweet whose URLs should be indexed.</param>
/// <param name="indexes">The index identifiers to associate with each URL document.</param>
/// <remarks>
/// A URL that is already indexed is not re-indexed; its existing document is updated with the
/// supplied indexes and this tweet's id, and only rewritten when one of those changed it.
/// </remarks>
public void IndexUrlsInTweet(Tweet tweet, IList<string> indexes)
{
    foreach (Uri uri in tweet.GetUrlsFromTweet())
    {
        Console.WriteLine("URL" + uri);

        // A writer is opened (and closed) per URL, as before, so each URL's changes are
        // written out before the next URL is looked up.
        FSDirectory luceneDirectory = FSDirectory.Open(new DirectoryInfo(Settings.URL_INDEX_DIR));
        IndexWriter writer = new IndexWriter(luceneDirectory, _analyzer, IndexWriter.MaxFieldLength.UNLIMITED);
        try
        {
            Document existingDoc = _searchengine.GetDocumentForUrl(uri.ToString());
            if (existingDoc != null)
            {
                // Already indexed: just attach the indexes and this tweet to the existing document.
                Console.WriteLine("Already Exists");
                bool wasUpdated = false;
                wasUpdated |= UpdateIndexes(existingDoc, indexes);
                wasUpdated |= UpdateTweets(existingDoc, tweet.TweetId);

                // Only rewrite the document if something actually changed.
                if (wasUpdated)
                {
                    writer.UpdateDocument(new Term(Settings.FIELD_URL_ID, existingDoc.GetField(Settings.FIELD_URL_ID).StringValue()), existingDoc);
                }

                // The finally block below closes the writer before moving to the next URL.
                continue;
            }

            // New URL: build its document (IndexUrl may return null, in which case nothing is added).
            Document luceneDocument = IndexUrl(uri, indexes, tweet.TweetId);
            if (luceneDocument != null)
            {
                writer.AddDocument(luceneDocument);
            }

            writer.Optimize();
        }
        finally
        {
            // Always close the writer so a failure on one URL does not leave it open.
            writer.Close();
        }
    }

    // Record on the tweet itself that its URLs have been processed.
    _tweetIndexer.UpdateDateIndexed(tweet);
}
/// <summary>
/// Indexes a tweet in the tweet index.
/// If the tweet is not yet indexed, a new document is added for it; otherwise the existing
/// document is passed to <c>UpdateIndexForDocument</c> together with <paramref name="indexId"/>.
/// </summary>
/// <param name="tweetToIndex">The tweet to index; looked up by its TweetId.</param>
/// <param name="indexId">The identifier of the index the tweet belongs to.</param>
public void IndexTweet(Tweet tweetToIndex, string indexId)
{
    // Set up index writing.
    FSDirectory tweetDirectory = FSDirectory.Open(new DirectoryInfo(Settings.TWEET_INDEX_DIR));
    IndexWriter tweetWriter = new IndexWriter(tweetDirectory, _analyzer, IndexWriter.MaxFieldLength.UNLIMITED);
    try
    {
        // Check whether the tweet is already indexed.
        Document existingTweet = _searchengine.GetDocumentForTweetId(tweetToIndex.TweetId);

        if (existingTweet == null)
        {
            // New tweet: build its document and add it.
            Document tweetDocument = IndexTweetDetails(tweetToIndex, indexId);
            tweetWriter.AddDocument(tweetDocument);
        }
        else
        {
            // Known tweet: record the additional index id on the existing document.
            // NOTE(review): UpdateIndexForDocument is not given this writer; confirm how (and whether) it persists the change.
            UpdateIndexForDocument(indexId, existingTweet);
        }

        tweetWriter.Optimize();
    }
    finally
    {
        // Always close the writer, even on failure, so it is not left open when an exception escapes.
        tweetWriter.Close();
    }
}
/// <summary>
/// Builds the Lucene document that represents a tweet in the tweet index.
/// </summary>
/// <param name="tweet">The tweet to convert; its content, id, author and posted date are stored.</param>
/// <param name="indexId">The identifier of the index the tweet belongs to.</param>
/// <returns>
/// A document with all fields stored. Only the text field is analyzed. The "date indexed"
/// field is initialised to "0".
/// </returns>
/// <remarks>
/// The author id is stored under FIELD_TWEET_LINKER_ID and the author name under
/// FIELD_TWEET_LINKER_NAME, matching how GetTweetFromDocument reads them back. Earlier
/// versions stored these two values under each other's field name, so documents written
/// before this fix have them swapped.
/// </remarks>
private Document IndexTweetDetails(Tweet tweet, string indexId)
{
    Document luceneDocument = new Document();

    // Full-text searchable content.
    luceneDocument.Add(new Field(Settings.FIELD_TWEET_TEXT, tweet.Content, Field.Store.YES, Field.Index.ANALYZED));

    // Exact-match identifier of the tweet.
    luceneDocument.Add(new Field(Settings.FIELD_TWEET_ID, tweet.TweetId.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));

    // Author details: id under the id field, name under the name field.
    luceneDocument.Add(new Field(Settings.FIELD_TWEET_LINKER_ID, tweet.Author.Id, Field.Store.YES, Field.Index.NOT_ANALYZED));
    luceneDocument.Add(new Field(Settings.FIELD_TWEET_LINKER_REP, tweet.Author.ReputationScore.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
    luceneDocument.Add(new Field(Settings.FIELD_TWEET_LINKER_NAME, tweet.Author.Name, Field.Store.YES, Field.Index.NOT_ANALYZED));

    // NOTE(review): ToString() here is culture-dependent; it must stay in step with the parsing done when documents are read back.
    luceneDocument.Add(new Field(Settings.FIELD_TWEET_DATE_POSTED, tweet.DatePosted.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));

    // "0" is the initial value; UpdateDateIndexed later overwrites it with a timestamp.
    luceneDocument.Add(new Field(Settings.FIELD_TWEET_DATE_INDEXED, "0", Field.Store.YES, Field.Index.NOT_ANALYZED));

    luceneDocument.Add(new Field(Settings.FIELD_TWEET_INDEXES, indexId, Field.Store.YES, Field.Index.NOT_ANALYZED));

    return luceneDocument;
}
/// <summary>
/// Parses one JSON message and, if it is a status update that contains at least one URL,
/// converts it to a tweet and indexes it.
/// </summary>
/// <param name="p">The raw JSON text of the message.</param>
/// <param name="indexIdentifier">The identifier of the index the resulting tweet is indexed under.</param>
/// <remarks>
/// Messages that are empty or are not JSON objects are ignored. Messages without a "user"
/// token, or without any entry in "entities.urls", are not indexed.
/// </remarks>
private void ParseMessage(string p, string indexIdentifier)
{
    if (string.IsNullOrEmpty(p))
    {
        // Nothing to parse.
        return;
    }

    // Anything that does not deserialize to a JSON object cannot be a status update.
    JObject obj = JsonConvert.DeserializeObject(p) as JObject;
    if (obj == null)
    {
        return;
    }

    // A message is treated as a status update when it carries a "user" token.
    var status = obj.SelectToken("user", false);
    if (status != null)
    {
        // SelectToken returns null when the path is missing, so guard before inspecting it.
        var urls = obj.SelectToken("entities.urls", false);
        if (urls != null && urls.HasValues)
        {
            // It has a URL.
            Console.WriteLine(urls[0]["url"]);

            Tweet tweet = new Tweet();
            tweet.Author = new Linker() { Id = obj.SelectToken("user.id", false).ToString(), Name = (string)obj.SelectToken("user.screen_name", false) };
            tweet.Content = (string)obj.SelectToken("text", false);
            tweet.TweetId = (long)obj.SelectToken("id", false);
            tweet.DatePosted = Tweet.GetDateTimeFromTwitterFormat((string)obj["created_at"]);

            // A missing retweet count is treated as zero rather than failing the whole message.
            var retweetCount = obj.SelectToken("retweet_count", false);
            tweet.ReTweetCount = retweetCount != null ? (int)retweetCount : 0;

            System.Diagnostics.Debug.WriteLine(tweet.Content);

            // TODO: when site streaming is activated, take the index from the message instead:
            // string indexIdentifier = (string) obj.SelectToken("for_user", false);
            _tweetIndexer.IndexTweet(tweet, indexIdentifier);
        }
    }

    System.Diagnostics.Debug.WriteLine("Message: {0}", new object[] { obj.ToString() });
}
/// <summary>
/// Rebuilds a tweet from the stored fields of a tweet index document.
/// </summary>
/// <param name="tweetDoc">The index document to read the tweet's fields from.</param>
/// <returns>
/// A tweet populated with the content, id, posted date and author (id, name and
/// reputation score) read from the document.
/// </returns>
private Tweet GetTweetFromDocument(Document tweetDoc)
{
    // Read and convert the stored values first, in the same order as the fields are consumed.
    string content = tweetDoc.GetField(Settings.FIELD_TWEET_TEXT).StringValue();
    long tweetId = long.Parse(tweetDoc.GetField(Settings.FIELD_TWEET_ID).StringValue());
    DateTime datePosted = DateTime.Parse(tweetDoc.GetField(Settings.FIELD_TWEET_DATE_POSTED).StringValue());

    string linkerId = tweetDoc.GetField(Settings.FIELD_TWEET_LINKER_ID).StringValue();
    string linkerName = tweetDoc.GetField(Settings.FIELD_TWEET_LINKER_NAME).StringValue();
    double linkerReputation = double.Parse(tweetDoc.GetField(Settings.FIELD_TWEET_LINKER_REP).StringValue());

    // Assemble the author, then the tweet that carries it.
    Linker author = new Linker
    {
        Id = linkerId,
        Name = linkerName,
        ReputationScore = linkerReputation
    };

    return new Tweet
    {
        Content = content,
        TweetId = tweetId,
        DatePosted = datePosted,
        Author = author
    };
}