/// <summary>
/// Adds a span of message IDs to <c>Spans</c>, absorbing every existing span that
/// overlaps it or sits directly next to it, and then re-sorts the list.
/// </summary>
/// <param name="newSpan">
/// The span to add.  It is widened in place to cover each span it absorbs; when an
/// absorbed span starts earlier, that span's StartTimeUtc is taken over as well.
/// </param>
/// <exception cref="ArgumentException">
/// The span's StartMessageId is greater than its EndMessageId.
/// </exception>
internal void AddSpan(JsonMessagePullerSpan newSpan)
{
    if (newSpan.StartMessageId > newSpan.EndMessageId)
    {
        throw new ArgumentException("Invalid span: startMessageId > endMessageId");
    }

    // Walk a snapshot, because absorbed entries are removed from Spans as we go
    var snapshot = Spans.ToArray();
    for (int index = 0; index < snapshot.Length; ++index)
    {
        var existing = snapshot[index];

        // Leave alone any span that neither overlaps nor touches newSpan
        // (the "+ 1" makes directly adjacent ID ranges count as touching)
        if (existing.StartMessageId > (newSpan.EndMessageId + 1)
            || (existing.EndMessageId + 1) < newSpan.StartMessageId)
        {
            continue;
        }

        // Stretch newSpan downwards; the earlier start brings its timestamp along
        if (existing.StartMessageId < newSpan.StartMessageId)
        {
            newSpan.StartMessageId = existing.StartMessageId;
            newSpan.StartTimeUtc = existing.StartTimeUtc;
        }

        // Stretch newSpan upwards
        if (existing.EndMessageId > newSpan.EndMessageId)
        {
            newSpan.EndMessageId = existing.EndMessageId;
        }

        // The existing span is now fully covered by newSpan
        Spans.Remove(existing);
    }

    Spans.Add(newSpan);
    Spans.Sort();
}
/// <summary>
/// Performs one sync step for a feed: queries the service for either the newest
/// messages or the messages older than the start of the feed's last span, writes the
/// results to the database inside a transaction, registers threads that still have
/// gaps for follow-up syncing, and records the message ID range that was covered.
/// </summary>
/// <param name="syncingFeed">
/// Sync state of the feed being processed.  Its bookkeeping fields and spans are
/// updated by this method.
/// </param>
/// <param name="forceCheckNew">
/// True to query for the newest messages regardless of the current span state.
/// </param>
/// <returns>
/// A task that completes when the step has finished, or after a service failure has
/// been reported through OnError.
/// </returns>
async Task ProcessSpanAsync(JsonSyncingFeed syncingFeed, bool forceCheckNew)
{
    this.appContext.RequireForegroundThread();

    long feedId = syncingFeed.FeedId;
    DateTime queryUtc = DateTime.UtcNow;

    // Check for new messages when forced, when nothing has been synced yet, or when
    // the only span has already been followed back to an empty result
    bool checkNew = forceCheckNew;
    if (!checkNew)
    {
        int spanCount = syncingFeed.Spans.Count;
        checkNew = spanCount < 1 || (spanCount == 1 && syncingFeed.ReachedEmptyResult);
    }

    long? olderThan;
    if (!checkNew)
    {
        ++syncingFeed.SpanCyclesSinceCheckNew;

        // Continue backwards from the start of the last span, i.e. into the most recent gap
        olderThan = syncingFeed.Spans.Last().StartMessageId;
    }
    else
    {
        syncingFeed.SpanCyclesSinceCheckNew = 0;
        syncingFeed.LastCheckNewUtc = queryUtc;
        olderThan = null;
    }

    syncingFeed.LastUpdateUtc = queryUtc;

    var callingServiceHandler = CallingService;
    if (callingServiceHandler != null)
    {
        callingServiceHandler(this, new MessagePullerCallingServiceEventArgs(feedId, null));
    }

    JsonMessageEnvelope envelope;
    try
    {
        // The REST query looks like this:
        // https://www.yammer.com/example.com/api/v1/messages/in_group/3.json?threaded=extended&older_than=129
        envelope = await yamsterApi.GetMessagesInFeedAsync(feedId, olderThan);
        this.appContext.RequireForegroundThread();
    }
    catch (RateLimitExceededException ex)
    {
        yamsterApi.NotifyRateLimitExceeded();
        OnError(ex);
        return;
    }
    catch (WebException ex)
    {
        var httpResponse = ex.Response as HttpWebResponse;
        bool groupNotFound = httpResponse != null
            && httpResponse.StatusCode == HttpStatusCode.NotFound
            && syncingFeed.GroupState != null;

        if (groupNotFound)
        {
            // The group does not exist: stop syncing it and report a more specific error
            DbGroupState groupState = syncingFeed.GroupState;
            groupState.ShouldSync = false;
            yamsterCoreDb.GroupStates.InsertRecord(groupState, SQLiteConflictResolution.Replace);

            OnError(new YamsterFailedSyncException(feedId, ex));
            return;
        }

        // Any other failure is treated as a general error
        yamsterApi.BackOff();
        OnError(ex);
        return;
    }

    // Start with an "inverted" range so the first thread processed sets both bounds
    var newSpan = new JsonMessagePullerSpan
    {
        StartMessageId = long.MaxValue,
        StartTimeUtc = DateTime.MaxValue,
        EndMessageId = long.MinValue
    };

    using (var transaction = yamsterArchiveDb.BeginTransaction())
    {
        WriteReferencesToDb(envelope.References, queryUtc);

        foreach (var threadStarter in envelope.Messages)
        {
            // No sync state is expected for this thread yet; discard any corrupted leftovers
            var removedCount = yamsterCoreDb.SyncingThreads
                .DeleteRecords("WHERE [ThreadId] = " + threadStarter.ThreadId);
            if (removedCount > 0)
            {
                Debug.WriteLine("MessagePuller: WARNING: Removed unexpected sync state for thread ID={0}",
                    threadStarter.ThreadId);
            }

            // ThreadedExtended is keyed by thread ID (not message ID)
            JsonMessage[] extendedMessages;
            if (!envelope.ThreadedExtended.TryGetValue(threadStarter.ThreadId, out extendedMessages))
            {
                extendedMessages = new JsonMessage[0];
            }

            // Determine the newest message of the thread as seen in this envelope
            long latestMessageIdInThread;
            DateTime latestMessageTimeInThread;
            if (extendedMessages.Length == 0)
            {
                latestMessageIdInThread = threadStarter.Id;
                latestMessageTimeInThread = threadStarter.Created;
            }
            else
            {
                latestMessageIdInThread = extendedMessages.Max(x => x.Id);
                latestMessageTimeInThread = extendedMessages.Max(x => x.Created);
            }

            // Grow the span so that it includes this thread's newest message
            if (latestMessageIdInThread < newSpan.StartMessageId)
            {
                newSpan.StartMessageId = latestMessageIdInThread;
            }
            if (newSpan.StartTimeUtc > latestMessageTimeInThread)
            {
                newSpan.StartTimeUtc = latestMessageTimeInThread;
            }
            if (latestMessageIdInThread > newSpan.EndMessageId)
            {
                newSpan.EndMessageId = latestMessageIdInThread;
            }

            WriteMessageToDb(threadStarter, queryUtc);

            // NOTE: The thread is presumed to be contiguous at this point.
            // The query yields at least threadStarter.Id, which was written just above.
            string latestInDbSql = "SELECT MAX(Id) FROM [" + this.yamsterArchiveDb.ArchiveMessages.TableName
                + "] WHERE ThreadId = " + threadStarter.ThreadId.ToString();
            long latestMessageInDb = yamsterArchiveDb.Mapper.QueryScalar<long>(latestInDbSql);

            // Assume the thread has a gap unless one of two scenarios proves otherwise.
            // NOTE: extendedMessages.Length < 2 was originally taken as proof of no gap,
            // but a counterexample was found.
            bool gapped = true;

            // For debugging -- skip pulling most messages to accumulate threads faster
#if false
            if ((threadStarter.ThreadId & 31) != 0)
            {
                gapped = false;
            }
#endif

            // Scenario 1: Does the envelope contain the complete thread?
            var threadReference = envelope.References
                .OfType<ThreadReferenceJson>()
                .FirstOrDefault(x => x.Id == threadStarter.ThreadId);

            if (threadReference == null)
            {
                // Not expected to happen; wrongly treating the thread as gapped is harmless
                Debug.Assert(false);
            }
            else if (extendedMessages.Length + 1 == threadReference.Stats.MessagesCount)
            {
                // (The +1 above accounts for threadStarter.)
                // This criterion should work, but cases were found where Yammer's counter is incorrect
#if false
                // The envelope contains the complete thread
                gapped = false;
#endif
            }

            // Scenario 2: Do the envelope messages overlap with what the database already has?
            if (gapped && extendedMessages.Length > 0)
            {
                long extendedStartId = extendedMessages.Min(x => x.Id);

                // Overlap means the thread is contiguous, so it stays gapped only without overlap
                gapped = latestMessageInDb < extendedStartId;
            }

            if (!gapped)
            {
                foreach (var extendedMessage in extendedMessages)
                {
                    WriteMessageToDb(extendedMessage, queryUtc);
                }
            }
            else
            {
                // NOTE: In a static database it would be most efficient to write the
                // extendedMessages already received and resume from extendedStartId.
                // Assuming people are actively posting, it is better to start a gapped
                // thread by querying for the absolute latest messages, since a fair
                // amount of time may pass before that query runs.  Hence
                // LastPulledMessageId is left null.
                var gappedThread = new DbSyncingThread
                {
                    FeedId = feedId,
                    ThreadId = threadStarter.ThreadId,
                    StopMessageId = latestMessageInDb,
                    LastPulledMessageId = null,
                    RetryCount = 0
                };

                // A key violation should be impossible, because any conflicting record
                // was deleted at the top of this iteration
                yamsterCoreDb.SyncingThreads.InsertRecord(gappedThread);
            }
        }

        if (envelope.Messages.Length == 0)
        {
            syncingFeed.ReachedEmptyResult = true;
        }
        else
        {
            if (olderThan.HasValue)
            {
                // Results newer than the requested olderThan bound most likely indicate a bug.
                // NOTE: The check is skipped for the Inbox feed, which seems to have minor
                // overlap about 50% of the time.  This wasn't observed in the Yammer web
                // page, but that may be due to its extra filtering for seen/unarchived.
                if (feedId != YamsterGroup.InboxFeedId)
                {
                    Debug.Assert(newSpan.EndMessageId < olderThan);
                }

                // With olderThan specified, the span really covers everything up to that point
                newSpan.EndMessageId = olderThan.Value - 1;
            }

            // Record the range of messages that was just received
            syncingFeed.AddSpan(newSpan);
        }

        yamsterCoreDb.UpdateJsonSyncingFeed(feedId, syncingFeed);

        if (feedId == YamsterGroup.InboxFeedId)
        {
            // Make sure ShowInYamster = 1 for every group referenced by the written messages
            var groupIds = envelope.Messages
                .Where(x => x.GroupId != null)
                .Select(x => x.GroupId)
                .Distinct();
            string showInYamsterSql = string.Format(
                @"UPDATE [GroupStates] SET [ShowInYamster] = 1"
                + " WHERE [GroupId] in ({0}) AND [ShowInYamster] <> 1",
                string.Join(", ", groupIds));
            yamsterCoreDb.Mapper.ExecuteNonQuery(showInYamsterSql);

            // Flag every thread of the written messages as having appeared in the inbox
            var threadIds = envelope.Messages
                .Select(x => x.ThreadId)
                .Distinct();
            string seenInInboxSql = string.Format(
                @"UPDATE [ThreadStates] SET [SeenInInboxFeed] = 1"
                + " WHERE [ThreadId] in ({0}) AND [SeenInInboxFeed] <> 1",
                string.Join(", ", threadIds));
            yamsterCoreDb.Mapper.ExecuteNonQuery(seenInInboxSql);
        }

        transaction.Commit();

        var updatedDatabaseHandler = UpdatedDatabase;
        if (updatedDatabaseHandler != null)
        {
            updatedDatabaseHandler(this, EventArgs.Empty);
        }
    }
}