Пример #1
0
        internal void AddSpan(JsonMessagePullerSpan newSpan)
        {
            if (newSpan.StartMessageId > newSpan.EndMessageId)
            {
                throw new ArgumentException("Invalid span: startMessageId > endMessageId");
            }

            foreach (var span in Spans.ToArray())
            {
                // Are the spans overlapping (or adjacent)?
                if (span.StartMessageId <= (newSpan.EndMessageId + 1) && (span.EndMessageId + 1) >= newSpan.StartMessageId)
                {
                    // Expand newSpan to include span
                    if (span.EndMessageId > newSpan.EndMessageId)
                    {
                        newSpan.EndMessageId = span.EndMessageId;
                    }
                    if (span.StartMessageId < newSpan.StartMessageId)
                    {
                        newSpan.StartMessageId = span.StartMessageId;
                        newSpan.StartTimeUtc   = span.StartTimeUtc;
                    }

                    // Discard the old span
                    Spans.Remove(span);
                }
            }

            // Append newSpan
            Spans.Add(newSpan);
            Spans.Sort();
        }
Пример #2
0
        async Task ProcessSpanAsync(JsonSyncingFeed syncingFeed, bool forceCheckNew)
        {
            this.appContext.RequireForegroundThread();

            long     feedId   = syncingFeed.FeedId;
            DateTime queryUtc = DateTime.UtcNow;

            bool checkNew = forceCheckNew ||
                            syncingFeed.Spans.Count < 1 ||
                            (syncingFeed.Spans.Count == 1 && syncingFeed.ReachedEmptyResult);

            long?olderThan = null;

            if (checkNew)
            {
                syncingFeed.SpanCyclesSinceCheckNew = 0;
                syncingFeed.LastCheckNewUtc         = queryUtc;
            }
            else
            {
                ++syncingFeed.SpanCyclesSinceCheckNew;

                // Work backwards from the most recent gap
                var lastSpan = syncingFeed.Spans.Last();
                olderThan = lastSpan.StartMessageId;
            }
            syncingFeed.LastUpdateUtc = queryUtc;

            if (CallingService != null)
            {
                CallingService(this, new MessagePullerCallingServiceEventArgs(feedId, null));
            }

            JsonMessageEnvelope envelope;

            try
            {
                // Perform a REST query like this:
                // https://www.yammer.com/example.com/api/v1/messages/in_group/3.json?threaded=extended&older_than=129
                envelope = await yamsterApi.GetMessagesInFeedAsync(feedId, olderThan);

                this.appContext.RequireForegroundThread();
            }
            catch (RateLimitExceededException ex)
            {
                yamsterApi.NotifyRateLimitExceeded();
                OnError(ex);
                return;
            }
            catch (WebException ex)
            {
                var response = ex.Response as HttpWebResponse;
                if (response != null)
                {
                    if (response.StatusCode == HttpStatusCode.NotFound && syncingFeed.GroupState != null)
                    {
                        // The group does not exist; disable further syncing for it and report
                        // a more specific error
                        DbGroupState groupState = syncingFeed.GroupState;
                        groupState.ShouldSync = false;
                        yamsterCoreDb.GroupStates.InsertRecord(groupState, SQLiteConflictResolution.Replace);
                        OnError(new YamsterFailedSyncException(feedId, ex));
                        return;
                    }
                }
                // A general error has occurred
                yamsterApi.BackOff();
                OnError(ex);
                return;
            }

            var newSpan = new JsonMessagePullerSpan();

            newSpan.StartMessageId = long.MaxValue;
            newSpan.StartTimeUtc   = DateTime.MaxValue;
            newSpan.EndMessageId   = long.MinValue;

            using (var transaction = yamsterArchiveDb.BeginTransaction())
            {
                WriteReferencesToDb(envelope.References, queryUtc);

                foreach (var threadStarter in envelope.Messages)
                {
                    // Clean up any corrupted data
                    if (yamsterCoreDb.SyncingThreads
                        .DeleteRecords("WHERE [ThreadId] = " + threadStarter.ThreadId) > 0)
                    {
                        Debug.WriteLine("MessagePuller: WARNING: Removed unexpected sync state for thread ID={0}",
                                        threadStarter.ThreadId);
                    }

                    JsonMessage[] extendedMessages;

                    // Note that ThreadedExtended is indexed by thread ID, not message ID
                    if (!envelope.ThreadedExtended.TryGetValue(threadStarter.ThreadId, out extendedMessages))
                    {
                        extendedMessages = new JsonMessage[0];
                    }

                    // Update the span bounds
                    long     latestMessageIdInThread;
                    DateTime latestMessageTimeInThread;
                    if (extendedMessages.Length > 0)
                    {
                        latestMessageIdInThread   = extendedMessages.Max(x => x.Id);
                        latestMessageTimeInThread = extendedMessages.Max(x => x.Created);
                    }
                    else
                    {
                        latestMessageIdInThread   = threadStarter.Id;
                        latestMessageTimeInThread = threadStarter.Created;
                    }
                    newSpan.StartMessageId = Math.Min(newSpan.StartMessageId, latestMessageIdInThread);
                    if (latestMessageTimeInThread < newSpan.StartTimeUtc)
                    {
                        newSpan.StartTimeUtc = latestMessageTimeInThread;
                    }
                    newSpan.EndMessageId = Math.Max(newSpan.EndMessageId, latestMessageIdInThread);

                    WriteMessageToDb(threadStarter, queryUtc);

                    // NOTE: The thread is presumed to be contiguous at this point.
                    // This is guaranteed to return at least threadStarter.Id written above
                    long latestMessageInDb = yamsterArchiveDb.Mapper.QueryScalar <long>(
                        "SELECT MAX(Id) FROM [" + this.yamsterArchiveDb.ArchiveMessages.TableName
                        + "] WHERE ThreadId = " + threadStarter.ThreadId.ToString());

                    // There are two scenarios where we can prove that there is no gap,
                    // i.e. that we already have all the messages for the thread.
                    // NOTE:  Originally we assumed there was no gap if extendedMessages.Length<2,
                    // but a counterexample was found.
                    bool gapped = true;

// For debugging -- skip pulling most messages to accumulate threads faster
#if false
                    if ((threadStarter.ThreadId & 31) != 0)
                    {
                        gapped = false;
                    }
#endif
                    // Scenario 1: Does the envelope contain the complete thread?
                    var threadReference = envelope.References
                                          .OfType <ThreadReferenceJson>()
                                          .Where(x => x.Id == threadStarter.ThreadId)
                                          .FirstOrDefault();

                    if (threadReference != null)
                    {
                        // (+1 for threadStarter)
                        if (extendedMessages.Length + 1 == threadReference.Stats.MessagesCount)
                        {
// This criteria should work, but I found cases where Yammer's counter is incorrect
#if false
                            // The envelope contains the complete thread
                            gapped = false;
#endif
                        }
                    }
                    else
                    {
                        // This should never happen, but if it does it's okay if we wrongly assume
                        // the thread is gapped
                        Debug.Assert(false);
                    }

                    // Scenario 2: Do the envelope messages overlap with the database's version of the thread?
                    if (gapped && extendedMessages.Length > 0)
                    {
                        long extendedStartId = extendedMessages.Min(x => x.Id);
                        if (latestMessageInDb >= extendedStartId)
                        {
                            // Yes, the messages overlap
                            gapped = false;
                        }
                    }

                    if (gapped)
                    {
                        var gappedThread = new DbSyncingThread();
                        gappedThread.FeedId        = feedId;
                        gappedThread.ThreadId      = threadStarter.ThreadId;
                        gappedThread.StopMessageId = latestMessageInDb;

                        // NOTE: In a static database, it would be most efficient to call
                        // WriteMessageToDb() for the extendedMessages that we already received
                        // and pick up with LastPulledMessageId=extendedStartId.
                        // However, if we assume people are actively posting in Yammer, it's
                        // better to begin processing a gapped thread by querying for the absolute
                        // latest stuff, since a fair amount of time may have elapsed by the
                        // time we get around to doing the query.
                        gappedThread.LastPulledMessageId = null;

                        gappedThread.RetryCount = 0;

                        // A key violation should be impossible here since if there was a conflicting
                        // record, we deleted it above.
                        yamsterCoreDb.SyncingThreads.InsertRecord(gappedThread);
                    }
                    else
                    {
                        foreach (var extendedMessage in extendedMessages)
                        {
                            WriteMessageToDb(extendedMessage, queryUtc);
                        }
                    }
                }

                if (envelope.Messages.Length > 0)
                {
                    if (olderThan.HasValue)
                    {
                        // If the Yammer result includes messages newer than what we asked
                        // for with olderThan, this is most likely a bug.
                        // NOTE: Skip this check for the Inbox feed, which seems to have minor
                        // overlap about 50% of the time.  This issue wasn't observed in the Yammer web page,
                        // but that may be due to the additional filtering there for seen/unarchived.
                        if (feedId != YamsterGroup.InboxFeedId)
                        {
                            Debug.Assert(newSpan.EndMessageId < olderThan);
                        }

                        // If olderThan was specified, then the span actually covers anything
                        // up to that point in the history
                        newSpan.EndMessageId = olderThan.Value - 1;
                    }

                    // Now create a span corresponding to the range of messages we just received.
                    syncingFeed.AddSpan(newSpan);
                }
                else
                {
                    syncingFeed.ReachedEmptyResult = true;
                }

                yamsterCoreDb.UpdateJsonSyncingFeed(feedId, syncingFeed);

                if (feedId == YamsterGroup.InboxFeedId)
                {
                    // For each GroupId in the messages that we wrote, make sure ShowInYamster = 1
                    string showInYamsterSql = string.Format(
                        @"UPDATE [GroupStates] SET [ShowInYamster] = 1"
                        + " WHERE [GroupId] in ({0}) AND [ShowInYamster] <> 1",
                        string.Join(
                            ", ",
                            envelope.Messages.Where(x => x.GroupId != null).Select(x => x.GroupId).Distinct()
                            )
                        );
                    yamsterCoreDb.Mapper.ExecuteNonQuery(showInYamsterSql);

                    // For each ThreadId in the messages that we wrote, mark it as appearing in the inbox
                    string seenInInboxSql = string.Format(
                        @"UPDATE [ThreadStates] SET [SeenInInboxFeed] = 1"
                        + " WHERE [ThreadId] in ({0}) AND [SeenInInboxFeed] <> 1",
                        string.Join(
                            ", ",
                            envelope.Messages.Select(x => x.ThreadId).Distinct()
                            )
                        );
                    yamsterCoreDb.Mapper.ExecuteNonQuery(seenInInboxSql);
                }

                transaction.Commit();

                if (UpdatedDatabase != null)
                {
                    UpdatedDatabase(this, EventArgs.Empty);
                }
            }
        }