/// <summary>
/// Loads both cursors concurrently, then runs one fetch pass with a freshly created
/// <c>CollectorHttpClient</c> that is disposed when the pass completes.
/// </summary>
/// <param name="front">Read/write cursor; loaded here and handed to <c>FetchAsync</c>.</param>
/// <param name="back">Read-only cursor; loaded here and handed to <c>FetchAsync</c>.</param>
/// <param name="cancellationToken">Token forwarded to both cursor loads and to the fetch.</param>
/// <returns>Whatever <c>FetchAsync</c> returned.</returns>
public async Task<bool> RunAsync(ReadWriteCursor front, ReadCursor back, CancellationToken cancellationToken)
        {
            var frontLoad = front.LoadAsync(cancellationToken);
            var backLoad = back.LoadAsync(cancellationToken);

            await Task.WhenAll(frontLoad, backLoad);

            Trace.TraceInformation("Run ( {0} , {1} )", front, back);

            // A handler is only supplied when a factory delegate was configured.
            HttpMessageHandler handler = _handlerFunc != null ? _handlerFunc() : null;

            using (var client = new CollectorHttpClient(handler))
            {
                // Apply the configured timeout, if any, before the first request is made.
                if (_httpClientTimeout.HasValue)
                {
                    client.Timeout = _httpClientTimeout.Value;
                }

                return await FetchAsync(client, front, back, cancellationToken);
            }
        }
        /// <summary>
        /// Walks the catalog index at <c>Index</c>, downloads every page whose commit timestamp is
        /// newer than <paramref name="front"/>, groups the page's items into batches via
        /// <c>CreateBatches</c>, and feeds each batch to <c>OnProcessBatch</c> in commit-timestamp order.
        /// </summary>
        /// <remarks>
        /// The cursor is persisted only once every batch sharing a commit timestamp has been
        /// processed: either when the next batch carries a different timestamp, or when the last
        /// batch of the page has finished. Saving after each individual batch would move the cursor
        /// past sibling batches with the same timestamp, and those would be filtered out on the next
        /// run if processing stopped or threw in between. The trade-off is that a batch may be
        /// processed again on a later run.
        /// </remarks>
        /// <param name="client">HTTP client used to download the index and its pages.</param>
        /// <param name="front">Cursor marking the exclusive lower bound; advanced as batches complete.</param>
        /// <param name="back">Cursor marking the inclusive upper bound for items to process.</param>
        /// <param name="cancellationToken">Token forwarded to downloads, batch processing and cursor saves.</param>
        /// <returns>
        /// The value returned by the last <c>OnProcessBatch</c> call, or <c>false</c> if no batch was processed.
        /// </returns>
        protected override async Task<bool> Fetch(CollectorHttpClient client, ReadWriteCursor front, ReadCursor back, CancellationToken cancellationToken)
        {
            JObject root = await client.GetJObjectAsync(Index, cancellationToken);

            IEnumerable<CatalogItem> rootItems = root["items"]
                .Select(item => new CatalogItem(item))
                .Where(item => item.CommitTimeStamp > front.Value)
                .OrderBy(item => item.CommitTimeStamp);

            bool acceptNextBatch = false;

            foreach (CatalogItem rootItem in rootItems)
            {
                JObject page = await client.GetJObjectAsync(rootItem.Uri, cancellationToken);

                JToken context = null;
                page.TryGetValue("@context", out context);

                var batches = await CreateBatches(page["items"]
                    .Select(item => new CatalogItem(item))
                    .Where(item => item.CommitTimeStamp > front.Value && item.CommitTimeStamp <= back.Value));

                var orderedBatches = batches
                    .OrderBy(batch => batch.CommitTimeStamp)
                    .ToList();

                var lastBatch = orderedBatches.LastOrDefault();
                DateTime? previousCommitTimeStamp = null;

                for (int i = 0; i < orderedBatches.Count; i++)
                {
                    var batch = orderedBatches[i];
                    bool isLastBatchInPage = i == orderedBatches.Count - 1;

                    // Every batch with the previous timestamp is done once the timestamp changes,
                    // so it is now safe to move the cursor up to that previous timestamp.
                    if (previousCommitTimeStamp.HasValue && previousCommitTimeStamp.Value != batch.CommitTimeStamp)
                    {
                        front.Value = previousCommitTimeStamp.Value;
                        await front.Save(cancellationToken);

                        Trace.TraceInformation("CommitCatalog.Fetch front.Save has value: {0}", front);
                    }

                    acceptNextBatch = await OnProcessBatch(
                        client,
                        batch.Items.Select(item => item.Value),
                        context,
                        batch.CommitTimeStamp,
                        batch.CommitTimeStamp == lastBatch.CommitTimeStamp,
                        cancellationToken);

                    // Nothing else in this page can share the timestamp after the final batch.
                    if (isLastBatchInPage)
                    {
                        front.Value = batch.CommitTimeStamp;
                        await front.Save(cancellationToken);

                        Trace.TraceInformation("CommitCatalog.Fetch front.Save has value: {0}", front);
                    }

                    previousCommitTimeStamp = batch.CommitTimeStamp;

                    if (!acceptNextBatch)
                    {
                        break;
                    }
                }

                if (!acceptNextBatch)
                {
                    break;
                }
            }

            return acceptNextBatch;
        }
Exemple #3
0
        /// <summary>
        /// Walks the catalog index at <c>Index</c>, downloads each page newer than
        /// <paramref name="front"/>, groups the page's items by commit timestamp, and hands each
        /// group to <c>OnProcessBatch</c> in ascending timestamp order. The cursor is saved after
        /// every group.
        /// </summary>
        /// <param name="client">HTTP client used to download the index and its pages.</param>
        /// <param name="front">Exclusive lower bound; set to each group's timestamp and saved after the group is processed.</param>
        /// <param name="back">Inclusive upper bound for item commit timestamps.</param>
        /// <returns>
        /// The value returned by the last <c>OnProcessBatch</c> call, or <c>false</c> if no group was processed.
        /// </returns>
        protected override async Task<bool> Fetch(CollectorHttpClient client, ReadWriteCursor front, ReadCursor back)
        {
            JObject root = await client.GetJObjectAsync(Index);

            IEnumerable<CatalogItem> rootItems = root["items"]
                .Select(item => new CatalogItem(item))
                .Where(item => item.CommitTimeStamp > front.Value)
                .OrderBy(item => item.CommitTimeStamp);

            bool acceptNextBatch = false;

            foreach (CatalogItem rootItem in rootItems)
            {
                JObject page = await client.GetJObjectAsync(rootItem.Uri);

                JToken context = null;
                page.TryGetValue("@context", out context);

                // One group per distinct commit timestamp, so each batch has a unique cursor value.
                var batches = page["items"]
                    .Select(item => new CatalogItem(item))
                    .Where(item => item.CommitTimeStamp > front.Value && item.CommitTimeStamp <= back.Value)
                    .GroupBy(item => item.CommitTimeStamp)
                    .OrderBy(group => group.Key);

                foreach (var batch in batches)
                {
                    acceptNextBatch = await OnProcessBatch(client, batch.Select(item => item.Value), context, batch.Key);

                    front.Value = batch.Key;
                    await front.Save();

                    Trace.TraceInformation("CommitCatalog.Fetch front.Save has value: {0}", front);

                    if (!acceptNextBatch)
                    {
                        break;
                    }
                }

                if (!acceptNextBatch)
                {
                    break;
                }
            }

            return acceptNextBatch;
        }
Exemple #4
0
        /// <summary>
        /// Builds commit-item batches for the items of <paramref name="page"/> whose commit
        /// timestamp lies in the window (front, back], and returns them sorted by commit timestamp.
        /// </summary>
        /// <param name="front">Exclusive lower bound of the window.</param>
        /// <param name="back">Inclusive upper bound of the window.</param>
        /// <param name="page">Catalog page whose <c>"items"</c> array is read.</param>
        /// <param name="context">Context object passed to <c>CatalogCommitItem.Create</c> for every item.</param>
        /// <param name="createCommitItemBatchesAsync">Delegate that turns the filtered items into batches.</param>
        /// <returns>The batches, ordered ascending by <c>CommitTimeStamp</c>.</returns>
        private static async Task<CatalogCommitItemBatch[]> CreateBatchesForAllAvailableItemsInPageAsync(
            ReadWriteCursor front,
            ReadCursor back,
            JObject page,
            JObject context,
            CreateCommitItemBatchesAsync createCommitItemBatchesAsync)
        {
            // Deferred query: items are parsed and filtered when the delegate enumerates them.
            IEnumerable<CatalogCommitItem> itemsInWindow = page["items"]
                .Select(token => CatalogCommitItem.Create(context, (JObject)token))
                .Where(candidate => candidate.CommitTimeStamp > front.Value && candidate.CommitTimeStamp <= back.Value);

            IEnumerable<CatalogCommitItemBatch> unorderedBatches = await createCommitItemBatchesAsync(itemsInWindow);

            var byCommitTime = unorderedBatches.OrderBy(candidate => candidate.CommitTimeStamp);

            return byCommitTime.ToArray();
        }
        /// <summary>
        /// Reads the job arguments and wires up the <c>DnxCatalogCollector</c> together with its
        /// front and back cursors.
        /// </summary>
        /// <param name="arguments">
        /// Job arguments. <c>Arguments.Source</c> is required; <c>Arguments.Verbose</c> defaults to
        /// <c>false</c>; <c>Arguments.ContentBaseAddress</c> is optional.
        /// </param>
        /// <param name="cancellationToken">Not used by this method.</param>
        protected override void Init(IDictionary<string, string> arguments, CancellationToken cancellationToken)
        {
            var source = arguments.GetOrThrow<string>(Arguments.Source);
            var verbose = arguments.GetOrDefault(Arguments.Verbose, false);
            var contentBaseAddress = arguments.GetOrDefault<string>(Arguments.ContentBaseAddress);
            var storageFactory = CommandHelpers.CreateStorageFactory(arguments, verbose);

            Logger.LogInformation("CONFIG source: \"{ConfigSource}\" storage: \"{Storage}\"", source, storageFactory);

            var collector = new DnxCatalogCollector(
                new Uri(source),
                storageFactory,
                CommandHelpers.GetHttpMessageHandlerFactory(verbose));

            // The content base address stays null unless one was supplied.
            Uri contentBaseUri = null;

            if (contentBaseAddress != null)
            {
                contentBaseUri = new Uri(contentBaseAddress);
            }

            collector.ContentBaseAddress = contentBaseUri;
            _collector = collector;

            // The front cursor is persisted as "cursor.json" in the created storage;
            // the back cursor is an in-memory maximum.
            var storage = storageFactory.Create();
            var cursorUri = storage.ResolveUri("cursor.json");

            _front = new DurableCursor(cursorUri, storage, MemoryCursor.MinValue);
            _back = MemoryCursor.CreateMax();
        }
        /// <summary>
        /// Loads both cursors concurrently, runs one fetch pass with a freshly created
        /// <c>CollectorHttpClient</c>, and copies the client's request count to <c>RequestCount</c>.
        /// </summary>
        /// <param name="front">Read/write cursor; loaded here and handed to <c>Fetch</c>.</param>
        /// <param name="back">Read-only cursor; loaded here and handed to <c>Fetch</c>.</param>
        /// <param name="cancellationToken">Token forwarded to both cursor loads and to the fetch.</param>
        /// <returns>Whatever <c>Fetch</c> returned.</returns>
        public async Task<bool> Run(ReadWriteCursor front, ReadCursor back, CancellationToken cancellationToken)
        {
            var frontLoad = front.Load(cancellationToken);
            var backLoad = back.Load(cancellationToken);

            await Task.WhenAll(frontLoad, backLoad);

            Trace.TraceInformation("Run ( {0} , {1} )", front, back);

            // A handler is only supplied when a factory delegate was configured.
            HttpMessageHandler handler = _handlerFunc != null ? _handlerFunc() : null;

            using (var client = new CollectorHttpClient(handler))
            {
                bool fetched = await Fetch(client, front, back, cancellationToken);

                // Record the request count before the client is disposed.
                RequestCount = client.RequestCount;

                return fetched;
            }
        }
        /// <summary>
        /// Downloads the document at <c>Index</c> (timed via the telemetry service) and returns its
        /// <c>"items"</c> entries that are newer than <paramref name="front"/>, ordered by commit
        /// timestamp.
        /// </summary>
        /// <param name="client">HTTP client used for the download.</param>
        /// <param name="front">Cursor whose value is the exclusive lower bound for returned commits.</param>
        /// <param name="cancellationToken">Token forwarded to the download.</param>
        /// <returns>
        /// A deferred sequence; parsing and filtering, including the read of <c>front.Value</c>,
        /// happen when it is enumerated.
        /// </returns>
        protected async Task<IEnumerable<CatalogCommit>> FetchCatalogCommitsAsync(
            CollectorHttpClient client,
            ReadWriteCursor front,
            CancellationToken cancellationToken)
        {
            var telemetryProperties = new Dictionary<string, string>()
            {
                { TelemetryConstants.Uri, Index.AbsoluteUri }
            };

            JObject indexDocument;

            // Only the download itself is covered by the duration measurement.
            using (_telemetryService.TrackDuration(TelemetryConstants.CatalogIndexReadDurationSeconds, telemetryProperties))
            {
                indexDocument = await client.GetJObjectAsync(Index, cancellationToken);
            }

            return indexDocument["items"]
                .Select(entry => CatalogCommit.Create((JObject)entry))
                .Where(commit => commit.CommitTimeStamp > front.Value)
                .OrderBy(commit => commit.CommitTimeStamp);
        }
Exemple #8
0
        /// <summary>
        /// Loads both cursors concurrently, runs one fetch pass with a freshly created
        /// <c>CollectorHttpClient</c>, and copies the client's request count to <c>RequestCount</c>.
        /// </summary>
        /// <param name="front">Read/write cursor; loaded here and handed to <c>Fetch</c>.</param>
        /// <param name="back">Read-only cursor; loaded here and handed to <c>Fetch</c>.</param>
        /// <returns>Whatever <c>Fetch</c> returned.</returns>
        public async Task<bool> Run(ReadWriteCursor front, ReadCursor back)
        {
            var frontLoad = front.Load();
            var backLoad = back.Load();

            await Task.WhenAll(frontLoad, backLoad);

            Trace.TraceInformation("Run ( {0} , {1} )", front, back);

            // A handler is only supplied when a factory delegate was configured.
            HttpMessageHandler handler = _handlerFunc != null ? _handlerFunc() : null;

            using (var client = new CollectorHttpClient(handler))
            {
                bool fetched = await Fetch(client, front, back);

                // Record the request count before the client is disposed.
                RequestCount = client.RequestCount;

                return fetched;
            }
        }
        /// <summary>
        /// Walks the catalog index and its pages in commit-timestamp order, accumulating page items
        /// in the window (front, back] and flushing them through <c>ProcessBatch</c> whenever
        /// <c>_batchSize</c> items have been collected. Any remainder is flushed at the end.
        /// </summary>
        /// <param name="client">HTTP client used to download the index and its pages.</param>
        /// <param name="front">Exclusive lower bound; advanced by <c>ProcessBatch</c> after each flush.</param>
        /// <param name="back">Inclusive upper bound for item commit timestamps.</param>
        /// <param name="cancellationToken">Token forwarded to every download and to batch processing.</param>
        /// <returns><c>true</c> when at least one batch was processed during this call.</returns>
        protected override async Task<bool> Fetch(CollectorHttpClient client, ReadWriteCursor front, ReadCursor back, CancellationToken cancellationToken)
        {
            int beforeBatchCount = BatchCount;

            IList<JObject> items = new List<JObject>();

            JObject root = await client.GetJObjectAsync(Index, cancellationToken);

            JToken context = null;
            root.TryGetValue("@context", out context);

            IEnumerable<JToken> rootItems = root["items"].OrderBy(item => item["commitTimeStamp"].ToObject<DateTime>());

            DateTime resumeDateTime = front.Value;

            bool acceptNextBatch = true;

            foreach (JObject rootItem in rootItems)
            {
                if (!acceptNextBatch)
                {
                    break;
                }

                DateTime rootItemCommitTimeStamp = rootItem["commitTimeStamp"].ToObject<DateTime>();

                if (rootItemCommitTimeStamp <= front.Value)
                {
                    continue;
                }

                Uri pageUri = rootItem["@id"].ToObject<Uri>();

                // Pass the token here as well, so cancellation is honoured for page downloads
                // and not only for the index download.
                JObject page = await client.GetJObjectAsync(pageUri, cancellationToken);

                IEnumerable<JToken> pageItems = page["items"].OrderBy(item => item["commitTimeStamp"].ToObject<DateTime>());

                foreach (JObject pageItem in pageItems)
                {
                    DateTime pageItemCommitTimeStamp = pageItem["commitTimeStamp"].ToObject<DateTime>();

                    // NOTE(review): front.Value is advanced by ProcessBatch, so after a mid-page flush
                    // any later item sharing the flushed batch's final timestamp is skipped here.
                    // Confirm whether that is intended.
                    if (pageItemCommitTimeStamp <= front.Value)
                    {
                        continue;
                    }

                    // Items are sorted, so everything after this one in the page is also past the bound.
                    if (pageItemCommitTimeStamp > back.Value)
                    {
                        break;
                    }

                    items.Add(pageItem);
                    resumeDateTime = pageItemCommitTimeStamp;

                    if (items.Count == _batchSize)
                    {
                        acceptNextBatch = await ProcessBatch(client, items, context, front, resumeDateTime, cancellationToken);

                        if (!acceptNextBatch)
                        {
                            break;
                        }
                    }
                }
            }

            // Flush the final, partially filled batch.
            if (acceptNextBatch && items.Count > 0)
            {
                await ProcessBatch(client, items, context, front, resumeDateTime, cancellationToken);
            }

            int afterBatchCount = BatchCount;

            PreviousRunBatchCount = (afterBatchCount - beforeBatchCount);

            return (PreviousRunBatchCount > 0);
        }
 /// <summary>
 /// Fetch step supplied by a concrete collector; no implementation is declared here.
 /// </summary>
 /// <remarks>
 /// NOTE(review): presumably invoked by the collector's run method with the HTTP client it
 /// created and with both cursors already loaded. Confirm against the enclosing class.
 /// </remarks>
 /// <param name="client">HTTP client to use for requests.</param>
 /// <param name="front">Read/write cursor passed to the implementation.</param>
 /// <param name="back">Read-only cursor passed to the implementation.</param>
 /// <param name="cancellationToken">Cancellation token passed to the implementation.</param>
 /// <returns>A task producing the implementation-defined boolean result.</returns>
 protected abstract Task <bool> FetchAsync(
     CollectorHttpClient client,
     ReadWriteCursor front,
     ReadCursor back,
     CancellationToken cancellationToken);
        /// <summary>
        /// Passes the accumulated items to <c>OnProcessBatch</c>, increments <c>BatchCount</c>,
        /// empties the list, and then moves the front cursor to
        /// <paramref name="resumeDateTime"/> and saves it.
        /// </summary>
        /// <param name="client">HTTP client handed through to <c>OnProcessBatch</c>.</param>
        /// <param name="items">Items of the batch; cleared after processing.</param>
        /// <param name="context">Context token, cast to <c>JObject</c> for <c>OnProcessBatch</c>.</param>
        /// <param name="front">Cursor to advance and save.</param>
        /// <param name="resumeDateTime">Timestamp stored in the cursor once the batch is processed.</param>
        /// <param name="cancellationToken">Token forwarded to processing and to the cursor save.</param>
        /// <returns>The value returned by <c>OnProcessBatch</c>.</returns>
        async Task<bool> ProcessBatch(CollectorHttpClient client, IList<JObject> items, JToken context, ReadWriteCursor front, DateTime resumeDateTime, CancellationToken cancellationToken)
        {
            var shouldContinue = await OnProcessBatch(client, items, (JObject)context, resumeDateTime, cancellationToken);

            // Bookkeeping happens only after processing returned without throwing.
            BatchCount++;
            items.Clear();

            front.Value = resumeDateTime;
            await front.Save(cancellationToken);

            return shouldContinue;
        }
        /// <summary>
        /// Walks the catalog index and its pages in commit-timestamp order, accumulating page items
        /// in the window (front, back] and flushing them through <c>ProcessBatch</c> whenever
        /// <c>_batchSize</c> items have been collected. Any remainder is flushed at the end.
        /// </summary>
        /// <param name="client">HTTP client used to download the index and its pages.</param>
        /// <param name="front">Exclusive lower bound; advanced by <c>ProcessBatch</c> after each flush.</param>
        /// <param name="back">Inclusive upper bound for item commit timestamps.</param>
        /// <returns><c>true</c> when at least one batch was processed during this call.</returns>
        protected override async Task<bool> Fetch(CollectorHttpClient client, ReadWriteCursor front, ReadCursor back)
        {
            int batchCountAtStart = BatchCount;

            IList<JObject> pending = new List<JObject>();

            JObject index = await client.GetJObjectAsync(Index);

            JToken context = null;
            index.TryGetValue("@context", out context);

            IEnumerable<JToken> pagesByCommitTime = index["items"]
                .OrderBy(entry => entry["commitTimeStamp"].ToObject<DateTime>());

            DateTime resumeDateTime = front.Value;
            bool keepGoing = true;

            foreach (JObject pageEntry in pagesByCommitTime)
            {
                // A rejected batch inside the previous page ends the whole pass.
                if (!keepGoing)
                {
                    break;
                }

                DateTime pageCommitTime = pageEntry["commitTimeStamp"].ToObject<DateTime>();

                // Only pages newer than the cursor are downloaded.
                if (pageCommitTime > front.Value)
                {
                    Uri pageUri = pageEntry["@id"].ToObject<Uri>();
                    JObject page = await client.GetJObjectAsync(pageUri);

                    IEnumerable<JToken> leavesByCommitTime = page["items"]
                        .OrderBy(entry => entry["commitTimeStamp"].ToObject<DateTime>());

                    foreach (JObject leaf in leavesByCommitTime)
                    {
                        DateTime leafCommitTime = leaf["commitTimeStamp"].ToObject<DateTime>();

                        // NOTE(review): front.Value is advanced by ProcessBatch, so after a mid-page
                        // flush any later item sharing the flushed batch's final timestamp is skipped
                        // here. Confirm whether that is intended.
                        if (leafCommitTime <= front.Value)
                        {
                            continue;
                        }

                        // Sorted input: nothing further in this page can be inside the window.
                        if (leafCommitTime > back.Value)
                        {
                            break;
                        }

                        pending.Add(leaf);
                        resumeDateTime = leafCommitTime;

                        if (pending.Count != _batchSize)
                        {
                            continue;
                        }

                        keepGoing = await ProcessBatch(client, pending, context, front, resumeDateTime);

                        if (!keepGoing)
                        {
                            break;
                        }
                    }
                }
            }

            // Flush the final, partially filled batch.
            if (keepGoing && pending.Count > 0)
            {
                await ProcessBatch(client, pending, context, front, resumeDateTime);
            }

            PreviousRunBatchCount = BatchCount - batchCountAtStart;

            return PreviousRunBatchCount > 0;
        }
        /// <summary>
        /// Passes the accumulated items to <c>OnProcessBatch</c>, increments <c>BatchCount</c>,
        /// empties the list, and then moves the front cursor to
        /// <paramref name="resumeDateTime"/> and saves it.
        /// </summary>
        /// <param name="client">HTTP client handed through to <c>OnProcessBatch</c>.</param>
        /// <param name="items">Items of the batch; cleared after processing.</param>
        /// <param name="context">Context token, cast to <c>JObject</c> for <c>OnProcessBatch</c>.</param>
        /// <param name="front">Cursor to advance and save.</param>
        /// <param name="resumeDateTime">Timestamp stored in the cursor once the batch is processed.</param>
        /// <returns>The value returned by <c>OnProcessBatch</c>.</returns>
        async Task<bool> ProcessBatch(CollectorHttpClient client, IList<JObject> items, JToken context, ReadWriteCursor front, DateTime resumeDateTime)
        {
            var shouldContinue = await OnProcessBatch(client, items, (JObject)context, resumeDateTime);

            // Bookkeeping happens only after processing returned without throwing.
            BatchCount++;
            items.Clear();

            front.Value = resumeDateTime;
            await front.Save();

            return shouldContinue;
        }
Exemple #14
0
        /// <summary>
        /// Processes catalog pages one at a time: for every commit returned by
        /// <paramref name="fetchCatalogCommitsAsync"/> the page is downloaded, split into batches,
        /// and the batches are run through <c>StartProcessingBatchesIfNoFailures</c> until none are
        /// left in flight. The front cursor is advanced and saved once per page, after all of that
        /// page's batches have completed without failure.
        /// </summary>
        /// <param name="client">HTTP client used to download pages; also handed to batch processing.</param>
        /// <param name="front">Cursor set to the page's maximum commit timestamp and saved after each page.</param>
        /// <param name="back">Cursor passed to commit fetching and batch creation.</param>
        /// <param name="fetchCatalogCommitsAsync">Delegate producing the commits (pages) to visit.</param>
        /// <param name="createCommitItemBatchesAsync">Delegate turning a page's items into batches.</param>
        /// <param name="processCommitItemBatchAsync">Delegate handed to the batch-starting helper.</param>
        /// <param name="maxConcurrentBatches">
        /// Passed to the batch-starting helper. NOTE(review): presumably caps the number of batches
        /// in flight; confirm in <c>StartProcessingBatchesIfNoFailures</c>.
        /// </param>
        /// <param name="logger">Receives one error entry per captured batch exception.</param>
        /// <param name="cancellationToken">Token forwarded to downloads, batch processing and cursor saves.</param>
        /// <returns><c>true</c> if at least one page yielded one or more batches.</returns>
        /// <exception cref="BatchProcessingException">
        /// Thrown after a page's in-flight batches have drained when any of them faulted or was
        /// canceled. The cursor is not advanced for that page.
        /// </exception>
        internal static async Task <bool> ProcessCatalogCommitsAsync(
            CollectorHttpClient client,
            ReadWriteCursor front,
            ReadCursor back,
            FetchCatalogCommitsAsync fetchCatalogCommitsAsync,
            CreateCommitItemBatchesAsync createCommitItemBatchesAsync,
            ProcessCommitItemBatchAsync processCommitItemBatchAsync,
            int maxConcurrentBatches,
            ILogger logger,
            CancellationToken cancellationToken)
        {
            var rootItems = await fetchCatalogCommitsAsync(client, front, back, cancellationToken);

            var hasAnyBatchFailed        = false;
            var hasAnyBatchBeenProcessed = false;

            foreach (CatalogCommit rootItem in rootItems)
            {
                JObject page = await client.GetJObjectAsync(rootItem.Uri, cancellationToken);

                var context = (JObject)page["@context"];
                CatalogCommitItemBatch[] batches = await CreateBatchesForAllAvailableItemsInPageAsync(
                    front,
                    back,
                    page,
                    context,
                    createCommitItemBatchesAsync);

                // Nothing to do for this page; move on without touching the cursor.
                if (!batches.Any())
                {
                    continue;
                }

                // Set as soon as a page has batches, before any of them has actually run.
                hasAnyBatchBeenProcessed = true;

                // Captured up front; this becomes the cursor value once the whole page succeeds.
                DateTime maxCommitTimeStamp = GetMaxCommitTimeStamp(batches);
                var      unprocessedBatches = batches.ToList();
                var      processingBatches  = new List <CatalogCommitItemBatchTask>();
                var      exceptions         = new List <Exception>();

                StartProcessingBatchesIfNoFailures(
                    client,
                    context,
                    unprocessedBatches,
                    processingBatches,
                    maxConcurrentBatches,
                    processCommitItemBatchAsync,
                    cancellationToken);

                // Keep looping until no batch task is tracked any more.
                while (processingBatches.Any())
                {
                    // DefaultIfEmpty guarantees WhenAny receives at least one task even when every
                    // tracked task has already completed.
                    var activeTasks = processingBatches.Where(batch => !batch.Task.IsCompleted)
                                      .Select(batch => batch.Task)
                                      .DefaultIfEmpty(Task.CompletedTask);

                    await Task.WhenAny(activeTasks);

                    // Sweep the tracked tasks: record failures and drop everything that finished.
                    for (var i = 0; i < processingBatches.Count; ++i)
                    {
                        var batch = processingBatches[i];

                        if (batch.Task.IsFaulted || batch.Task.IsCanceled)
                        {
                            hasAnyBatchFailed = true;

                            // Only tasks that expose an exception contribute to the list.
                            if (batch.Task.Exception != null)
                            {
                                var exception = ExceptionUtilities.Unwrap(batch.Task.Exception);

                                exceptions.Add(exception);
                            }
                        }

                        if (batch.Task.IsCompleted)
                        {
                            // Step the index back so the element shifted into slot i is not skipped.
                            processingBatches.RemoveAt(i);
                            --i;
                        }
                    }

                    // After a failure no further batches are started; the loop only drains
                    // what is already running.
                    if (!hasAnyBatchFailed)
                    {
                        StartProcessingBatchesIfNoFailures(
                            client,
                            context,
                            unprocessedBatches,
                            processingBatches,
                            maxConcurrentBatches,
                            processCommitItemBatchAsync,
                            cancellationToken);
                    }
                }

                if (hasAnyBatchFailed)
                {
                    foreach (var exception in exceptions)
                    {
                        logger.LogError(_eventId, exception, Strings.BatchProcessingFailure);
                    }

                    // Exactly one captured exception is wrapped directly; otherwise (zero or several)
                    // an AggregateException is built from the list.
                    var innerException = exceptions.Count == 1 ? exceptions.Single() : new AggregateException(exceptions);

                    throw new BatchProcessingException(innerException);
                }

                // Every batch of the page succeeded, so the cursor can move past the page.
                front.Value = maxCommitTimeStamp;

                await front.SaveAsync(cancellationToken);

                Trace.TraceInformation($"{nameof(CatalogCommitUtilities)}.{nameof(ProcessCatalogCommitsAsync)} " +
                                       $"{nameof(front)}.{nameof(front.Value)} saved since timestamp changed from previous: {{0}}", front);
            }

            return(hasAnyBatchBeenProcessed);
        }
 /// <summary>
 /// Fetch step supplied by a concrete collector; no implementation is declared here.
 /// </summary>
 /// <remarks>
 /// NOTE(review): presumably invoked by the collector's run method with the HTTP client it
 /// created and with both cursors already loaded. Confirm against the enclosing class.
 /// </remarks>
 /// <param name="client">HTTP client to use for requests.</param>
 /// <param name="front">Read/write cursor passed to the implementation.</param>
 /// <param name="back">Read-only cursor passed to the implementation.</param>
 /// <param name="cancellationToken">Cancellation token passed to the implementation.</param>
 /// <returns>A task producing the implementation-defined boolean result.</returns>
 protected abstract Task<bool> Fetch(CollectorHttpClient client, ReadWriteCursor front, ReadCursor back, CancellationToken cancellationToken);
        /// <summary>
        /// Visits every catalog commit returned by <c>FetchCatalogCommitsAsync</c>, downloads its
        /// page, builds batches from the page items in the window (front, back], and runs each
        /// batch through <c>OnProcessBatchAsync</c> in commit-timestamp order.
        /// </summary>
        /// <remarks>
        /// The cursor is saved at two points only: when the commit timestamp changes between
        /// consecutive batches (it is set to the previous timestamp), and after the last batch of a
        /// page. Batches that share a timestamp therefore never move the cursor past each other.
        /// </remarks>
        /// <param name="client">HTTP client used for page downloads and handed to batch processing.</param>
        /// <param name="front">Exclusive lower bound; advanced and saved as described above.</param>
        /// <param name="back">Inclusive upper bound for item commit timestamps.</param>
        /// <param name="cancellationToken">Token forwarded to downloads, processing and cursor saves.</param>
        /// <returns>
        /// The value returned by the last <c>OnProcessBatchAsync</c> call, or <c>false</c> if no batch was processed.
        /// </returns>
        protected override async Task<bool> FetchAsync(
            CollectorHttpClient client,
            ReadWriteCursor front,
            ReadCursor back,
            CancellationToken cancellationToken)
        {
            IEnumerable<CatalogCommit> commits = await FetchCatalogCommitsAsync(client, front, cancellationToken);

            bool shouldContinue = false;

            foreach (CatalogCommit commit in commits)
            {
                JObject page = await client.GetJObjectAsync(commit.Uri, cancellationToken);

                JToken context = null;
                page.TryGetValue("@context", out context);

                // Deferred query; it is enumerated by CreateBatchesAsync.
                var itemsInWindow = page["items"]
                    .Select(token => CatalogCommitItem.Create((JObject)context, (JObject)token))
                    .Where(candidate => candidate.CommitTimeStamp > front.Value && candidate.CommitTimeStamp <= back.Value);

                var unorderedBatches = await CreateBatchesAsync(itemsInWindow);

                var orderedBatches = unorderedBatches
                    .OrderBy(candidate => candidate.CommitTimeStamp)
                    .ToList();

                var finalBatch = orderedBatches.LastOrDefault();
                DateTime? timestampOfPreviousBatch = null;

                foreach (var batch in orderedBatches)
                {
                    // Once the timestamp changes, every batch carrying the previous timestamp has
                    // been processed, so the cursor may safely move up to that previous timestamp.
                    // Saving any earlier would skip a sibling batch if its processing failed.
                    if (timestampOfPreviousBatch.HasValue && timestampOfPreviousBatch.Value != batch.CommitTimeStamp)
                    {
                        front.Value = timestampOfPreviousBatch.Value;
                        await front.SaveAsync(cancellationToken);

                        Trace.TraceInformation("CommitCatalog.Fetch front.Value saved since timestamp changed from previous: {0}", front);
                    }

                    var telemetryProperties = new Dictionary<string, string>()
                    {
                        { TelemetryConstants.BatchItemCount, batch.Items.Count.ToString() }
                    };

                    using (_telemetryService.TrackDuration(TelemetryConstants.ProcessBatchSeconds, telemetryProperties))
                    {
                        shouldContinue = await OnProcessBatchAsync(
                            client,
                            batch.Items,
                            context,
                            batch.CommitTimeStamp,
                            batch.CommitTimeStamp == finalBatch.CommitTimeStamp,
                            cancellationToken);
                    }

                    // The final batch of the page always commits the cursor.
                    if (ReferenceEquals(batch, finalBatch))
                    {
                        front.Value = batch.CommitTimeStamp;
                        await front.SaveAsync(cancellationToken);

                        Trace.TraceInformation("CommitCatalog.Fetch front.Value saved due to last batch: {0}", front);
                    }

                    timestampOfPreviousBatch = batch.CommitTimeStamp;

                    Trace.TraceInformation("CommitCatalog.Fetch front.Value is: {0}", front);

                    if (!shouldContinue)
                    {
                        break;
                    }
                }

                // Propagate a rejected batch out of the page loop as well.
                if (!shouldContinue)
                {
                    break;
                }
            }

            return shouldContinue;
        }
Exemple #17
0
 /// <summary>
 /// Fetch step supplied by a concrete collector; no implementation is declared here.
 /// </summary>
 /// <remarks>
 /// NOTE(review): presumably invoked by the collector's run method with the HTTP client it
 /// created and with both cursors already loaded. Confirm against the enclosing class.
 /// </remarks>
 /// <param name="client">HTTP client to use for requests.</param>
 /// <param name="front">Read/write cursor passed to the implementation.</param>
 /// <param name="back">Read-only cursor passed to the implementation.</param>
 /// <returns>A task producing the implementation-defined boolean result.</returns>
 protected abstract Task <bool> Fetch(CollectorHttpClient client, ReadWriteCursor front, ReadCursor back);
        /// <summary>
        /// Reads the job arguments and wires up the <c>RegistrationCollector</c> together with its
        /// front and back cursors. When a compressed storage factory is available, the collector
        /// writes through an <c>AggregateStorageFactory</c> that combines the primary and compressed
        /// factories.
        /// </summary>
        /// <param name="arguments">
        /// Job arguments. <c>Arguments.Source</c> is required; <c>Arguments.UnlistShouldDelete</c>
        /// and <c>Arguments.Verbose</c> default to <c>false</c>; <c>Arguments.ContentBaseAddress</c>
        /// is optional.
        /// </param>
        /// <param name="cancellationToken">Not used by this method.</param>
        protected override void Init(IDictionary<string, string> arguments, CancellationToken cancellationToken)
        {
            var source = arguments.GetOrThrow<string>(Arguments.Source);
            // Read from the arguments, but the value is not used further in this method.
            var unlistShouldDelete = arguments.GetOrDefault(Arguments.UnlistShouldDelete, false);
            var verbose = arguments.GetOrDefault(Arguments.Verbose, false);
            var contentBaseAddress = arguments.GetOrDefault<string>(Arguments.ContentBaseAddress);

            var storageFactory = CommandHelpers.CreateStorageFactory(arguments, verbose);
            var compressedStorageFactory = CommandHelpers.CreateCompressedStorageFactory(arguments, verbose);

            Logger.LogInformation("CONFIG source: \"{ConfigSource}\" storage: \"{Storage}\"", source, storageFactory);

            RegistrationMakerCatalogItem.PackagePathProvider = new PackagesFolderPackagePathProvider();

            // Start with the primary factory; replace it with an aggregate only when a
            // compressed (secondary) factory exists.
            StorageFactory storageFactoryToUse = storageFactory;

            if (compressedStorageFactory != null)
            {
                // Always rewrite the storage root URL in the secondary storage.
                var rootUrlMapping = new KeyValuePair<string, string>(
                    storageFactory.BaseAddress.ToString(),
                    compressedStorageFactory.BaseAddress.ToString());

                var secondaryStorageBaseUrlRewriter = new SecondaryStorageBaseUrlRewriter(
                    new List<KeyValuePair<string, string>> { rootUrlMapping });

                storageFactoryToUse = new AggregateStorageFactory(
                    storageFactory,
                    new[] { compressedStorageFactory },
                    secondaryStorageBaseUrlRewriter.Rewrite);
            }

            var collector = new RegistrationCollector(
                new Uri(source),
                storageFactoryToUse,
                CommandHelpers.GetHttpMessageHandlerFactory(verbose));

            // The content base address stays null unless one was supplied.
            Uri contentBaseUri = null;

            if (contentBaseAddress != null)
            {
                contentBaseUri = new Uri(contentBaseAddress);
            }

            collector.ContentBaseAddress = contentBaseUri;
            _collector = collector;

            // The front cursor is persisted as "cursor.json" in the created storage;
            // the back cursor is an in-memory maximum.
            var storage = storageFactoryToUse.Create();
            var cursorUri = storage.ResolveUri("cursor.json");

            _front = new DurableCursor(cursorUri, storage, MemoryCursor.MinValue);
            _back = MemoryCursor.CreateMax();
        }