/// <summary>
/// Loads both cursors, then performs one fetch pass with a newly created <c>CollectorHttpClient</c>.
/// </summary>
/// <param name="front">Read/write cursor; loaded here and then passed to <c>FetchAsync</c>.</param>
/// <param name="back">Read-only cursor; loaded here and then passed to <c>FetchAsync</c>.</param>
/// <param name="cancellationToken">Token handed to both cursor loads and to <c>FetchAsync</c>.</param>
/// <returns>Whatever <c>FetchAsync</c> returns.</returns>
public async Task<bool> RunAsync(ReadWriteCursor front, ReadCursor back, CancellationToken cancellationToken)
{
    // The two cursor loads are started together and awaited as a pair.
    await Task.WhenAll(front.LoadAsync(cancellationToken), back.LoadAsync(cancellationToken));

    Trace.TraceInformation("Run ( {0} , {1} )", front, back);

    // A handler is only created when a factory was supplied; otherwise null is passed to the client.
    HttpMessageHandler handler = _handlerFunc != null ? _handlerFunc() : null;

    using (var client = new CollectorHttpClient(handler))
    {
        // Only override the client's timeout when one was configured.
        if (_httpClientTimeout.HasValue)
        {
            client.Timeout = _httpClientTimeout.Value;
        }

        return await FetchAsync(client, front, back, cancellationToken);
    }
}
/// <summary>
/// Reads the catalog index, then each index entry committed after <paramref name="front"/>, and hands the
/// page items that fall in the (front, back] window to <c>OnProcessBatch</c> in batches ordered by commit
/// timestamp. The front cursor is set to the batch's commit timestamp and saved after every batch.
/// </summary>
/// <param name="client">HTTP client used to download the index and the pages.</param>
/// <param name="front">Cursor marking the exclusive lower bound; advanced and saved after each batch.</param>
/// <param name="back">Cursor marking the inclusive upper bound for page items.</param>
/// <param name="cancellationToken">Token passed to the downloads, the batch handler and the cursor save.</param>
/// <returns>
/// The result of the most recent <c>OnProcessBatch</c> call, or <c>false</c> when no call was made.
/// </returns>
protected override async Task<bool> Fetch(CollectorHttpClient client, ReadWriteCursor front, ReadCursor back, CancellationToken cancellationToken)
{
    JObject index = await client.GetJObjectAsync(Index, cancellationToken);

    IEnumerable<CatalogItem> pendingPages = index["items"]
        .Select(entry => new CatalogItem(entry))
        .Where(entry => entry.CommitTimeStamp > front.Value)
        .OrderBy(entry => entry.CommitTimeStamp);

    bool acceptNextBatch = false;

    foreach (CatalogItem pendingPage in pendingPages)
    {
        JObject page = await client.GetJObjectAsync(pendingPage.Uri, cancellationToken);

        JToken context = null;
        page.TryGetValue("@context", out context);

        var pageItemsInWindow = page["items"]
            .Select(entry => new CatalogItem(entry))
            .Where(entry => entry.CommitTimeStamp > front.Value && entry.CommitTimeStamp <= back.Value);

        var unorderedBatches = await CreateBatches(pageItemsInWindow);
        var orderedBatches = unorderedBatches
            .OrderBy(candidate => candidate.CommitTimeStamp)
            .ToList();

        // Only dereferenced inside the loop below, which does not run when the list is empty.
        var lastBatch = orderedBatches.LastOrDefault();

        foreach (var batch in orderedBatches)
        {
            acceptNextBatch = await OnProcessBatch(
                client,
                batch.Items.Select(entry => entry.Value),
                context,
                batch.CommitTimeStamp,
                batch.CommitTimeStamp == lastBatch.CommitTimeStamp,
                cancellationToken);

            front.Value = batch.CommitTimeStamp;
            await front.Save(cancellationToken);
            Trace.TraceInformation("CommitCatalog.Fetch front.Save has value: {0}", front);

            // The handler asked to stop: leave both loops.
            if (!acceptNextBatch)
            {
                return false;
            }
        }

        // Still false here means no batch has been accepted so far; stop walking pages.
        if (!acceptNextBatch)
        {
            return false;
        }
    }

    return acceptNextBatch;
}
/// <summary>
/// Reads the catalog index, then each index entry committed after <paramref name="front"/>, groups the page
/// items in the (front, back] window by commit timestamp and passes each group, oldest first, to
/// <c>OnProcessBatch</c>. The front cursor is set to the group's timestamp and saved after every group.
/// </summary>
/// <param name="client">HTTP client used to download the index and the pages.</param>
/// <param name="front">Cursor marking the exclusive lower bound; advanced and saved after each group.</param>
/// <param name="back">Cursor marking the inclusive upper bound for page items.</param>
/// <returns>
/// The result of the most recent <c>OnProcessBatch</c> call, or <c>false</c> when no call was made.
/// </returns>
protected override async Task<bool> Fetch(CollectorHttpClient client, ReadWriteCursor front, ReadCursor back)
{
    JObject root = await client.GetJObjectAsync(Index);

    IEnumerable<CatalogItem> rootItems = root["items"]
        .Select(item => new CatalogItem(item))
        .Where(item => item.CommitTimeStamp > front.Value)
        .OrderBy(item => item.CommitTimeStamp);

    bool acceptNextBatch = false;

    foreach (CatalogItem rootItem in rootItems)
    {
        JObject page = await client.GetJObjectAsync(rootItem.Uri);

        JToken context = null;
        page.TryGetValue("@context", out context);

        // One batch per distinct commit timestamp, processed in ascending timestamp order.
        var batches = page["items"]
            .Select(item => new CatalogItem(item))
            .Where(item => item.CommitTimeStamp > front.Value && item.CommitTimeStamp <= back.Value)
            .GroupBy(item => item.CommitTimeStamp)
            .OrderBy(group => group.Key);

        foreach (var batch in batches)
        {
            acceptNextBatch = await OnProcessBatch(client, batch.Select(item => item.Value), context, batch.Key);

            front.Value = batch.Key;
            await front.Save();
            Trace.TraceInformation("CommitCatalog.Fetch front.Save has value: {0}", front);

            if (!acceptNextBatch)
            {
                break;
            }
        }

        // Also true when this page yielded no batch and nothing has been accepted yet.
        if (!acceptNextBatch)
        {
            break;
        }
    }

    return acceptNextBatch;
}
/// <summary>
/// Builds commit items from the page's "items" array, keeps those committed after <paramref name="front"/>
/// and no later than <paramref name="back"/>, turns them into batches via
/// <paramref name="createCommitItemBatchesAsync"/> and returns the batches sorted by commit timestamp.
/// </summary>
/// <param name="front">Cursor giving the exclusive lower timestamp bound.</param>
/// <param name="back">Cursor giving the inclusive upper timestamp bound.</param>
/// <param name="page">Catalog page whose "items" entries are read.</param>
/// <param name="context">Context object passed to <c>CatalogCommitItem.Create</c> for every item.</param>
/// <param name="createCommitItemBatchesAsync">Delegate that groups the filtered items into batches.</param>
/// <returns>The created batches as an array in ascending commit-timestamp order.</returns>
private static async Task<CatalogCommitItemBatch[]> CreateBatchesForAllAvailableItemsInPageAsync(
    ReadWriteCursor front,
    ReadCursor back,
    JObject page,
    JObject context,
    CreateCommitItemBatchesAsync createCommitItemBatchesAsync)
{
    // Both steps stay lazy; the delegate decides when the sequence is enumerated.
    IEnumerable<CatalogCommitItem> allItems = page["items"]
        .Select(token => CatalogCommitItem.Create(context, (JObject)token));

    IEnumerable<CatalogCommitItem> itemsInWindow = allItems
        .Where(candidate => candidate.CommitTimeStamp > front.Value && candidate.CommitTimeStamp <= back.Value);

    IEnumerable<CatalogCommitItemBatch> unordered = await createCommitItemBatchesAsync(itemsInWindow);

    return unordered
        .OrderBy(candidate => candidate.CommitTimeStamp)
        .ToArray();
}
/// <summary>
/// Reads the job arguments and creates the <c>DnxCatalogCollector</c> together with its front and back cursors.
/// </summary>
/// <param name="arguments">Job arguments; <c>Arguments.Source</c> is required, the others read here are optional.</param>
/// <param name="cancellationToken">Not used by this method.</param>
protected override void Init(IDictionary<string, string> arguments, CancellationToken cancellationToken)
{
    var source = arguments.GetOrThrow<string>(Arguments.Source);
    var verbose = arguments.GetOrDefault(Arguments.Verbose, false);
    var contentBaseAddress = arguments.GetOrDefault<string>(Arguments.ContentBaseAddress);

    var storageFactory = CommandHelpers.CreateStorageFactory(arguments, verbose);

    Logger.LogInformation("CONFIG source: \"{ConfigSource}\" storage: \"{Storage}\"", source, storageFactory);

    // Build the collector first, then set its content base address, and only then publish it to the field.
    var collector = new DnxCatalogCollector(
        new Uri(source),
        storageFactory,
        CommandHelpers.GetHttpMessageHandlerFactory(verbose));

    if (contentBaseAddress == null)
    {
        collector.ContentBaseAddress = null;
    }
    else
    {
        collector.ContentBaseAddress = new Uri(contentBaseAddress);
    }

    _collector = collector;

    // The front cursor is stored as "cursor.json" in the created storage; the back cursor is the maximum memory cursor.
    var storage = storageFactory.Create();
    _front = new DurableCursor(storage.ResolveUri("cursor.json"), storage, MemoryCursor.MinValue);
    _back = MemoryCursor.CreateMax();
}
/// <summary>
/// Loads both cursors, performs one fetch pass with a newly created <c>CollectorHttpClient</c>, and copies
/// the client's request count into <c>RequestCount</c>.
/// </summary>
/// <param name="front">Read/write cursor; loaded here and then passed to <c>Fetch</c>.</param>
/// <param name="back">Read-only cursor; loaded here and then passed to <c>Fetch</c>.</param>
/// <param name="cancellationToken">Token handed to both cursor loads and to <c>Fetch</c>.</param>
/// <returns>Whatever <c>Fetch</c> returns.</returns>
public async Task<bool> Run(ReadWriteCursor front, ReadCursor back, CancellationToken cancellationToken)
{
    // The two cursor loads are started together and awaited as a pair.
    await Task.WhenAll(front.Load(cancellationToken), back.Load(cancellationToken));

    Trace.TraceInformation("Run ( {0} , {1} )", front, back);

    // A handler is only created when a factory was supplied; otherwise null is passed to the client.
    HttpMessageHandler handler = _handlerFunc != null ? _handlerFunc() : null;

    using (var client = new CollectorHttpClient(handler))
    {
        bool fetched = await Fetch(client, front, back, cancellationToken);

        // Capture the count before the client is disposed.
        RequestCount = client.RequestCount;

        return fetched;
    }
}
/// <summary>
/// Downloads the catalog index, timing the download through the telemetry service, and returns the index
/// entries committed after <paramref name="front"/> ordered by commit timestamp.
/// </summary>
/// <param name="client">HTTP client used to download the index.</param>
/// <param name="front">Cursor giving the exclusive lower timestamp bound.</param>
/// <param name="cancellationToken">Token passed to the index download.</param>
/// <returns>A deferred sequence of the matching commits in ascending commit-timestamp order.</returns>
protected async Task<IEnumerable<CatalogCommit>> FetchCatalogCommitsAsync(
    CollectorHttpClient client,
    ReadWriteCursor front,
    CancellationToken cancellationToken)
{
    var telemetryProperties = new Dictionary<string, string>()
    {
        { TelemetryConstants.Uri, Index.AbsoluteUri }
    };

    JObject index;

    // Only the download itself is inside the tracked duration.
    using (_telemetryService.TrackDuration(TelemetryConstants.CatalogIndexReadDurationSeconds, telemetryProperties))
    {
        index = await client.GetJObjectAsync(Index, cancellationToken);
    }

    // Left unmaterialized: the filter reads front.Value when the caller enumerates.
    return index["items"]
        .Select(token => CatalogCommit.Create((JObject)token))
        .Where(commit => commit.CommitTimeStamp > front.Value)
        .OrderBy(commit => commit.CommitTimeStamp);
}
/// <summary>
/// Loads both cursors, performs one fetch pass with a newly created <c>CollectorHttpClient</c>, and copies
/// the client's request count into <c>RequestCount</c>.
/// </summary>
/// <param name="front">Read/write cursor; loaded here and then passed to <c>Fetch</c>.</param>
/// <param name="back">Read-only cursor; loaded here and then passed to <c>Fetch</c>.</param>
/// <returns>Whatever <c>Fetch</c> returns.</returns>
public async Task<bool> Run(ReadWriteCursor front, ReadCursor back)
{
    // The two cursor loads are started together and awaited as a pair.
    await Task.WhenAll(front.Load(), back.Load());

    Trace.TraceInformation("Run ( {0} , {1} )", front, back);

    // A handler is only created when a factory was supplied; otherwise null is passed to the client.
    HttpMessageHandler handler = _handlerFunc != null ? _handlerFunc() : null;

    using (var client = new CollectorHttpClient(handler))
    {
        bool fetched = await Fetch(client, front, back);

        // Capture the count before the client is disposed.
        RequestCount = client.RequestCount;

        return fetched;
    }
}
/// <summary>
/// Walks the catalog index and its pages in commit-timestamp order, collecting page items committed after
/// <paramref name="front"/> and no later than <paramref name="back"/>, and flushes them through
/// <c>ProcessBatch</c> whenever <c>_batchSize</c> items have accumulated, plus once at the end for any
/// remainder.
/// </summary>
/// <param name="client">HTTP client used to download the index and the pages.</param>
/// <param name="front">Cursor giving the exclusive lower bound; passed to <c>ProcessBatch</c> with each flush.</param>
/// <param name="back">Cursor giving the inclusive upper bound for page items.</param>
/// <param name="cancellationToken">Token passed to every download and to every <c>ProcessBatch</c> call.</param>
/// <returns><c>true</c> when <c>BatchCount</c> increased during this call; otherwise <c>false</c>.</returns>
protected override async Task<bool> Fetch(CollectorHttpClient client, ReadWriteCursor front, ReadCursor back, CancellationToken cancellationToken)
{
    int beforeBatchCount = BatchCount;

    IList<JObject> items = new List<JObject>();

    JObject root = await client.GetJObjectAsync(Index, cancellationToken);

    JToken context = null;
    root.TryGetValue("@context", out context);

    IEnumerable<JToken> rootItems = root["items"].OrderBy(item => item["commitTimeStamp"].ToObject<DateTime>());

    DateTime resumeDateTime = front.Value;

    bool acceptNextBatch = true;

    foreach (JObject rootItem in rootItems)
    {
        if (!acceptNextBatch)
        {
            break;
        }

        DateTime rootItemCommitTimeStamp = rootItem["commitTimeStamp"].ToObject<DateTime>();

        // Pages at or before the front cursor are skipped without being downloaded.
        if (rootItemCommitTimeStamp <= front.Value)
        {
            continue;
        }

        Uri pageUri = rootItem["@id"].ToObject<Uri>();

        // Pass the caller's token so page downloads can be cancelled like the index download above.
        JObject page = await client.GetJObjectAsync(pageUri, cancellationToken);

        IEnumerable<JToken> pageItems = page["items"].OrderBy(item => item["commitTimeStamp"].ToObject<DateTime>());

        foreach (JObject pageItem in pageItems)
        {
            DateTime pageItemCommitTimeStamp = pageItem["commitTimeStamp"].ToObject<DateTime>();

            if (pageItemCommitTimeStamp <= front.Value)
            {
                continue;
            }

            // Items are sorted, so everything after the first one past the back cursor is out of range too.
            if (pageItemCommitTimeStamp > back.Value)
            {
                break;
            }

            items.Add(pageItem);
            resumeDateTime = pageItemCommitTimeStamp;

            if (items.Count == _batchSize)
            {
                acceptNextBatch = await ProcessBatch(client, items, context, front, resumeDateTime, cancellationToken);

                if (!acceptNextBatch)
                {
                    break;
                }
            }
        }
    }

    // Flush whatever is left over from a partially filled batch.
    if (acceptNextBatch && items.Count > 0)
    {
        await ProcessBatch(client, items, context, front, resumeDateTime, cancellationToken);
    }

    int afterBatchCount = BatchCount;

    PreviousRunBatchCount = (afterBatchCount - beforeBatchCount);

    return (PreviousRunBatchCount > 0);
}
/// <summary>
/// Performs the fetch pass of a run. Declared abstract, so the implementation is supplied by a derived class.
/// <c>RunAsync</c> awaits this method with the HTTP client it created and returns this method's result.
/// </summary>
/// <param name="client">HTTP client supplied by the caller.</param>
/// <param name="front">Read/write cursor supplied by the caller.</param>
/// <param name="back">Read-only cursor supplied by the caller.</param>
/// <param name="cancellationToken">Cancellation token supplied by the caller.</param>
/// <returns>A task producing a boolean; its meaning is defined by the implementation.</returns>
protected abstract Task <bool> FetchAsync( CollectorHttpClient client, ReadWriteCursor front, ReadCursor back, CancellationToken cancellationToken);
/// <summary>
/// Hands the accumulated items to <c>OnProcessBatch</c>, then counts the batch, empties the item list and
/// saves <paramref name="front"/> at <paramref name="resumeDateTime"/>.
/// </summary>
/// <param name="client">HTTP client forwarded to <c>OnProcessBatch</c>.</param>
/// <param name="items">Items of the batch; cleared after the handler has run.</param>
/// <param name="context">Context token; cast to <c>JObject</c> before being forwarded.</param>
/// <param name="front">Cursor that is set to <paramref name="resumeDateTime"/> and saved.</param>
/// <param name="resumeDateTime">Timestamp forwarded to the handler and written to the cursor.</param>
/// <param name="cancellationToken">Token passed to the handler and to the cursor save.</param>
/// <returns>The value returned by <c>OnProcessBatch</c>.</returns>
async Task<bool> ProcessBatch(CollectorHttpClient client, IList<JObject> items, JToken context, ReadWriteCursor front, DateTime resumeDateTime, CancellationToken cancellationToken)
{
    var contextObject = (JObject)context;

    bool shouldContinue = await OnProcessBatch(client, items, contextObject, resumeDateTime, cancellationToken);

    // Bookkeeping happens only after the handler completed without throwing.
    BatchCount++;
    items.Clear();

    front.Value = resumeDateTime;
    await front.Save(cancellationToken);

    return shouldContinue;
}
/// <summary>
/// Walks the catalog index and its pages in commit-timestamp order, collecting page items committed after
/// <paramref name="front"/> and no later than <paramref name="back"/>, and flushes them through
/// <c>ProcessBatch</c> whenever <c>_batchSize</c> items have accumulated, plus once at the end for any
/// remainder.
/// </summary>
/// <param name="client">HTTP client used to download the index and the pages.</param>
/// <param name="front">Cursor giving the exclusive lower bound; passed to <c>ProcessBatch</c> with each flush.</param>
/// <param name="back">Cursor giving the inclusive upper bound for page items.</param>
/// <returns><c>true</c> when <c>BatchCount</c> increased during this call; otherwise <c>false</c>.</returns>
protected override async Task<bool> Fetch(CollectorHttpClient client, ReadWriteCursor front, ReadCursor back)
{
    int batchCountAtStart = BatchCount;

    IList<JObject> pending = new List<JObject>();

    JObject index = await client.GetJObjectAsync(Index);

    JToken context = null;
    index.TryGetValue("@context", out context);

    // Shared sort key for index entries and page entries.
    Func<JToken, DateTime> commitTimeOf = token => token["commitTimeStamp"].ToObject<DateTime>();

    IEnumerable<JToken> indexEntries = index["items"].OrderBy(commitTimeOf);

    DateTime resumeDateTime = front.Value;
    bool acceptNextBatch = true;

    foreach (JObject indexEntry in indexEntries)
    {
        if (!acceptNextBatch)
        {
            break;
        }

        DateTime pageCommitTime = indexEntry["commitTimeStamp"].ToObject<DateTime>();

        // Pages at or before the front cursor are skipped without being downloaded.
        if (pageCommitTime <= front.Value)
        {
            continue;
        }

        Uri pageUri = indexEntry["@id"].ToObject<Uri>();
        JObject page = await client.GetJObjectAsync(pageUri);

        IEnumerable<JToken> pageEntries = page["items"].OrderBy(commitTimeOf);

        foreach (JObject pageEntry in pageEntries)
        {
            DateTime entryCommitTime = pageEntry["commitTimeStamp"].ToObject<DateTime>();

            if (entryCommitTime <= front.Value)
            {
                continue;
            }

            // Entries are sorted, so the first one past the back cursor ends this page.
            if (entryCommitTime > back.Value)
            {
                break;
            }

            pending.Add(pageEntry);
            resumeDateTime = entryCommitTime;

            if (pending.Count == _batchSize)
            {
                acceptNextBatch = await ProcessBatch(client, pending, context, front, resumeDateTime);

                if (!acceptNextBatch)
                {
                    break;
                }
            }
        }
    }

    // Flush whatever is left over from a partially filled batch.
    if (acceptNextBatch && pending.Count > 0)
    {
        await ProcessBatch(client, pending, context, front, resumeDateTime);
    }

    PreviousRunBatchCount = BatchCount - batchCountAtStart;

    return PreviousRunBatchCount > 0;
}
/// <summary>
/// Hands the accumulated items to <c>OnProcessBatch</c>, then counts the batch, empties the item list and
/// saves <paramref name="front"/> at <paramref name="resumeDateTime"/>.
/// </summary>
/// <param name="client">HTTP client forwarded to <c>OnProcessBatch</c>.</param>
/// <param name="items">Items of the batch; cleared after the handler has run.</param>
/// <param name="context">Context token; cast to <c>JObject</c> before being forwarded.</param>
/// <param name="front">Cursor that is set to <paramref name="resumeDateTime"/> and saved.</param>
/// <param name="resumeDateTime">Timestamp forwarded to the handler and written to the cursor.</param>
/// <returns>The value returned by <c>OnProcessBatch</c>.</returns>
async Task<bool> ProcessBatch(CollectorHttpClient client, IList<JObject> items, JToken context, ReadWriteCursor front, DateTime resumeDateTime)
{
    var contextObject = (JObject)context;

    bool shouldContinue = await OnProcessBatch(client, items, contextObject, resumeDateTime);

    // Bookkeeping happens only after the handler completed without throwing.
    BatchCount++;
    items.Clear();

    front.Value = resumeDateTime;
    await front.Save();

    return shouldContinue;
}
/// <summary>
/// Processes catalog commits page by page. For each page, the batches created for the (front, back] window
/// are run through <c>StartProcessingBatchesIfNoFailures</c> until none remain in flight; if every batch of
/// the page succeeded, <paramref name="front"/> is set to the page's maximum commit timestamp and saved.
/// </summary>
/// <param name="client">HTTP client used to download pages and forwarded to batch processing.</param>
/// <param name="front">Cursor giving the lower bound; advanced and saved after each successful page.</param>
/// <param name="back">Cursor giving the upper bound.</param>
/// <param name="fetchCatalogCommitsAsync">Delegate producing the commits (pages) to walk.</param>
/// <param name="createCommitItemBatchesAsync">Delegate turning a page's commit items into batches.</param>
/// <param name="processCommitItemBatchAsync">Delegate forwarded to <c>StartProcessingBatchesIfNoFailures</c>.</param>
/// <param name="maxConcurrentBatches">Concurrency limit forwarded to <c>StartProcessingBatchesIfNoFailures</c>.</param>
/// <param name="logger">Logger that receives one error entry per captured batch exception.</param>
/// <param name="cancellationToken">Token passed to downloads, batch processing and the cursor save.</param>
/// <returns><c>true</c> when at least one page produced batches; otherwise <c>false</c>.</returns>
/// <exception cref="BatchProcessingException">
/// Thrown after a page's in-flight batches have drained when any batch task faulted or was canceled.
/// </exception>
internal static async Task<bool> ProcessCatalogCommitsAsync(
    CollectorHttpClient client,
    ReadWriteCursor front,
    ReadCursor back,
    FetchCatalogCommitsAsync fetchCatalogCommitsAsync,
    CreateCommitItemBatchesAsync createCommitItemBatchesAsync,
    ProcessCommitItemBatchAsync processCommitItemBatchAsync,
    int maxConcurrentBatches,
    ILogger logger,
    CancellationToken cancellationToken)
{
    var rootItems = await fetchCatalogCommitsAsync(client, front, back, cancellationToken);

    var hasAnyBatchFailed = false;
    var hasAnyBatchBeenProcessed = false;

    foreach (CatalogCommit rootItem in rootItems)
    {
        JObject page = await client.GetJObjectAsync(rootItem.Uri, cancellationToken);
        var context = (JObject)page["@context"];
        CatalogCommitItemBatch[] batches = await CreateBatchesForAllAvailableItemsInPageAsync(
            front,
            back,
            page,
            context,
            createCommitItemBatchesAsync);

        if (!batches.Any())
        {
            continue;
        }

        hasAnyBatchBeenProcessed = true;

        DateTime maxCommitTimeStamp = GetMaxCommitTimeStamp(batches);
        var unprocessedBatches = batches.ToList();
        var processingBatches = new List<CatalogCommitItemBatchTask>();
        var exceptions = new List<Exception>();

        StartProcessingBatchesIfNoFailures(
            client,
            context,
            unprocessedBatches,
            processingBatches,
            maxConcurrentBatches,
            processCommitItemBatchAsync,
            cancellationToken);

        while (processingBatches.Any())
        {
            // The completed-task fallback keeps WhenAny from being called with an empty sequence.
            var activeTasks = processingBatches.Where(batch => !batch.Task.IsCompleted)
                .Select(batch => batch.Task)
                .DefaultIfEmpty(Task.CompletedTask);

            await Task.WhenAny(activeTasks);

            for (var i = 0; i < processingBatches.Count; ++i)
            {
                var batch = processingBatches[i];

                // Test completion FIRST. A task's state is final once IsCompleted is true, so the
                // faulted/canceled checks below cannot miss a failure that lands between two reads.
                // Checking failure before completion could remove a just-failed task as if it had succeeded.
                if (!batch.Task.IsCompleted)
                {
                    continue;
                }

                if (batch.Task.IsFaulted || batch.Task.IsCanceled)
                {
                    hasAnyBatchFailed = true;

                    if (batch.Task.Exception != null)
                    {
                        var exception = ExceptionUtilities.Unwrap(batch.Task.Exception);

                        exceptions.Add(exception);
                    }
                }

                // Removing shifts later entries down by one; step back so the next entry is not skipped.
                processingBatches.RemoveAt(i);
                --i;
            }

            // After a failure no new batches are started; the loop only drains what is already running.
            if (!hasAnyBatchFailed)
            {
                StartProcessingBatchesIfNoFailures(
                    client,
                    context,
                    unprocessedBatches,
                    processingBatches,
                    maxConcurrentBatches,
                    processCommitItemBatchAsync,
                    cancellationToken);
            }
        }

        if (hasAnyBatchFailed)
        {
            foreach (var exception in exceptions)
            {
                logger.LogError(_eventId, exception, Strings.BatchProcessingFailure);
            }

            var innerException = exceptions.Count == 1 ? exceptions.Single() : new AggregateException(exceptions);

            throw new BatchProcessingException(innerException);
        }

        // Reached only when every batch of this page completed successfully.
        front.Value = maxCommitTimeStamp;

        await front.SaveAsync(cancellationToken);

        Trace.TraceInformation($"{nameof(CatalogCommitUtilities)}.{nameof(ProcessCatalogCommitsAsync)} " +
            $"{nameof(front)}.{nameof(front.Value)} saved since timestamp changed from previous: {{0}}", front);
    }

    return hasAnyBatchBeenProcessed;
}
/// <summary>
/// Performs the fetch pass of a run. Declared abstract, so the implementation is supplied by a derived class.
/// <c>Run</c> awaits this method with the HTTP client it created and returns this method's result.
/// </summary>
/// <param name="client">HTTP client supplied by the caller.</param>
/// <param name="front">Read/write cursor supplied by the caller.</param>
/// <param name="back">Read-only cursor supplied by the caller.</param>
/// <param name="cancellationToken">Cancellation token supplied by the caller.</param>
/// <returns>A task producing a boolean; its meaning is defined by the implementation.</returns>
protected abstract Task<bool> Fetch(CollectorHttpClient client, ReadWriteCursor front, ReadCursor back, CancellationToken cancellationToken);
/// <summary>
/// Fetches the pending catalog commits and, for each page, hands the batches built from items in the
/// (front, back] window to <c>OnProcessBatchAsync</c> in commit-timestamp order. The front cursor is saved
/// when the commit timestamp changes between consecutive batches and after the last batch of a page.
/// </summary>
/// <param name="client">HTTP client used to download the index and the pages.</param>
/// <param name="front">Cursor giving the exclusive lower bound; advanced and saved as batches complete.</param>
/// <param name="back">Cursor giving the inclusive upper bound for page items.</param>
/// <param name="cancellationToken">Token passed to the downloads, the batch handler and the cursor saves.</param>
/// <returns>
/// The result of the most recent <c>OnProcessBatchAsync</c> call, or <c>false</c> when no call was made.
/// </returns>
protected override async Task<bool> FetchAsync(
    CollectorHttpClient client,
    ReadWriteCursor front,
    ReadCursor back,
    CancellationToken cancellationToken)
{
    IEnumerable<CatalogCommit> pendingCommits = await FetchCatalogCommitsAsync(client, front, cancellationToken);

    bool acceptNextBatch = false;

    foreach (CatalogCommit pendingCommit in pendingCommits)
    {
        JObject page = await client.GetJObjectAsync(pendingCommit.Uri, cancellationToken);

        JToken context = null;
        page.TryGetValue("@context", out context);

        var itemsInWindow = page["items"]
            .Select(token => CatalogCommitItem.Create((JObject)context, (JObject)token))
            .Where(candidate => candidate.CommitTimeStamp > front.Value && candidate.CommitTimeStamp <= back.Value);

        var unorderedBatches = await CreateBatchesAsync(itemsInWindow);
        var orderedBatches = unorderedBatches
            .OrderBy(candidate => candidate.CommitTimeStamp)
            .ToList();

        // Only dereferenced inside the loop below, which does not run when the list is empty.
        var lastBatch = orderedBatches.LastOrDefault();
        DateTime? previousCommitTimeStamp = null;

        foreach (var batch in orderedBatches)
        {
            // Save the cursor at the previous timestamp once the timestamp moves on. Batches that share a
            // timestamp must all succeed before the cursor may pass that timestamp; otherwise a failure in
            // the second of two same-timestamp batches would be skipped on the next run.
            if (previousCommitTimeStamp.HasValue && previousCommitTimeStamp != batch.CommitTimeStamp)
            {
                front.Value = previousCommitTimeStamp.Value;
                await front.SaveAsync(cancellationToken);

                Trace.TraceInformation("CommitCatalog.Fetch front.Value saved since timestamp changed from previous: {0}", front);
            }

            var telemetryProperties = new Dictionary<string, string>()
            {
                { TelemetryConstants.BatchItemCount, batch.Items.Count.ToString() }
            };

            using (_telemetryService.TrackDuration(TelemetryConstants.ProcessBatchSeconds, telemetryProperties))
            {
                acceptNextBatch = await OnProcessBatchAsync(
                    client,
                    batch.Items,
                    context,
                    batch.CommitTimeStamp,
                    batch.CommitTimeStamp == lastBatch.CommitTimeStamp,
                    cancellationToken);
            }

            // Nothing follows the final batch of the page, so its timestamp is saved right away.
            if (ReferenceEquals(batch, lastBatch))
            {
                front.Value = batch.CommitTimeStamp;
                await front.SaveAsync(cancellationToken);

                Trace.TraceInformation("CommitCatalog.Fetch front.Value saved due to last batch: {0}", front);
            }

            previousCommitTimeStamp = batch.CommitTimeStamp;

            Trace.TraceInformation("CommitCatalog.Fetch front.Value is: {0}", front);

            // The handler asked to stop: leave both loops.
            if (!acceptNextBatch)
            {
                return false;
            }
        }

        // Still false here means no batch has been accepted so far; stop walking pages.
        if (!acceptNextBatch)
        {
            return false;
        }
    }

    return acceptNextBatch;
}
/// <summary>
/// Performs the fetch pass of a run. Declared abstract, so the implementation is supplied by a derived class.
/// <c>Run</c> awaits this method with the HTTP client it created and returns this method's result.
/// </summary>
/// <param name="client">HTTP client supplied by the caller.</param>
/// <param name="front">Read/write cursor supplied by the caller.</param>
/// <param name="back">Read-only cursor supplied by the caller.</param>
/// <returns>A task producing a boolean; its meaning is defined by the implementation.</returns>
protected abstract Task <bool> Fetch(CollectorHttpClient client, ReadWriteCursor front, ReadCursor back);
/// <summary>
/// Reads the job arguments and creates the <c>RegistrationCollector</c> together with its front and back
/// cursors. When a compressed storage factory is available, the collector writes through an aggregate
/// factory that combines the primary and the compressed storage.
/// </summary>
/// <param name="arguments">Job arguments; <c>Arguments.Source</c> is required, the others read here are optional.</param>
/// <param name="cancellationToken">Not used by this method.</param>
protected override void Init(IDictionary<string, string> arguments, CancellationToken cancellationToken)
{
    var source = arguments.GetOrThrow<string>(Arguments.Source);
    // Read but not used further in this method.
    var unlistShouldDelete = arguments.GetOrDefault(Arguments.UnlistShouldDelete, false);
    var verbose = arguments.GetOrDefault(Arguments.Verbose, false);
    var contentBaseAddress = arguments.GetOrDefault<string>(Arguments.ContentBaseAddress);

    var storageFactory = CommandHelpers.CreateStorageFactory(arguments, verbose);
    var compressedStorageFactory = CommandHelpers.CreateCompressedStorageFactory(arguments, verbose);

    Logger.LogInformation("CONFIG source: \"{ConfigSource}\" storage: \"{Storage}\"", source, storageFactory);

    RegistrationMakerCatalogItem.PackagePathProvider = new PackagesFolderPackagePathProvider();

    // Default to the primary factory; replaced below when a compressed factory exists.
    StorageFactory storageFactoryToUse = storageFactory;

    if (compressedStorageFactory != null)
    {
        var rewriteRules = new List<KeyValuePair<string, string>>
        {
            // Always rewrite the storage root URL in the secondary storage.
            new KeyValuePair<string, string>(
                storageFactory.BaseAddress.ToString(),
                compressedStorageFactory.BaseAddress.ToString())
        };

        var secondaryStorageBaseUrlRewriter = new SecondaryStorageBaseUrlRewriter(rewriteRules);

        storageFactoryToUse = new AggregateStorageFactory(
            storageFactory,
            new[] { compressedStorageFactory },
            secondaryStorageBaseUrlRewriter.Rewrite);
    }

    // Build the collector first, then set its content base address, and only then publish it to the field.
    var collector = new RegistrationCollector(
        new Uri(source),
        storageFactoryToUse,
        CommandHelpers.GetHttpMessageHandlerFactory(verbose));

    if (contentBaseAddress == null)
    {
        collector.ContentBaseAddress = null;
    }
    else
    {
        collector.ContentBaseAddress = new Uri(contentBaseAddress);
    }

    _collector = collector;

    // The front cursor is stored as "cursor.json" in the created storage; the back cursor is the maximum memory cursor.
    var storage = storageFactoryToUse.Create();
    _front = new DurableCursor(storage.ResolveUri("cursor.json"), storage, MemoryCursor.MinValue);
    _back = MemoryCursor.CreateMax();
}