public async Task<bool> RunAsync(ReadWriteCursor front, ReadCursor back, CancellationToken cancellationToken) { await Task.WhenAll(front.LoadAsync(cancellationToken), back.LoadAsync(cancellationToken)); Trace.TraceInformation("Run ( {0} , {1} )", front, back); bool result = false; HttpMessageHandler handler = null; if (_handlerFunc != null) { handler = _handlerFunc(); } using (CollectorHttpClient client = new CollectorHttpClient(handler)) { if (_httpClientTimeout.HasValue) { client.Timeout = _httpClientTimeout.Value; } result = await FetchAsync(client, front, back, cancellationToken); } return result; }
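// Hedged usage sketch (not part of the source above): one way to drive the RunAsync loop with durable cursors.
// "CatalogPrinterCollector" is a hypothetical concrete collector type; the connection string, container name,
// cursor file names, and catalog URL are placeholder assumptions for illustration only.
string connectionString = "UseDevelopmentStorage=true";
var account = CloudStorageAccount.Parse(connectionString);
var storageFactory = new AzureStorageFactory(account, "collector-state");
var storage = storageFactory.Create();

// The front cursor records how far this collector has progressed; the back cursor bounds how far it may read.
var front = new DurableCursor(storage.ResolveUri("cursor.json"), storage, DateTime.MinValue);
ReadCursor back = new DurableCursor(storage.ResolveUri("dependency-cursor.json"), storage, DateTime.MaxValue);

var collector = new CatalogPrinterCollector(new Uri("https://api.nuget.org/v3/catalog0/index.json"));

// RunAsync loads both cursors, fetches commits between them, and saves the front cursor as batches succeed.
bool processedBatches = await collector.RunAsync(front, back, CancellationToken.None);
Console.WriteLine("Processed new catalog batches: {0}", processedBatches);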
protected override async Task<bool> OnProcessBatch(CollectorHttpClient client, IEnumerable<JToken> items, JToken context, DateTime commitTimeStamp, CancellationToken cancellationToken) { foreach (JToken item in items) { string id = item["nuget:id"].ToString().ToLowerInvariant(); string version = item["nuget:version"].ToString().ToLowerInvariant(); Storage storage = _storageFactory.Create(id); string nuspec = await LoadNuspec(id, version, cancellationToken); if (nuspec != null) { await SaveNuspec(storage, id, version, nuspec, cancellationToken); await CopyNupkg(storage, id, version, cancellationToken); await UpdateMetadata(storage, version, cancellationToken); Trace.TraceInformation("commit: {0}/{1}", id, version); } else { Trace.TraceWarning("no nuspec available for {0}/{1} skipping", id, version); } } return true; }
protected override async Task<bool> OnProcessBatch(CollectorHttpClient client, IEnumerable<JToken> items, JToken context, DateTime commitTimeStamp) { List<Task<IGraph>> tasks = new List<Task<IGraph>>(); foreach (JObject item in items) { if (Utils.IsType((JObject)context, item, _types)) { Uri itemUri = item["@id"].ToObject<Uri>(); tasks.Add(client.GetGraphAsync(itemUri)); } } if (tasks.Count > 0) { await Task.WhenAll(tasks.ToArray()); TripleStore store = new TripleStore(); foreach (Task<IGraph> task in tasks) { store.Add(task.Result, true); } await ProcessStore(store); } return true; }
protected override async Task<bool> OnProcessBatch(CollectorHttpClient client, IEnumerable<JToken> items, JToken context, DateTime commitTimeStamp, bool isLastBatch, CancellationToken cancellationToken) { JObject catalogIndex = (_baseAddress != null) ? await client.GetJObjectAsync(Index, cancellationToken) : null; IEnumerable<JObject> catalogItems = await FetchCatalogItems(client, items, cancellationToken); var numDocs = _indexWriter.NumDocs(); _logger.LogInformation(string.Format("Index contains {0} documents.", _indexWriter.NumDocs())); ProcessCatalogIndex(_indexWriter, catalogIndex, _baseAddress); ProcessCatalogItems(_indexWriter, catalogItems, _baseAddress); var docsDifference = _indexWriter.NumDocs() - numDocs; UpdateCommitMetadata(commitTimeStamp, docsDifference); _logger.LogInformation(string.Format("Processed catalog items. Index now contains {0} documents. (total uncommitted {1}, batch {2})", _indexWriter.NumDocs(), _metadataForNextCommit.Count, docsDifference)); if (_commitEachBatch || isLastBatch) { EnsureCommitted(); } return true; }
protected override async Task<bool> OnProcessBatch(CollectorHttpClient client, IEnumerable<JToken> items, JToken context, DateTime commitTimeStamp) { IDictionary<string, IList<JObject>> sortedItems = new Dictionary<string, IList<JObject>>(); foreach (JObject item in items) { string key = GetKey(item); IList<JObject> itemList; if (!sortedItems.TryGetValue(key, out itemList)) { itemList = new List<JObject>(); sortedItems.Add(key, itemList); } itemList.Add(item); } IList<Task> tasks = new List<Task>(); foreach (KeyValuePair<string, IList<JObject>> sortedBatch in sortedItems) { Task task = ProcessSortedBatch(client, sortedBatch, context); tasks.Add(task); } await Task.WhenAll(tasks.ToArray()); return true; }
protected override async Task<bool> OnProcessBatch(CollectorHttpClient client, IEnumerable<JToken> items, JToken context, DateTime commitTimeStamp, bool isLastBatch, CancellationToken cancellationToken) { List<Task<IGraph>> tasks = new List<Task<IGraph>>(); foreach (JObject item in items) { if (Utils.IsType((JObject)context, item, _types)) { Uri itemUri = item["@id"].ToObject<Uri>(); tasks.Add(client.GetGraphAsync(itemUri)); } } if (tasks.Count > 0) { await Task.WhenAll(tasks.ToArray()); TripleStore store = new TripleStore(); foreach (Task<IGraph> task in tasks) { store.Add(task.Result, true); } await ProcessStore(store, cancellationToken); } return true; }
protected abstract Task<bool> OnProcessBatchAsync( CollectorHttpClient client, IEnumerable<CatalogCommitItem> items, JToken context, DateTime commitTimeStamp, bool isLastBatch, CancellationToken cancellationToken);
public static async Task ProcessGraphs( string id, IDictionary<string, IGraph> sortedGraphs, StorageFactory storageFactory, Uri contentBaseAddress, int partitionSize, int packageCountThreshold) { int versionAlreadyExistsCount = 0; existingVersionsWithID = new List<string>(); try { Storage storage = storageFactory.Create(id.ToLowerInvariant()); Uri resourceUri = storage.ResolveUri("index.json"); string json = await storage.LoadString(resourceUri); int count = Utils.CountItems(json); // Determine whether any of these versions already exist CollectorHttpClient httpClient = new CollectorHttpClient(); foreach (var graph in sortedGraphs) { JObject jsonContent = await httpClient.GetJObjectAsync(new Uri(graph.Key)); string existingId = jsonContent["@id"].ToString(); string existingVersionWithId = existingId.Substring(existingId.LastIndexOf("/") + 1); string existingVersion = jsonContent["version"].ToString() + ".json"; // Determine whether the version is actually available. // Registration blobs use the format /packageId/packageVersion.json, // so checking for version.json alone is enough to confirm existence. if (storage.Exists(existingVersion)) { // AddExistingItems later compares against "packageId.packageVersion.json", so store the version together with the id existingVersionsWithID.Add(existingVersionWithId); versionAlreadyExistsCount++; } } int total = count + sortedGraphs.Count - versionAlreadyExistsCount; if (total < packageCountThreshold) { await SaveSmallRegistration(storage, storageFactory.BaseAddress, sortedGraphs, contentBaseAddress, partitionSize); } else { await SaveLargeRegistration(storage, storageFactory.BaseAddress, sortedGraphs, json, contentBaseAddress, partitionSize); } } catch (Exception e) { throw new Exception(string.Format("Process id = {0}", id), e); } }
public static async Task ProcessGraphs( string id, IDictionary<string, IGraph> sortedGraphs, StorageFactory storageFactory, Uri contentBaseAddress, int partitionSize, int packageCountThreshold, CancellationToken cancellationToken) { int versionAlreadyExistsCount = 0; existingVersionsWithID = new List<string>(); try { Storage storage = storageFactory.Create(id.ToLowerInvariant()); Uri resourceUri = storage.ResolveUri("index.json"); string json = await storage.LoadString(resourceUri, cancellationToken); int count = Utils.CountItems(json); // Determine whether any of these versions already exist CollectorHttpClient httpClient = new CollectorHttpClient(); foreach (var graph in sortedGraphs) { JObject jsonContent = await httpClient.GetJObjectAsync(new Uri(graph.Key), cancellationToken); string existingId = jsonContent["@id"].ToString(); string existingVersionWithId = existingId.Substring(existingId.LastIndexOf("/") + 1); string existingVersion = jsonContent["version"].ToString() + ".json"; // Determine whether the version is actually available. // Registration blobs use the format /packageId/packageVersion.json, // so checking for version.json alone is enough to confirm existence. if (storage.Exists(existingVersion)) { // AddExistingItems later compares against "packageId.packageVersion.json", so store the version together with the id existingVersionsWithID.Add(existingVersionWithId); versionAlreadyExistsCount++; } } int total = count + sortedGraphs.Count - versionAlreadyExistsCount; if (total < packageCountThreshold) { await SaveSmallRegistration(storage, storageFactory.BaseAddress, sortedGraphs, contentBaseAddress, partitionSize, cancellationToken); } else { await SaveLargeRegistration(storage, storageFactory.BaseAddress, sortedGraphs, json, contentBaseAddress, partitionSize, cancellationToken); } } catch (Exception e) { throw new Exception(string.Format("Process id = {0}", id), e); } }
protected override async Task<bool> Fetch(CollectorHttpClient client, ReadWriteCursor front, ReadCursor back, CancellationToken cancellationToken) { JObject root = await client.GetJObjectAsync(Index, cancellationToken); IEnumerable<CatalogItem> rootItems = root["items"] .Select(item => new CatalogItem(item)) .Where(item => item.CommitTimeStamp > front.Value) .OrderBy(item => item.CommitTimeStamp); bool acceptNextBatch = false; foreach (CatalogItem rootItem in rootItems) { JObject page = await client.GetJObjectAsync(rootItem.Uri, cancellationToken); JToken context = null; page.TryGetValue("@context", out context); var batches = await CreateBatches(page["items"] .Select(item => new CatalogItem(item)) .Where(item => item.CommitTimeStamp > front.Value && item.CommitTimeStamp <= back.Value)); var orderedBatches = batches .OrderBy(batch => batch.CommitTimeStamp) .ToList(); var lastBatch = orderedBatches.LastOrDefault(); foreach (var batch in orderedBatches) { acceptNextBatch = await OnProcessBatch( client, batch.Items.Select(item => item.Value), context, batch.CommitTimeStamp, batch.CommitTimeStamp == lastBatch.CommitTimeStamp, cancellationToken); front.Value = batch.CommitTimeStamp; await front.Save(cancellationToken); Trace.TraceInformation("CommitCatalog.Fetch front.Save has value: {0}", front); if (!acceptNextBatch) { break; } } if (!acceptNextBatch) { break; } } return acceptNextBatch; }
protected override Task<bool> OnProcessBatch(CollectorHttpClient client, IEnumerable<JToken> items, JToken context, DateTime commitTimeStamp, CancellationToken cancellationToken) { foreach (JObject item in items) { Console.WriteLine("{0} {1}", _name, item["@id"].ToString()); } return Task.FromResult(true); }
protected override async Task<bool> OnProcessBatch(CollectorHttpClient client, IEnumerable<JToken> items, JToken context, DateTime commitTimeStamp, CancellationToken cancellationToken) { IEnumerable<JObject> catalogItems = await FetchCatalogItems(client, items, cancellationToken); foreach (var entry in catalogItems) { Console.WriteLine(entry["id"]); } return true; }
async Task<bool> ProcessBatch(CollectorHttpClient client, IList<JObject> items, JToken context, ReadWriteCursor front, DateTime resumeDateTime) { bool acceptNextBatch = await OnProcessBatch(client, items, (JObject)context, resumeDateTime); BatchCount++; items.Clear(); front.Value = resumeDateTime; await front.Save(); return acceptNextBatch; }
protected override async Task <bool> Fetch(CollectorHttpClient client, ReadWriteCursor front, ReadCursor back) { IList <JObject> items = new List <JObject>(); JObject root = await client.GetJObjectAsync(Index); IEnumerable <CatalogItem> rootItems = root["items"] .Select(item => new CatalogItem(item)) .Where(item => item.CommitTimeStamp > front.Value) .OrderBy(item => item.CommitTimeStamp); bool acceptNextBatch = false; foreach (CatalogItem rootItem in rootItems) { JObject page = await client.GetJObjectAsync(rootItem.Uri); JToken context = null; page.TryGetValue("@context", out context); var batches = page["items"] .Select(item => new CatalogItem(item)) .Where(item => item.CommitTimeStamp > front.Value && item.CommitTimeStamp <= back.Value) .GroupBy(item => item.CommitTimeStamp) .OrderBy(group => group.Key); foreach (var batch in batches) { acceptNextBatch = await OnProcessBatch(client, batch.Select(item => item.Value), context, batch.Key); front.Value = batch.Key; await front.Save(); Trace.TraceInformation("CommitCatalog.Fetch front.Save has value: {0}", front); if (!acceptNextBatch) { break; } } if (!acceptNextBatch) { break; } } return(acceptNextBatch); }
static async Task<IEnumerable<JObject>> FetchCatalogItems(CollectorHttpClient client, IEnumerable<JToken> items, CancellationToken cancellationToken) { IList<Task<JObject>> tasks = new List<Task<JObject>>(); foreach (JToken item in items) { Uri catalogItemUri = item["@id"].ToObject<Uri>(); tasks.Add(client.GetJObjectAsync(catalogItemUri, cancellationToken)); } await Task.WhenAll(tasks); return tasks.Select(t => t.Result); }
protected override async Task ProcessSortedBatchAsync( CollectorHttpClient client, KeyValuePair <string, IList <CatalogCommitItem> > sortedBatch, JToken context, CancellationToken cancellationToken) { var graphs = new Dictionary <string, IGraph>(); var graphTasks = new Dictionary <string, Task <IGraph> >(); foreach (var item in sortedBatch.Value) { var isMatch = false; foreach (Uri type in _types) { if (item.TypeUris.Any(typeUri => typeUri.AbsoluteUri == type.AbsoluteUri)) { isMatch = true; break; } } if (isMatch) { // Load package details from catalog. // Download the graph to a read-only container. This allows operations on each graph to be safely // parallelized. var task = client.GetGraphAsync(item.Uri, readOnly: true, token: cancellationToken); graphTasks.Add(item.Uri.AbsoluteUri, task); } } await Task.WhenAll(graphTasks.Values.ToArray()); foreach (var task in graphTasks) { graphs.Add(task.Key, task.Value.Result); } if (graphs.Count > 0) { var sortedGraphs = new KeyValuePair <string, IReadOnlyDictionary <string, IGraph> >(sortedBatch.Key, graphs); await ProcessGraphsAsync(sortedGraphs, cancellationToken); } }
public void CatalogTest_VerifyPackage() { string pathA = CreateNupkg("MyPackage", "1.2.1"); CatalogStep step = new CatalogStep(Config, new string[] { pathA }); step.Run(); var file = Config.Catalog.LocalFolder.GetFiles("mypackage.1.2.1.json", SearchOption.AllDirectories).FirstOrDefault(); JsonTextReader reader = new JsonTextReader(file.OpenText()); CollectorHttpClient client = new CollectorHttpClient(Config.Catalog.FileSystemEmulator); var task = client.GetGraphAsync(new Uri(Config.Catalog.BaseAddress.AbsoluteUri + "/index.json")); task.Wait(); IGraph graph = task.Result; }
protected override Task<bool> OnProcessBatch(CollectorHttpClient client, IEnumerable<JToken> items, JToken context, DateTime commitTimeStamp, CancellationToken cancellationToken) { PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30)); analyzer.AddAnalyzer("Id", new IdentifierKeywordAnalyzer()); int i = 0; using (IndexWriter writer = new IndexWriter(_directory, analyzer, false, IndexWriter.MaxFieldLength.UNLIMITED)) { foreach (JObject item in items) { i++; string id = item["nuget:id"].ToString(); string version = item["nuget:version"].ToString(); BooleanQuery query = new BooleanQuery(); query.Add(new BooleanClause(new TermQuery(new Term("Id", id.ToLowerInvariant())), Occur.MUST)); query.Add(new BooleanClause(new TermQuery(new Term("Version", version)), Occur.MUST)); writer.DeleteDocuments(query); Document doc = new Document(); doc.Add(new Field("Id", item["nuget:id"].ToString(), Field.Store.YES, Field.Index.ANALYZED)); doc.Add(new Field("Version", item["nuget:version"].ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED)); writer.AddDocument(doc); } string trace = Guid.NewGuid().ToString(); writer.Commit(new Dictionary<string, string> { { "commitTimeStamp", commitTimeStamp.ToString("O") }, { "trace", trace } }); Trace.TraceInformation("COMMIT {0} documents, index contains {1} documents, commitTimeStamp {2}, trace: {3}", i, writer.NumDocs(), commitTimeStamp.ToString("O"), trace); } return Task.FromResult(true); }
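// Hedged companion sketch (an assumption, not from the source): reading back the Id/Version fields that the
// OnProcessBatch above writes into the Lucene index. The field names mirror the snippet; the directory
// instance and the package id are placeholders.
using (IndexReader reader = IndexReader.Open(_directory, readOnly: true))
{
    var searcher = new IndexSearcher(reader);

    // The delete query above lowercases the id before matching, so this lookup does the same.
    var query = new TermQuery(new Term("Id", "newtonsoft.json"));
    TopDocs results = searcher.Search(query, 100);

    foreach (ScoreDoc scoreDoc in results.ScoreDocs)
    {
        Document document = searcher.Doc(scoreDoc.Doc);
        Console.WriteLine("{0} {1}", document.Get("Id"), document.Get("Version"));
    }
}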
protected override async Task ProcessSortedBatch(CollectorHttpClient client, KeyValuePair<string, IList<JObject>> sortedBatch, JToken context, CancellationToken cancellationToken) { IDictionary<string, IGraph> graphs = new Dictionary<string, IGraph>(); foreach (JObject item in sortedBatch.Value) { if (Utils.IsType((JObject)context, item, _types)) { string itemUri = item["@id"].ToString(); IGraph graph = await client.GetGraphAsync(new Uri(itemUri), cancellationToken); graphs.Add(itemUri, graph); } } if (graphs.Count > 0) { await ProcessGraphs(new KeyValuePair<string, IDictionary<string, IGraph>>(sortedBatch.Key, graphs), cancellationToken); } }
protected override async Task<bool> OnProcessBatch( CollectorHttpClient client, IEnumerable<JToken> items, JToken context, DateTime commitTimeStamp, bool isLastBatch, CancellationToken cancellationToken) { IDictionary<string, IList<JObject>> sortedItems = new Dictionary<string, IList<JObject>>(); foreach (JObject item in items) { string key = GetKey(item); IList<JObject> itemList; if (!sortedItems.TryGetValue(key, out itemList)) { itemList = new List<JObject>(); sortedItems.Add(key, itemList); } itemList.Add(item); } IList<Task> tasks = new List<Task>(); foreach (KeyValuePair<string, IList<JObject>> sortedBatch in sortedItems) { Task task = ProcessSortedBatch(client, sortedBatch, context, cancellationToken); tasks.Add(task); if (!Concurrent) { task.Wait(); } } await Task.WhenAll(tasks.ToArray()); return true; }
protected override async Task ProcessSortedBatch(CollectorHttpClient client, KeyValuePair<string, IList<JObject>> sortedBatch, JToken context) { IDictionary<string, IGraph> graphs = new Dictionary<string, IGraph>(); foreach (JObject item in sortedBatch.Value) { if (Utils.IsType((JObject)context, item, _types)) { string itemUri = item["@id"].ToString(); IGraph graph = await client.GetGraphAsync(new Uri(itemUri)); graphs.Add(itemUri, graph); } } if (graphs.Count > 0) { await ProcessGraphs(new KeyValuePair<string, IDictionary<string, IGraph>>(sortedBatch.Key, graphs)); } }
protected override async Task<bool> OnProcessBatch(CollectorHttpClient client, IEnumerable<JToken> items, JToken context, DateTime commitTimeStamp, CancellationToken cancellationToken) { JObject catalogIndex = (_baseAddress != null) ? await client.GetJObjectAsync(Index, cancellationToken) : null; IEnumerable<JObject> catalogItems = await FetchCatalogItems(client, items, cancellationToken); using (IndexWriter indexWriter = CreateIndexWriter(_directory)) { Trace.TraceInformation("Index contains {0} documents", indexWriter.NumDocs()); ProcessCatalogIndex(indexWriter, catalogIndex, _baseAddress); ProcessCatalogItems(indexWriter, catalogItems, _baseAddress); indexWriter.ExpungeDeletes(); indexWriter.Commit(CreateCommitMetadata(commitTimeStamp)); Trace.TraceInformation("COMMIT index contains {0} documents commitTimeStamp {1}", indexWriter.NumDocs(), commitTimeStamp.ToString("O")); } return true; }
protected override async Task<bool> OnProcessBatch(CollectorHttpClient client, IEnumerable<JToken> items, JToken context, DateTime commitTimeStamp, CancellationToken cancellationToken) { foreach (JToken item in items) { string id = item["nuget:id"].ToString().ToLowerInvariant(); string version = item["nuget:version"].ToString().ToLowerInvariant(); string type = item["@type"].ToString().Replace("nuget:", Schema.Prefixes.NuGet); Storage storage = _storageFactory.Create(id); if (type == Schema.DataTypes.PackageDetails.ToString()) { // Add/update package string nuspec = await LoadNuspec(client, id, version, cancellationToken); if (nuspec != null) { await SaveNuspec(storage, id, version, nuspec, cancellationToken); await CopyNupkg(client, storage, id, version, cancellationToken); await UpdateMetadata(storage, versions => versions.Add(NuGetVersion.Parse(version)), cancellationToken); Trace.TraceInformation("commit: {0}/{1}", id, version); } else { Trace.TraceWarning("no nuspec available for {0}/{1} skipping", id, version); } } else if (type == Schema.DataTypes.PackageDelete.ToString()) { // Delete package await UpdateMetadata(storage, versions => versions.Remove(NuGetVersion.Parse(version)), cancellationToken); await DeleteNuspec(storage, id, version, cancellationToken); await DeleteNupkg(storage, id, version, cancellationToken); Trace.TraceInformation("commit delete: {0}/{1}", id, version); } } return true; }
public async Task ProcessGraphs(CollectorHttpClient client, string packageId, IEnumerable<Uri> catalogPageUris, JObject context, CancellationToken cancellationToken) { ConcurrentDictionary<string, IGraph> graphs = new ConcurrentDictionary<string, IGraph>(); ParallelOptions options = new ParallelOptions(); options.MaxDegreeOfParallelism = 8; var uris = catalogPageUris.ToArray(); Parallel.ForEach(uris, options, uri => { var task = client.GetGraphAsync(uri); task.Wait(); if (!graphs.TryAdd(uri.AbsoluteUri, task.Result)) { throw new Exception("Duplicate graph: " + uri); } }); await ProcessGraphs(new KeyValuePair<string, IDictionary<string, IGraph>>(packageId, graphs), cancellationToken); }
protected async Task<IEnumerable<CatalogCommit>> FetchCatalogCommitsAsync( CollectorHttpClient client, ReadCursor front, ReadCursor back, CancellationToken cancellationToken) { JObject root; using (_telemetryService.TrackDuration( TelemetryConstants.CatalogIndexReadDurationSeconds, new Dictionary<string, string>() { { TelemetryConstants.Uri, Index.AbsoluteUri } })) { root = await client.GetJObjectAsync(Index, cancellationToken); } var commits = root["items"].Select(item => CatalogCommit.Create((JObject)item)); return GetCommitsInRange(commits, front.Value, back.Value); }
protected override async Task ProcessSortedBatchAsync( CollectorHttpClient client, KeyValuePair <string, IList <JObject> > sortedBatch, JToken context, CancellationToken cancellationToken) { var graphs = new Dictionary <string, IGraph>(); var graphTasks = new Dictionary <string, Task <IGraph> >(); foreach (var item in sortedBatch.Value) { if (Utils.IsType((JObject)context, item, _types)) { var itemUri = item["@id"].ToString(); // Load package details from catalog. // Download the graph to a read-only container. This allows operations on each graph to be safely // parallelized. var task = client.GetGraphAsync(new Uri(itemUri), readOnly: true, token: cancellationToken); graphTasks.Add(itemUri, task); } } await Task.WhenAll(graphTasks.Values.ToArray()); foreach (var task in graphTasks) { graphs.Add(task.Key, task.Value.Result); } if (graphs.Count > 0) { var sortedGraphs = new KeyValuePair <string, IReadOnlyDictionary <string, IGraph> >(sortedBatch.Key, graphs); await ProcessGraphsAsync(sortedGraphs, cancellationToken); } }
public async Task<bool> Run(ReadWriteCursor front, ReadCursor back, CancellationToken cancellationToken) { await Task.WhenAll(front.Load(cancellationToken), back.Load(cancellationToken)); Trace.TraceInformation("Run ( {0} , {1} )", front, back); bool result = false; HttpMessageHandler handler = null; if (_handlerFunc != null) { handler = _handlerFunc(); } using (CollectorHttpClient client = new CollectorHttpClient(handler)) { result = await Fetch(client, front, back, cancellationToken); RequestCount = client.RequestCount; } return result; }
protected override async Task<bool> OnProcessBatchAsync( CollectorHttpClient client, IEnumerable<CatalogCommitItem> items, JToken context, DateTime commitTimeStamp, bool isLastBatch, CancellationToken cancellationToken) { var sortedItems = new Dictionary<T, IList<CatalogCommitItem>>(); foreach (CatalogCommitItem item in items) { T key = GetKey(item); IList<CatalogCommitItem> itemList; if (!sortedItems.TryGetValue(key, out itemList)) { itemList = new List<CatalogCommitItem>(); sortedItems.Add(key, itemList); } itemList.Add(item); } IList<Task> tasks = new List<Task>(); foreach (KeyValuePair<T, IList<CatalogCommitItem>> sortedBatch in sortedItems) { Task task = ProcessSortedBatchAsync(client, sortedBatch, context, cancellationToken); tasks.Add(task); } await Task.WhenAll(tasks.ToArray()); return true; }
protected override async Task ProcessSortedBatch( CollectorHttpClient client, KeyValuePair<string, IList<JObject>> sortedBatch, JToken context, CancellationToken cancellationToken) { var graphs = new Dictionary<string, IGraph>(); var graphTasks = new Dictionary<string, Task<IGraph>>(); foreach (var item in sortedBatch.Value) { if (Utils.IsType((JObject)context, item, _types)) { var itemUri = item["@id"].ToString(); var task = client.GetGraphAsync(new Uri(itemUri), cancellationToken); graphTasks.Add(itemUri, task); if (!Concurrent) { task.Wait(cancellationToken); } } } await Task.WhenAll(graphTasks.Values.ToArray()); foreach (var task in graphTasks) { graphs.Add(task.Key, task.Value.Result); } if (graphs.Count > 0) { await ProcessGraphs(new KeyValuePair<string, IDictionary<string, IGraph>>(sortedBatch.Key, graphs), cancellationToken); } }
protected async Task<IEnumerable<CatalogCommit>> FetchCatalogCommitsAsync( CollectorHttpClient client, ReadWriteCursor front, CancellationToken cancellationToken) { JObject root; using (_telemetryService.TrackDuration( TelemetryConstants.CatalogIndexReadDurationSeconds, new Dictionary<string, string>() { { TelemetryConstants.Uri, Index.AbsoluteUri } })) { root = await client.GetJObjectAsync(Index, cancellationToken); } IEnumerable<CatalogCommit> commits = root["items"] .Select(item => CatalogCommit.Create((JObject)item)) .Where(item => item.CommitTimeStamp > front.Value) .OrderBy(item => item.CommitTimeStamp); return commits; }
public async Task<bool> Run(ReadWriteCursor front, ReadCursor back) { await Task.WhenAll(front.Load(), back.Load()); Trace.TraceInformation("Run ( {0} , {1} )", front, back); bool result = false; HttpMessageHandler handler = null; if (_handlerFunc != null) { handler = _handlerFunc(); } using (CollectorHttpClient client = new CollectorHttpClient(handler)) { result = await Fetch(client, front, back); RequestCount = client.RequestCount; } return result; }
protected abstract Task<bool> FetchAsync( CollectorHttpClient client, ReadWriteCursor front, ReadCursor back, CancellationToken cancellationToken);
public static void StartProcessingBatchesIfNoFailures( CollectorHttpClient client, JToken context, List <CatalogCommitItemBatch> unprocessedBatches, List <CatalogCommitItemBatchTask> processingBatches, int maxConcurrentBatches, ProcessCommitItemBatchAsync processCommitItemBatchAsync, CancellationToken cancellationToken) { if (client == null) { throw new ArgumentNullException(nameof(client)); } if (context == null) { throw new ArgumentNullException(nameof(context)); } if (unprocessedBatches == null) { throw new ArgumentNullException(nameof(unprocessedBatches)); } if (processingBatches == null) { throw new ArgumentNullException(nameof(processingBatches)); } if (maxConcurrentBatches < 1) { throw new ArgumentOutOfRangeException( nameof(maxConcurrentBatches), maxConcurrentBatches, string.Format(Strings.ArgumentOutOfRange, 1, int.MaxValue)); } if (processCommitItemBatchAsync == null) { throw new ArgumentNullException(nameof(processCommitItemBatchAsync)); } var hasAnyBatchFailed = processingBatches.Any(batch => batch.Task.IsFaulted || batch.Task.IsCanceled); if (hasAnyBatchFailed) { return; } var batchesToEnqueue = Math.Min( maxConcurrentBatches - processingBatches.Count(batch => !batch.Task.IsCompleted), unprocessedBatches.Count); for (var i = 0; i < batchesToEnqueue; ++i) { var batch = unprocessedBatches[0]; unprocessedBatches.RemoveAt(0); var task = processCommitItemBatchAsync( client, context, batch.Key, batch, lastBatch: null, cancellationToken: cancellationToken); var batchTask = new CatalogCommitItemBatchTask(batch, task); processingBatches.Add(batchTask); } }
public CatalogIndexReader(Uri indexUri, CollectorHttpClient httpClient) { _indexUri = indexUri; _httpClient = httpClient; }
public PartitionedRegJob(Config config, Storage storage, StorageFactory factory, CollectorHttpClient client) : base(config, storage, "partitionedreg") { _factory = factory; _client = client; }
protected abstract Task<bool> OnProcessBatch(CollectorHttpClient client, IEnumerable<JToken> items, JToken context, DateTime commitTimeStamp, CancellationToken cancellationToken);
protected abstract Task ProcessSortedBatch(CollectorHttpClient client, KeyValuePair<string, IList<JObject>> sortedBatch, JToken context);
protected override async Task <bool> FetchAsync( CollectorHttpClient client, ReadWriteCursor front, ReadCursor back, CancellationToken cancellationToken) { IEnumerable <CatalogCommit> commits = await FetchCatalogCommitsAsync(client, front, cancellationToken); bool acceptNextBatch = false; foreach (CatalogCommit commit in commits) { JObject page = await client.GetJObjectAsync(commit.Uri, cancellationToken); JToken context = null; page.TryGetValue("@context", out context); var batches = await CreateBatchesAsync(page["items"] .Select(item => CatalogCommitItem.Create((JObject)context, (JObject)item)) .Where(item => item.CommitTimeStamp > front.Value && item.CommitTimeStamp <= back.Value)); var orderedBatches = batches .OrderBy(batch => batch.CommitTimeStamp) .ToList(); var lastBatch = orderedBatches.LastOrDefault(); DateTime?previousCommitTimeStamp = null; foreach (var batch in orderedBatches) { // If the commit timestamp has changed from the previous batch, commit. This is important because if // two batches have the same commit timestamp but processing the second fails, we should not // progress the cursor forward. if (previousCommitTimeStamp.HasValue && previousCommitTimeStamp != batch.CommitTimeStamp) { front.Value = previousCommitTimeStamp.Value; await front.SaveAsync(cancellationToken); Trace.TraceInformation("CommitCatalog.Fetch front.Value saved since timestamp changed from previous: {0}", front); } using (_telemetryService.TrackDuration(TelemetryConstants.ProcessBatchSeconds, new Dictionary <string, string>() { { TelemetryConstants.BatchItemCount, batch.Items.Count.ToString() } })) { acceptNextBatch = await OnProcessBatchAsync( client, batch.Items, context, batch.CommitTimeStamp, batch.CommitTimeStamp == lastBatch.CommitTimeStamp, cancellationToken); } // If this is the last batch, commit the cursor. if (ReferenceEquals(batch, lastBatch)) { front.Value = batch.CommitTimeStamp; await front.SaveAsync(cancellationToken); Trace.TraceInformation("CommitCatalog.Fetch front.Value saved due to last batch: {0}", front); } previousCommitTimeStamp = batch.CommitTimeStamp; Trace.TraceInformation("CommitCatalog.Fetch front.Value is: {0}", front); if (!acceptNextBatch) { break; } } if (!acceptNextBatch) { break; } } return(acceptNextBatch); }
protected virtual Task<bool> OnProcessBatch(CollectorHttpClient client, IList<JObject> items, JObject context, DateTime resumeDateTime) { return OnProcessBatch(client, items, context); }
protected abstract Task<bool> OnProcessBatch(CollectorHttpClient client, IList<JObject> items, JObject context);
protected abstract Task ProcessSortedBatchAsync( CollectorHttpClient client, KeyValuePair<T, IList<JObject>> sortedBatch, JToken context, CancellationToken cancellationToken);
protected override async Task <bool> Fetch(CollectorHttpClient client, ReadWriteCursor front, ReadCursor back) { int beforeBatchCount = BatchCount; IList <JObject> items = new List <JObject>(); JObject root = await client.GetJObjectAsync(Index); JToken context = null; root.TryGetValue("@context", out context); IEnumerable <JToken> rootItems = root["items"].OrderBy(item => item["commitTimeStamp"].ToObject <DateTime>()); DateTime resumeDateTime = front.Value; bool acceptNextBatch = true; foreach (JObject rootItem in rootItems) { if (!acceptNextBatch) { break; } DateTime rootItemCommitTimeStamp = rootItem["commitTimeStamp"].ToObject <DateTime>(); if (rootItemCommitTimeStamp <= front.Value) { continue; } Uri pageUri = rootItem["@id"].ToObject <Uri>(); JObject page = await client.GetJObjectAsync(pageUri); IEnumerable <JToken> pageItems = page["items"].OrderBy(item => item["commitTimeStamp"].ToObject <DateTime>()); foreach (JObject pageItem in pageItems) { DateTime pageItemCommitTimeStamp = pageItem["commitTimeStamp"].ToObject <DateTime>(); if (pageItemCommitTimeStamp <= front.Value) { continue; } if (pageItemCommitTimeStamp > back.Value) { break; } items.Add(pageItem); resumeDateTime = pageItemCommitTimeStamp; if (items.Count == _batchSize) { acceptNextBatch = await ProcessBatch(client, items, context, front, resumeDateTime); if (!acceptNextBatch) { break; } } } } if (acceptNextBatch && items.Count > 0) { await ProcessBatch(client, items, context, front, resumeDateTime); } int afterBatchCount = BatchCount; PreviousRunBatchCount = (afterBatchCount - beforeBatchCount); return(PreviousRunBatchCount > 0); }
protected abstract Task<bool> Fetch(CollectorHttpClient client, ReadWriteCursor front, ReadCursor back);
public CatalogIndexReader(Uri indexUri, CollectorHttpClient httpClient, ITelemetryService telemetryService) { _indexUri = indexUri; _httpClient = httpClient; _telemetryService = telemetryService ?? throw new ArgumentNullException(nameof(telemetryService)); }
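// Hedged usage sketch (assumed wiring, not from the source): enumerating catalog entries through
// CatalogIndexReader, mirroring how the PrepareAsync snippet below consumes GetEntries(). The telemetry
// service instance and the catalog index URL are placeholders.
var httpClient = new CollectorHttpClient();
var reader = new CatalogIndexReader(new Uri("https://api.nuget.org/v3/catalog0/index.json"), httpClient, telemetryService);

var entries = await reader.GetEntries();
foreach (var entry in entries.OrderBy(e => e.CommitTimeStamp).ThenBy(e => e.Id))
{
    Console.WriteLine("{0} {1} committed {2:O}", entry.Id, entry.Version, entry.CommitTimeStamp);
}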
async Task<bool> ProcessBatch(CollectorHttpClient client, IList<JObject> items, JToken context, ReadWriteCursor front, DateTime resumeDateTime, CancellationToken cancellationToken) { bool acceptNextBatch = await OnProcessBatch(client, items, (JObject)context, resumeDateTime, cancellationToken); BatchCount++; items.Clear(); front.Value = resumeDateTime; await front.Save(cancellationToken); return acceptNextBatch; }
protected override Task<bool> OnProcessBatch(CollectorHttpClient client, IEnumerable<JToken> items, JToken context, DateTime commitTimeStamp, CancellationToken cancellationToken) { return Task.FromResult(true); }
protected abstract Task<bool> OnProcessBatch(CollectorHttpClient client, IList<JObject> items, JObject context, CancellationToken cancellationToken);
private async Task StrikeAsync() { _log.WriteLine("Start lightning strike for {0}...", _cursorFile); // Get batch range int batchStart; int batchEnd; using (var cursorStreamReader = new StreamReader(_cursorFile)) { var batchRange = (await cursorStreamReader.ReadLineAsync()).Split(','); batchStart = int.Parse(batchRange[0]); batchEnd = int.Parse(batchRange[1]); _log.WriteLine("Batch range: {0} - {1}", batchStart, batchEnd); } if (batchStart > batchEnd) { _log.WriteLine("Batch already finished."); return; } // Time to strike var collectorHttpClient = new CollectorHttpClient(); var account = CloudStorageAccount.Parse(_storageAccount); var storageFactory = (StorageFactory)new AzureStorageFactory(account, _storageContainer, null, new Uri(_storageBaseAddress)) { CompressContent = _compress }; var startElement = string.Format("Element@{0}.", batchStart); var endElement = string.Format("Element@{0}.", batchEnd + 1); using (var indexStreamReader = new StreamReader(_indexFile)) { string line; // Skip entries that are not in the current batch bounds do { line = await indexStreamReader.ReadLineAsync(); } while (!line.Contains(startElement)); // Run until we're outside the current batch bounds while (!string.IsNullOrEmpty(line) && !line.Contains(endElement) && !indexStreamReader.EndOfStream) { _log.WriteLine(line); try { var packageId = line.Split(new[] { ". " }, StringSplitOptions.None).Last().Trim(); var sortedGraphs = new Dictionary<string, IGraph>(); line = await indexStreamReader.ReadLineAsync(); while (!string.IsNullOrEmpty(line) && !line.Contains("Element@") && !indexStreamReader.EndOfStream) { // Fetch graph for package version var url = line.TrimEnd(); var graph = await collectorHttpClient.GetGraphAsync(new Uri(url)); if (sortedGraphs.ContainsKey(url)) { sortedGraphs[url] = graph; } else { sortedGraphs.Add(url, graph); } // To reduce memory footprint, we're flushing out large registrations // in very small batches. if (graph.Nodes.Count() > 3000 && sortedGraphs.Count >= 2) { // Process graphs await ProcessGraphsAsync(packageId, sortedGraphs, storageFactory, _contentBaseAddress); // Destroy! sortedGraphs = new Dictionary<string, IGraph>(); } // Read next line line = await indexStreamReader.ReadLineAsync(); } // Process graphs if (sortedGraphs.Any()) { await ProcessGraphsAsync(packageId, sortedGraphs, storageFactory, _contentBaseAddress); } // Update cursor file so next time we have less work to do batchStart++; await UpdateCursorFileAsync(_cursorFile, batchStart, batchEnd); } catch (Exception) { UpdateCursorFileAsync(_cursorFile, batchStart, batchEnd).Wait(); throw; } } } await UpdateCursorFileAsync("DONE" + _cursorFile, batchStart, batchEnd); _log.WriteLine("Finished lightning strike for {0}.", _cursorFile); }
/// <summary> /// Sets the service endpoint properties to the values defined in the specified service index. /// </summary> /// <param name="serviceIndexUrl">The service index endpoint that lists the endpoints of the NuGet services.</param> private async Task InitializeAsync(Uri serviceIndexUrl) { ServiceIndex serviceIndex; using (CollectorHttpClient client = new CollectorHttpClient()) { string serviceIndexText = await client.GetStringAsync(serviceIndexUrl); serviceIndex = ServiceIndex.Deserialize(serviceIndexText); } Uri registrationBaseUrl; if (!serviceIndex.TryGetResourceId("RegistrationsBaseUrl", out registrationBaseUrl)) { throw new ArgumentOutOfRangeException(nameof(serviceIndexUrl), "The service index does not contain a RegistrationsBaseUrl entry."); } this.RegistrationBaseUrl = registrationBaseUrl; this.ServiceIndexUrl = serviceIndexUrl; }
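// Hedged usage sketch (assumed calling pattern, not from the source): once InitializeAsync has populated
// RegistrationBaseUrl, a package's registration index can be located by appending the lowercased id.
// The service index URL and the package id are placeholders.
await InitializeAsync(new Uri("https://api.nuget.org/v3/index.json"));

string packageId = "newtonsoft.json";
var registrationIndexUri = new Uri(RegistrationBaseUrl.AbsoluteUri.TrimEnd('/') + "/" + packageId + "/index.json");

using (var client = new CollectorHttpClient())
{
    JObject registrationIndex = await client.GetJObjectAsync(registrationIndexUri);
    Console.WriteLine("{0}: {1} registration page(s)", packageId, ((JArray)registrationIndex["items"]).Count);
}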
protected override Task<bool> OnProcessBatch(CollectorHttpClient client, IEnumerable<JToken> items, JToken context, DateTime commitTimeStamp) { return Task.FromResult(true); }
protected abstract Task ProcessSortedBatch( CollectorHttpClient client, KeyValuePair<string, IList<JObject>> sortedBatch, JToken context, CancellationToken cancellationToken);
private async Task PrepareAsync() { _log.WriteLine("Making sure folder {0} exists.", _outputFolder); if (!Directory.Exists(_outputFolder)) { Directory.CreateDirectory(_outputFolder); } // Create reindex file _log.WriteLine("Start preparing lightning reindex file..."); var latestCommit = DateTime.MinValue; int numberOfEntries = 0; string indexFile = Path.Combine(_outputFolder, "index.txt"); using (var streamWriter = new StreamWriter(indexFile, false)) { var collectorHttpClient = new CollectorHttpClient(); var catalogIndexReader = new CatalogIndexReader(new Uri(_catalogIndex), collectorHttpClient); var catalogIndexEntries = await catalogIndexReader.GetEntries(); foreach (var packageRegistrationGroup in catalogIndexEntries .OrderBy(x => x.CommitTimeStamp) .ThenBy(x => x.Id) .ThenBy(x => x.Version) .GroupBy(x => x.Id)) { streamWriter.WriteLine("Element@{0}. {1}", numberOfEntries++, packageRegistrationGroup.Key); var latestCatalogPages = new Dictionary<string, Uri>(); foreach (CatalogIndexEntry catalogIndexEntry in packageRegistrationGroup) { string key = catalogIndexEntry.Version.ToNormalizedString(); if (latestCatalogPages.ContainsKey(key)) { latestCatalogPages[key] = catalogIndexEntry.Uri; } else { latestCatalogPages.Add(key, catalogIndexEntry.Uri); } if (latestCommit < catalogIndexEntry.CommitTimeStamp) { latestCommit = catalogIndexEntry.CommitTimeStamp; } } foreach (var latestCatalogPage in latestCatalogPages) { streamWriter.WriteLine("{0}", latestCatalogPage.Value); } } } _log.WriteLine("Finished preparing lightning reindex file. Output file: {0}", indexFile); // Write cursor to storage _log.WriteLine("Start writing new cursor..."); var account = CloudStorageAccount.Parse(_storageAccount); var storageFactory = (StorageFactory)new AzureStorageFactory(account, _storageContainer); var storage = storageFactory.Create(); var cursor = new DurableCursor(storage.ResolveUri("cursor.json"), storage, latestCommit); cursor.Value = latestCommit; await cursor.Save(CancellationToken.None); _log.WriteLine("Finished writing new cursor."); // Write command files _log.WriteLine("Start preparing lightning reindex command files..."); string templateFileContents; using (var templateStreamReader = new StreamReader(_templateFile)) { templateFileContents = await templateStreamReader.ReadToEndAsync(); } int batchNumber = 0; int batchSizeValue = int.Parse(_batchSize); for (int batchStart = 0; batchStart < numberOfEntries; batchStart += batchSizeValue) { var batchEnd = (batchStart + batchSizeValue - 1); if (batchEnd >= numberOfEntries) { batchEnd = numberOfEntries - 1; } var cursorCommandFileName = "cursor" + batchNumber + ".cmd"; var cursorTextFileName = "cursor" + batchNumber + ".txt"; using (var cursorCommandStreamWriter = new StreamWriter(Path.Combine(_outputFolder, cursorCommandFileName))) { using (var cursorTextStreamWriter = new StreamWriter(Path.Combine(_outputFolder, cursorTextFileName))) { var commandStreamContents = templateFileContents .Replace("[index]", indexFile) .Replace("[cursor]", cursorTextFileName) .Replace("[contentbaseaddress]", _contentBaseAddress) .Replace("[storageaccount]", _storageAccount) .Replace("[storagecontainer]", _storageContainer) .Replace("[storagebaseaddress]", _storageBaseAddress) .Replace("[compress]", _compress.ToString().ToLowerInvariant()); await cursorCommandStreamWriter.WriteLineAsync(commandStreamContents); await cursorTextStreamWriter.WriteLineAsync(batchStart + "," + batchEnd); } } batchNumber++; } _log.WriteLine("Finished preparing lightning reindex command files."); 
_log.WriteLine("You can now copy the {0} file and all cursor*.cmd, cursor*.txt", indexFile); _log.WriteLine("to multiple machines and run the cursor*.cmd files in parallel."); }
protected abstract Task<bool> OnProcessBatch(CollectorHttpClient client, IEnumerable<JToken> items, JToken context, DateTime commitTimeStamp);
protected virtual Task<bool> OnProcessBatch(CollectorHttpClient client, IList<JObject> items, JObject context, DateTime resumeDateTime, CancellationToken cancellationToken) { return OnProcessBatch(client, items, context, cancellationToken); }
internal static async Task <bool> ProcessCatalogCommitsAsync( CollectorHttpClient client, ReadWriteCursor front, ReadCursor back, FetchCatalogCommitsAsync fetchCatalogCommitsAsync, CreateCommitItemBatchesAsync createCommitItemBatchesAsync, ProcessCommitItemBatchAsync processCommitItemBatchAsync, int maxConcurrentBatches, ILogger logger, CancellationToken cancellationToken) { var rootItems = await fetchCatalogCommitsAsync(client, front, back, cancellationToken); var hasAnyBatchFailed = false; var hasAnyBatchBeenProcessed = false; foreach (CatalogCommit rootItem in rootItems) { JObject page = await client.GetJObjectAsync(rootItem.Uri, cancellationToken); var context = (JObject)page["@context"]; CatalogCommitItemBatch[] batches = await CreateBatchesForAllAvailableItemsInPageAsync( front, back, page, context, createCommitItemBatchesAsync); if (!batches.Any()) { continue; } hasAnyBatchBeenProcessed = true; DateTime maxCommitTimeStamp = GetMaxCommitTimeStamp(batches); var unprocessedBatches = batches.ToList(); var processingBatches = new List <CatalogCommitItemBatchTask>(); var exceptions = new List <Exception>(); StartProcessingBatchesIfNoFailures( client, context, unprocessedBatches, processingBatches, maxConcurrentBatches, processCommitItemBatchAsync, cancellationToken); while (processingBatches.Any()) { var activeTasks = processingBatches.Where(batch => !batch.Task.IsCompleted) .Select(batch => batch.Task) .DefaultIfEmpty(Task.CompletedTask); await Task.WhenAny(activeTasks); for (var i = 0; i < processingBatches.Count; ++i) { var batch = processingBatches[i]; if (batch.Task.IsFaulted || batch.Task.IsCanceled) { hasAnyBatchFailed = true; if (batch.Task.Exception != null) { var exception = ExceptionUtilities.Unwrap(batch.Task.Exception); exceptions.Add(exception); } } if (batch.Task.IsCompleted) { processingBatches.RemoveAt(i); --i; } } if (!hasAnyBatchFailed) { StartProcessingBatchesIfNoFailures( client, context, unprocessedBatches, processingBatches, maxConcurrentBatches, processCommitItemBatchAsync, cancellationToken); } } if (hasAnyBatchFailed) { foreach (var exception in exceptions) { logger.LogError(_eventId, exception, Strings.BatchProcessingFailure); } var innerException = exceptions.Count == 1 ? exceptions.Single() : new AggregateException(exceptions); throw new BatchProcessingException(innerException); } front.Value = maxCommitTimeStamp; await front.SaveAsync(cancellationToken); Trace.TraceInformation($"{nameof(CatalogCommitUtilities)}.{nameof(ProcessCatalogCommitsAsync)} " + $"{nameof(front)}.{nameof(front.Value)} saved since timestamp changed from previous: {{0}}", front); } return(hasAnyBatchBeenProcessed); }
protected override async Task<bool> Fetch(CollectorHttpClient client, ReadWriteCursor front, ReadCursor back, CancellationToken cancellationToken) { int beforeBatchCount = BatchCount; IList<JObject> items = new List<JObject>(); JObject root = await client.GetJObjectAsync(Index, cancellationToken); JToken context = null; root.TryGetValue("@context", out context); IEnumerable<JToken> rootItems = root["items"].OrderBy(item => item["commitTimeStamp"].ToObject<DateTime>()); DateTime resumeDateTime = front.Value; bool acceptNextBatch = true; foreach (JObject rootItem in rootItems) { if (!acceptNextBatch) { break; } DateTime rootItemCommitTimeStamp = rootItem["commitTimeStamp"].ToObject<DateTime>(); if (rootItemCommitTimeStamp <= front.Value) { continue; } Uri pageUri = rootItem["@id"].ToObject<Uri>(); JObject page = await client.GetJObjectAsync(pageUri); IEnumerable<JToken> pageItems = page["items"].OrderBy(item => item["commitTimeStamp"].ToObject<DateTime>()); foreach (JObject pageItem in pageItems) { DateTime pageItemCommitTimeStamp = pageItem["commitTimeStamp"].ToObject<DateTime>(); if (pageItemCommitTimeStamp <= front.Value) { continue; } if (pageItemCommitTimeStamp > back.Value) { break; } items.Add(pageItem); resumeDateTime = pageItemCommitTimeStamp; if (items.Count == _batchSize) { acceptNextBatch = await ProcessBatch(client, items, context, front, resumeDateTime, cancellationToken); if (!acceptNextBatch) { break; } } } } if (acceptNextBatch && items.Count > 0) { await ProcessBatch(client, items, context, front, resumeDateTime, cancellationToken); } int afterBatchCount = BatchCount; PreviousRunBatchCount = (afterBatchCount - beforeBatchCount); return (PreviousRunBatchCount > 0); }
protected abstract Task<bool> Fetch(CollectorHttpClient client, ReadWriteCursor front, ReadCursor back, CancellationToken cancellationToken);