private static async Task ImportVerticesAsync(IEnumerable <GremlinVertex> gremlinVertices) { Trace.TraceInformation(nameof(ImportVerticesAsync)); var token = new CancellationTokenSource().Token; BulkImportResponse vResponse = null; while (Client == null || _graphBulkExecutor == null || _initializeAsyncTask?.IsCompleted != true) { await Task.Delay(100.Milliseconds(), token); } try { if (gremlinVertices != null) { vResponse = await _graphBulkExecutor.BulkImportAsync( gremlinVertices, true, true, null, null, token); } } catch (DocumentClientException de) { Trace.TraceError("Document client exception: {0}", de); } catch (Exception e) { Trace.TraceError("Exception: {0}", e); } Trace.TraceInformation("END " + nameof(ImportVerticesAsync)); }
public async Task <int> BulkImport(IEnumerable <T> entities, CancellationToken token = new CancellationToken()) { BulkImportResponse bulkImportResponse = null; long totalDocumentsAdded = 0; double totalRequestUnitsConsumed = 0; double totalSecondsElapsed = 0; int totalDocumentToImport = entities.Count(); while ((bulkImportResponse == null || bulkImportResponse.NumberOfDocumentsImported < totalDocumentToImport) && !token.IsCancellationRequested) { bulkImportResponse = await _bulkExecutor.BulkImportAsync( documents : entities, enableUpsert : true, disableAutomaticIdGeneration : false, maxConcurrencyPerPartitionKeyRange : null, maxInMemorySortingBatchSize : null, cancellationToken : token); totalDocumentsAdded += bulkImportResponse.NumberOfDocumentsImported; totalRequestUnitsConsumed += bulkImportResponse.TotalRequestUnitsConsumed; totalSecondsElapsed += bulkImportResponse.TotalTimeTaken.TotalSeconds; _logger.LogInformation( $"added {totalDocumentsAdded}, RU consumed: {totalRequestUnitsConsumed}, elapsed: {totalSecondsElapsed} sec"); } return((int)totalDocumentsAdded); }
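A minimal usage sketch for the wrapper above, assuming a hosting repository instance; the names "ordersRepository", "Order", and "orders" are illustrative assumptions, not part of the sample:

// Hypothetical caller of the BulkImport<T> wrapper shown above; only the call shape comes from the sample.
using (var cts = new CancellationTokenSource(TimeSpan.FromMinutes(10)))
{
    List<Order> orders = LoadOrders(); // assumed source of entities
    int imported = await ordersRepository.BulkImport(orders, cts.Token);
    Console.WriteLine($"Imported {imported} of {orders.Count} documents");
}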
public async Task IngestDocsInBulk( SqlClientExtension client, IChangeFeedObserverContext context, IReadOnlyList <Document> docs, CancellationToken cancellationToken, Uri destinationCollectionUri) { DocumentCollection documentCollection = await client.GetDocumentCollectionAsync(); InitBulkExecutor(client.DocumentClient, documentCollection); BulkImportResponse bulkImportResponse = null; try { bulkImportResponse = await bulkExecutor.BulkImportAsync( documents : docs, enableUpsert : true, disableAutomaticIdGeneration : true, maxConcurrencyPerPartitionKeyRange : null, maxInMemorySortingBatchSize : null, cancellationToken : cancellationToken); } catch (DocumentClientException de) { Trace.TraceError("Document client exception: {0}", de); } catch (Exception e) { Trace.TraceError("Exception: {0}", e); } }
private async Task RunBulkImportEdgesAsync( GraphBulkExecutor graphbulkExecutor, EdgeSpec spec, int fromCount, int toCount) { var tokenSource = new CancellationTokenSource(); var token = tokenSource.Token; BulkImportResponse response = null; Random r = new Random(); for (int i = 0; i < fromCount; i++) { List <GremlinEdge> edges = new List <GremlinEdge>(); HashSet <int> neighbors = new HashSet <int>(); /* hoisted out of the inner loop so the duplicate-destination check takes effect */ int edgeCount = r.Next(spec.maxCount - spec.minCount) + spec.minCount; for (int j = 0; j < edgeCount; j++) { int destination = r.Next(toCount); if (neighbors.Contains(destination)) { j--; continue; } neighbors.Add(destination); string id = createEdgeId(i, spec.from, destination, spec.to); GremlinEdge e = new GremlinEdge( id, spec.label, createVertexId(spec.from, i), createVertexId(spec.to, destination), spec.from, spec.to, createVertexId(spec.from, i), createVertexId(spec.to, destination)); for (int k = 0; k < spec.numberOfProperties; k++) { e.AddProperty("property_" + k, Guid.NewGuid().ToString()); } edges.Add(e); } try { response = await graphbulkExecutor.BulkImportAsync( edges, enableUpsert : true, disableAutomaticIdGeneration : true, maxConcurrencyPerPartitionKeyRange : null, maxInMemorySortingBatchSize : null, cancellationToken : token); } catch (DocumentClientException de) { Trace.TraceError("Document client exception: {0}", de); } catch (Exception e) { Trace.TraceError("Exception: {0}", e); } } }
public async Task <bool> CreateUploadTestDataInbatches() { #region batchVariables //initialize Batch Process variables int batchCount = 0; int totalUploaded = 0; var badEntities = new List <Object>(); Stopwatch stopwatch = new Stopwatch(); stopwatch.Start(); #endregion while (totalUploaded < maxtestDocumentCount) { batchCount++; logger.LogInfo("Begin Sample Db creation"); List <dynamic> entities = GetCommonEntitiesinBatch(); BulkImportResponse uploadResponse = new BulkImportResponse(); if (entities.Any()) { uploadResponse = await cosmosBulkImporter.BulkSendToNewCollection <dynamic>(entities); } badEntities = uploadResponse.BadInputDocuments; //summary.totalRecordsSent += uploadResponse.NumberOfDocumentsImported; totalUploaded += entities.Count(); logger.LogInfo($"Summary of Batch {batchCount} records retrieved {entities.Count()}. Records Uploaded: {uploadResponse.NumberOfDocumentsImported}"); //logger.LogInfo($"Total records retrieved {summary.totalRecordsRetrieved}. Total records uploaded {summary.totalRecordsSent}"); logger.LogInfo($"Time elapsed : {stopwatch.Elapsed} "); } stopwatch.Stop(); logger.LogInfo("Completed Sample DB creation."); return(true); }
public static async Task BulkImportDocuments(List <string> documentsToImportInBatch) { string EndpointUrl = Environment.GetEnvironmentVariable("EndpointUrl"); string AuthorizationKey = Environment.GetEnvironmentVariable("AuthorizationKey"); DocumentClient _client = new DocumentClient(new Uri(EndpointUrl), AuthorizationKey); DocumentCollection dataCollection = Utils.GetCollectionIfExists(_client, "db", "coll"); IBulkExecutor bulkExecutor = new BulkExecutor(_client, dataCollection); await bulkExecutor.InitializeAsync(); BulkImportResponse bulkImportResponse = null; var tokenSource = new CancellationTokenSource(); var token = tokenSource.Token; try { bulkImportResponse = await bulkExecutor.BulkImportAsync( documents : documentsToImportInBatch, enableUpsert : true, disableAutomaticIdGeneration : true, maxConcurrencyPerPartitionKeyRange : null, maxInMemorySortingBatchSize : null, cancellationToken : token); } catch (DocumentClientException de) { Console.WriteLine("Document _client exception: {0}", de); throw; } catch (Exception e) { Console.WriteLine("Exception: {0}", e); throw; } //return bulkImportResponse; }
public async Task Run( [Queue("%QueueName%", Connection = "QueueConnectionString")] IAsyncCollector <Document> postMortemQueue, [CosmosDBTrigger( databaseName: "%SourceDatabase%", collectionName: "%SourceCollection%", ConnectionStringSetting = "SourceCosmosDB", LeaseCollectionName = "leases", StartFromBeginning = true, MaxItemsPerInvocation = 10000000, CreateLeaseCollectionIfNotExists = true) ] IReadOnlyList <Document> documents, ILogger log) { if (documents != null && documents.Count > 0) { BulkImportResponse bulkImportResponse = null; List <Task> tasks = new List <Task>(); tasks.Add(Task.Run(async() => { try { bulkImportResponse = await bulkExecutor.BulkImportAsync( documents: documents, enableUpsert: true, disableAutomaticIdGeneration: true, maxInMemorySortingBatchSize: 10000000, cancellationToken: default(CancellationToken)); } catch (DocumentClientException e) { log.LogError("Document client Exception: {0}", e); } catch (Exception e) { log.LogError("Exception: {0}", e); } if (bulkImportResponse?.BadInputDocuments != null && bulkImportResponse.BadInputDocuments.Count > 0) { foreach (Document doc in bulkImportResponse.BadInputDocuments) { await postMortemQueue.AddAsync(doc); log.LogInformation("Document added to the post-mortem queue: {0}", doc.Id); } } }, default(CancellationToken))); await Task.WhenAll(tasks); if (bulkImportResponse != null) { log.LogMetric("The Number of Documents Imported", bulkImportResponse.NumberOfDocumentsImported); log.LogMetric("The Total Number of RUs Consumed", bulkImportResponse.TotalRequestUnitsConsumed); log.LogMetric("RUs per Document Write", bulkImportResponse.TotalRequestUnitsConsumed / bulkImportResponse.NumberOfDocumentsImported); log.LogMetric("RU/s being used", bulkImportResponse.TotalRequestUnitsConsumed / bulkImportResponse.TotalTimeTaken.TotalSeconds); log.LogMetric("Migration Time", bulkImportResponse.TotalTimeTaken.TotalMinutes); } } }
public async Task ReadUploadInBatches(IDocumentQuery <string> query, List <ScrubRule> scrubRules) { #region batchVariables //initialize Batch Process variables int batchCount = 0; TotalRecordsRetrieved = 0; TotalRecordsScrubbed = 0; var badEntities = new List <Object>(); Stopwatch stopwatch = new Stopwatch(); stopwatch.Start(); var objScrubber = new ObjectScrubber(); #endregion while (query.HasMoreResults) { batchCount++; logger.LogInfo($"BatchNumber : {batchCount} begins "); List <string> entities = await GetCommonStringEntitiesinBatch(query); TotalRecordsRetrieved += entities.Count(); BulkImportResponse uploadResponse = new BulkImportResponse(); var scrubbedEntities = entities; if (entities.Any()) { var jEntities = new List <JToken>(); foreach (var scrubRule in scrubRules) { jEntities = objScrubber.ScrubObjectList(scrubbedEntities, scrubRule); var nentities = new List <string>(); foreach (var jobj in jEntities) { nentities.Add(JsonConvert.SerializeObject(jobj)); } scrubbedEntities = nentities; scrubRule.RecordsUpdated += jEntities.Count; } var objEntities = jEntities.Cast <Object>().ToList(); try { uploadResponse = await cosmosBulkImporter.BulkSendToNewCollection <dynamic>(objEntities); } catch (Exception ex) { logger.LogError(ex); throw; } } badEntities = uploadResponse.BadInputDocuments; TotalRecordsScrubbed += uploadResponse.NumberOfDocumentsImported; logger.LogInfo($"Summary of Batch {batchCount} records retrieved {entities.Count()}. Records Uploaded: {uploadResponse.NumberOfDocumentsImported}"); logger.LogInfo($"Total records retrieved {TotalRecordsRetrieved}. Total records uploaded {TotalRecordsScrubbed}"); logger.LogInfo($"Time elapsed : {stopwatch.Elapsed} "); } stopwatch.Stop(); logger.LogInfo("Document Scrubbing completed"); }
private async Task <Tuple <long, double, double> > ImportBatch( int batchNumber, IEnumerable <string> documentsToImportInBatch, CancellationToken token) { await Console.Out.WriteLineAsync($"Executing bulk import for batch {batchNumber}"); BulkImportResponse bulkImportResponse = null; do { try { bulkImportResponse = await (await _bulkExecutor.Value).BulkImportAsync( documents: documentsToImportInBatch, enableUpsert: false, disableAutomaticIdGeneration: true, maxConcurrencyPerPartitionKeyRange: null, maxInMemorySortingBatchSize: null, cancellationToken: token); } catch (DocumentClientException de) { await Console.Out.WriteLineAsync($"Document client exception: {de.Message}"); break; } catch (Exception e) { await Console.Out.WriteLineAsync($"Exception: {e.Message}"); break; } } while (bulkImportResponse.NumberOfDocumentsImported < documentsToImportInBatch.Count()); if (bulkImportResponse == null) { return Tuple.Create(0L, 0D, 0D); } await Console.Out.WriteLineAsync($"\nBatch Summary {batchNumber}:"); await Console.Out.WriteLineAsync("--------------------------------------------------------------------- "); await Console.Out.WriteLineAsync(String.Format("Inserted {0} docs @ {1} writes/s, {2} RU/s in {3} sec", bulkImportResponse.NumberOfDocumentsImported, Math.Round(bulkImportResponse.NumberOfDocumentsImported / bulkImportResponse.TotalTimeTaken.TotalSeconds), Math.Round(bulkImportResponse.TotalRequestUnitsConsumed / bulkImportResponse.TotalTimeTaken.TotalSeconds), bulkImportResponse.TotalTimeTaken.TotalSeconds)); await Console.Out.WriteLineAsync(String.Format("Average RU consumption per document: {0}", (bulkImportResponse.TotalRequestUnitsConsumed / bulkImportResponse.NumberOfDocumentsImported))); await Console.Out.WriteLineAsync("--------------------------------------------------------------------- "); return(Tuple.Create( bulkImportResponse.NumberOfDocumentsImported, bulkImportResponse.TotalRequestUnitsConsumed, bulkImportResponse.TotalTimeTaken.TotalSeconds)); }
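A hedged sketch of how the per-batch tuples returned by ImportBatch above (documents imported, request units, seconds) might be aggregated across batches; "allDocuments" and the batch size are assumptions, and Chunk requires .NET 6+ LINQ or an equivalent extension:

// Hypothetical driver around ImportBatch; only the tuple shape comes from the sample.
long totalDocs = 0; double totalRus = 0, totalSeconds = 0;
int batchNumber = 0;
foreach (var batch in allDocuments.Chunk(10000)) // allDocuments: IEnumerable<string>, assumed
{
    Tuple<long, double, double> result = await ImportBatch(batchNumber++, batch, CancellationToken.None);
    totalDocs += result.Item1; totalRus += result.Item2; totalSeconds += result.Item3;
}
Console.WriteLine($"Totals: {totalDocs} docs, {totalRus:F0} RUs, {totalSeconds:F1} sec");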
private async Task LoadTestData() { ConnectionPolicy connectionPolicy = new ConnectionPolicy { ConnectionMode = ConnectionMode.Direct, ConnectionProtocol = Protocol.Tcp }; client = new DocumentClient(new Uri(EndpointUrl), PrimaryKey, connectionPolicy); string dbName = "ParkingLedger"; string collectionName = "VehicleAccesses"; await this.client.CreateDatabaseIfNotExistsAsync(new Database { Id = dbName }); var collection = await this.client.CreateDocumentCollectionIfNotExistsAsync(UriFactory.CreateDatabaseUri(dbName), new DocumentCollection { Id = collectionName }); // manual update //var list = CreateAccessesList(); //Parallel.ForEach(list, (x) => //{ // RegisterVehicleAccess(dbName, collectionName, x); //}); // Set retry options high during initialization (default values). client.ConnectionPolicy.RetryOptions.MaxRetryWaitTimeInSeconds = 30; client.ConnectionPolicy.RetryOptions.MaxRetryAttemptsOnThrottledRequests = 9; IBulkExecutor bulkExecutor = new BulkExecutor(client, collection); await bulkExecutor.InitializeAsync(); // Set retries to 0 to pass complete control to bulk executor. client.ConnectionPolicy.RetryOptions.MaxRetryWaitTimeInSeconds = 0; client.ConnectionPolicy.RetryOptions.MaxRetryAttemptsOnThrottledRequests = 0; var list = CreateAccessesList(); var listOfStrings = list.Select(item => JsonConvert.SerializeObject(item)).ToList(); var documents = JsonConvert.SerializeObject(list); BulkImportResponse bulkImportResponse = await bulkExecutor.BulkImportAsync( documents : listOfStrings, enableUpsert : true, disableAutomaticIdGeneration : true, maxConcurrencyPerPartitionKeyRange : null, maxInMemorySortingBatchSize : null); Console.WriteLine("Bulk import completed:"); Console.WriteLine($"\tImported: { bulkImportResponse.NumberOfDocumentsImported}"); Console.WriteLine($"\tErrors: { bulkImportResponse.BadInputDocuments.Count}"); Console.WriteLine($"\tRequestUnits: { bulkImportResponse.TotalRequestUnitsConsumed}"); Console.WriteLine($"\tTime taken: { bulkImportResponse.TotalTimeTaken}"); }
public async Task TestIfDocumentsAreUpserted() { Mock<IBulkExecutor> mockBulkExecutor = new Mock<IBulkExecutor>(); Mock<ILogger> mockLog = new Mock<ILogger>(); AsyncCollector<Document> postMortemCol = new AsyncCollector<Document>(); DocumentClient client = new DocumentClient(new Uri(configuration["EndPoint"]), configuration["AuthKey"]); DocumentCollection container = client.CreateDocumentCollectionQuery(UriFactory.CreateDatabaseUri(configuration["TargetDatabase"])) .Where(c => c.Id == configuration["TargetCollection"]).AsEnumerable().FirstOrDefault(); IBulkExecutor bulkExecutor = new BulkExecutor(client, container); await bulkExecutor.InitializeAsync(); IEnumerable<string> bulkDocs = Utilities.GenerateDocumentsWithRandomIdAndPk(5000); BulkImportResponse bulkImportResponse = await bulkExecutor.BulkImportAsync(bulkDocs, false); List<Document> fakeBadDocsBatch = new List<Document>(); Document doc = new Document(); doc.Id = "0f4adabc-d461-495f-bdd3-4f8877ae7f3f"; for (int i = 0; i < 10; i++) { fakeBadDocsBatch.Add(doc); } ReadOnlyCollection<Document> readOnlyDocs = fakeBadDocsBatch.AsReadOnly(); mockBulkExecutor.Setup(bulkExecutorFake => bulkExecutorFake.InitializeAsync()) .Verifiable(); mockBulkExecutor.Setup(bulkExecutorFake => bulkExecutorFake.BulkImportAsync(It.IsAny<ReadOnlyCollection<Document>>(), true, true, null, It.IsAny<int>(), It.IsAny<CancellationToken>())) .Returns(() => Task.FromResult(bulkImportResponse)) //Add docs to the badInputDocuments list to test whether the post-mortem queue is employed .Callback(() => bulkImportResponse.BadInputDocuments.AddRange(fakeBadDocsBatch)); DocumentFeedMigrator migrator = new DocumentFeedMigrator(mockBulkExecutor.Object); await migrator.Run(postMortemCol, readOnlyDocs, mockLog.Object); Assert.AreEqual(postMortemCol.Count(), 10); mockBulkExecutor.Verify( bulkExecutorFake => bulkExecutorFake.BulkImportAsync( It.IsAny<ReadOnlyCollection<Document>>(), true, true, null, It.IsAny<int>(), It.IsAny<CancellationToken>()), Times.Exactly(1)); }
private static void LogMetrics(IChangeFeedObserverContext context, BulkImportResponse bulkImportResponse) { Console.WriteLine("Imported Documents: " + bulkImportResponse.NumberOfDocumentsImported + " by process " + Process.GetCurrentProcess().Id); Console.WriteLine("RUs consumed : " + bulkImportResponse.TotalRequestUnitsConsumed + " by process " + Process.GetCurrentProcess().Id); Program.telemetryClient.TrackMetric("TotalInserted", bulkImportResponse.NumberOfDocumentsImported); Program.telemetryClient.TrackMetric("InsertedDocuments-Process:" + Process.GetCurrentProcess().Id, bulkImportResponse.NumberOfDocumentsImported); Program.telemetryClient.TrackMetric("TotalRUs", bulkImportResponse.TotalRequestUnitsConsumed); }
private void LogProgress(BulkImportResponse response) { Console.WriteLine(String.Format("\nSummary for collection")); Console.WriteLine("--------------------------------------------------------------------- "); Console.WriteLine(String.Format("Created {0} docs @ {1} writes/s, {2} RU/s in {3} sec", response.NumberOfDocumentsImported, Math.Round(response.NumberOfDocumentsImported / response.TotalTimeTaken.TotalSeconds), Math.Round(response.TotalRequestUnitsConsumed / response.TotalTimeTaken.TotalSeconds), response.TotalTimeTaken.TotalSeconds)); Console.WriteLine(String.Format("Average RU consumption per document update: {0}", (response.TotalRequestUnitsConsumed / response.NumberOfDocumentsImported))); Console.WriteLine("---------------------------------------------------------------------\n "); }
private async Task RunBulkImportVerticesAsync(GraphBulkExecutor graphbulkExecutor, VertexSpec vertexSpec) { var tokenSource = new CancellationTokenSource(); var token = tokenSource.Token; BulkImportResponse response = null; List <GremlinVertex> vObjects = null; int currentCount = 0; while (currentCount < vertexSpec.count) { vObjects = new List <GremlinVertex>(); for (int i = 0; i < 100000 && currentCount < vertexSpec.count; i++) { string id = createVertexId(vertexSpec.label, currentCount); GremlinVertex v = new GremlinVertex(id, vertexSpec.label); for (int j = 0; j < vertexSpec.numberOfProperties; j++) { v.AddProperty("property_" + j, Guid.NewGuid().ToString()); } v.AddProperty(pkPropertyName, id); currentCount += 1; vObjects.Add(v); } try { response = await graphbulkExecutor.BulkImportAsync( vObjects, enableUpsert : true, disableAutomaticIdGeneration : true, maxConcurrencyPerPartitionKeyRange : null, maxInMemorySortingBatchSize : null, cancellationToken : token); } catch (DocumentClientException de) { Trace.TraceError("Document client exception: {0}", de); } catch (Exception e) { Trace.TraceError("Exception: {0}", e); } } }
public async Task ProcessChangesAsync( IChangeFeedObserverContext context, IReadOnlyList <Document> docs, CancellationToken cancellationToken) { BulkImportResponse bulkImportResponse = new BulkImportResponse(); try { Boolean isSyntheticKey = SourcePartitionKeys.Contains(","); Boolean isNestedAttribute = SourcePartitionKeys.Contains("/"); List <Document> transformedDocs = new List <Document>(); Document document = new Document(); foreach (var doc in docs) { document = (SourcePartitionKeys != null && TargetPartitionKey != null) ? MapPartitionKey(doc, isSyntheticKey, TargetPartitionKey, isNestedAttribute, SourcePartitionKeys) : doc; transformedDocs.AddRange(await documentTransformer.TransformDocument(document)); } bulkImportResponse = await bulkExecutor.BulkImportAsync( documents : transformedDocs, enableUpsert : true, maxConcurrencyPerPartitionKeyRange : 1, disableAutomaticIdGeneration : true, maxInMemorySortingBatchSize : null, cancellationToken : cancellationToken, maxMiniBatchSizeBytes : 100 * 1024); if (bulkImportResponse.FailedImports.Count > 0 && containerClient != null) { WriteFailedDocsToBlob("FailedImportDocs", containerClient, bulkImportResponse); } if (bulkImportResponse.BadInputDocuments.Count > 0 && containerClient != null) { WriteFailedDocsToBlob("BadInputDocs", containerClient, bulkImportResponse); } LogMetrics(context, bulkImportResponse); } catch (Exception e) { Program.telemetryClient.TrackException(e); } Program.telemetryClient.Flush(); }
/// <summary> /// This method uses the Cosmos DB BulkExecutor library to bulk ingest the input list of JSON documents /// </summary> /// <param name="documentsToImport"> List of documents to bulk ingest into Cosmos DB </param> public async Task DataImportForMultipleTemplates(List <string> documentsToImport) { DocumentCollection collection = GetCollectionIfExists(this.DatabaseName, this.CollectionName); if (collection == null) { throw new Exception("The collection does not exist"); } BulkExecutor bulkExecutor = new BulkExecutor(this.Client, collection); await bulkExecutor.InitializeAsync(); BulkImportResponse bulkImportResponse = null; long totalNumberOfDocumentsInserted = 0; double totalRequestUnitsConsumed = 0; double totalTimeTakenSec = 0; try { bulkImportResponse = await bulkExecutor.BulkImportAsync(documentsToImport, false, false); } catch (DocumentClientException de) { Console.WriteLine("Document client exception while executing bulk insert. Stack trace: \n {0}", de.StackTrace); Console.ReadLine(); } catch (Exception e) { Console.WriteLine("Exception thrown while executing bulk insert. Stack trace:\n {0}", e.StackTrace); Console.ReadLine(); } if (bulkImportResponse == null) { return; } Console.WriteLine(String.Format("\nSummary for write.")); Console.WriteLine("--------------------------------------------------------------------- "); Console.WriteLine(String.Format("Inserted {0} docs @ {1} writes/s, {2} RU/s in {3} sec", bulkImportResponse.NumberOfDocumentsImported, Math.Round(bulkImportResponse.NumberOfDocumentsImported / bulkImportResponse.TotalTimeTaken.TotalSeconds), Math.Round(bulkImportResponse.TotalRequestUnitsConsumed / bulkImportResponse.TotalTimeTaken.TotalSeconds), bulkImportResponse.TotalTimeTaken.TotalSeconds)); Console.WriteLine(String.Format("Average RU consumption per document: {0}", (bulkImportResponse.TotalRequestUnitsConsumed / bulkImportResponse.NumberOfDocumentsImported))); Console.WriteLine("---------------------------------------------------------------------\n "); totalNumberOfDocumentsInserted += bulkImportResponse.NumberOfDocumentsImported; totalRequestUnitsConsumed += bulkImportResponse.TotalRequestUnitsConsumed; totalTimeTakenSec += bulkImportResponse.TotalTimeTaken.TotalSeconds; }
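A short, hedged example of invoking the ingestion helper above; the "importer" instance and the sample payloads are assumptions, and the call is awaitable given the Task return type used here:

// Hypothetical caller: serialize a couple of documents and hand them to DataImportForMultipleTemplates.
var docsToImport = new List<string>
{
    JsonConvert.SerializeObject(new { id = Guid.NewGuid().ToString(), partitionKey = "tenant-1", value = 1 }),
    JsonConvert.SerializeObject(new { id = Guid.NewGuid().ToString(), partitionKey = "tenant-2", value = 2 })
};
await importer.DataImportForMultipleTemplates(docsToImport); // "importer" is an assumed instance of the class above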
private static void LogMetrics(IChangeFeedObserverContext context, BulkImportResponse bulkImportResponse) { Program.telemetryClient.TrackMetric("TotalInserted", bulkImportResponse.NumberOfDocumentsImported); Program.telemetryClient.TrackMetric("InsertedDocuments-Process:" + Process.GetCurrentProcess().Id, bulkImportResponse.NumberOfDocumentsImported); Program.telemetryClient.TrackMetric("TotalRUs", bulkImportResponse.TotalRequestUnitsConsumed); if (bulkImportResponse.BadInputDocuments.Count > 0) { Program.telemetryClient.TrackMetric("BadInputDocsCount", bulkImportResponse.BadInputDocuments.Count); } if (bulkImportResponse.FailedImports.Count > 0) { Program.telemetryClient.TrackMetric("FailedImportDocsCount", bulkImportResponse.FailedImports.First().DocumentsFailedToImport.Count); } }
public async Task ImportAsync <T>(string collectionName, List <T> toImport) where T : CosmosObject { int batchSize = 1000; var bulkExecutor = await BuildClientAsync(collectionName); var partitionKeyProperty = GetPartitionKeyProp <T>(); var batchedImportItems = toImport.Chunk(batchSize).ToList(); Console.WriteLine($"\nImporting {toImport.Count} documents in {batchedImportItems.Count} batches. Beginning."); await Task.Run(async() => { /* Prepare for bulk import. */ var batchesRun = 0; long totalNumberOfDocumentsImported = 0; BulkImportResponse bulkImportResponse = null; do { try { bulkImportResponse = await bulkExecutor.BulkImportAsync( documents: batchedImportItems[batchesRun], enableUpsert: true, disableAutomaticIdGeneration: true, maxConcurrencyPerPartitionKeyRange: null, maxInMemorySortingBatchSize: null, cancellationToken: new CancellationTokenSource().Token); } catch (DocumentClientException de) { Console.WriteLine("Document client exception: {0}", de); } catch (Exception e) { Console.WriteLine("Exception: {0}", e); } if (bulkImportResponse != null) { LogProgress(bulkImportResponse); totalNumberOfDocumentsImported += bulkImportResponse.NumberOfDocumentsImported; } batchesRun++; } while (batchesRun < batchedImportItems.Count && totalNumberOfDocumentsImported < toImport.Count); }); }
public async Task <BulkImportResponse> BulkSendToNewCollection <T>(List <T> entityList) { BulkImportResponse bulkImportResponse = null; var tokenSource = new CancellationTokenSource(); var token = tokenSource.Token; int attempts = 0; var objList = entityList.Cast <Object>(); do { bulkImportResponse = await bulkExecutor.BulkImportAsync( documents : objList, enableUpsert : true, disableAutomaticIdGeneration : true, maxConcurrencyPerPartitionKeyRange : null, maxInMemorySortingBatchSize : null, cancellationToken : token); attempts++; } while (bulkImportResponse.NumberOfDocumentsImported < entityList.Count() && attempts <= 5); var badDocumentList = bulkImportResponse.BadInputDocuments; #region log bulk Summary logger.LogInfo(String.Format("\n Batch Upload completed ")); logger.LogInfo("--------------------------------------------------------------------- "); logger.LogInfo(String.Format("Inserted {0} docs @ {1} writes/s, {2} RU/s in {3} sec", bulkImportResponse.NumberOfDocumentsImported, Math.Round(bulkImportResponse.NumberOfDocumentsImported / bulkImportResponse.TotalTimeTaken.TotalSeconds, 2), Math.Round(bulkImportResponse.TotalRequestUnitsConsumed / bulkImportResponse.TotalTimeTaken.TotalSeconds, 2), bulkImportResponse.TotalTimeTaken.TotalSeconds)); logger.LogInfo(String.Format("Average RU consumption per document: {0}", Math.Round(bulkImportResponse.TotalRequestUnitsConsumed / bulkImportResponse.NumberOfDocumentsImported, 2))); if (badDocumentList != null && badDocumentList.Count > 0) { logger.LogInfo($"bad Documents detected {badDocumentList.Count}"); } logger.LogInfo("---------------------------------------------------------------------\n "); #endregion return(bulkImportResponse); }
public async Task UpsertAsync(IEnumerable <TEntity> entities) { var documentCollection = await _documentClient.ReadDocumentCollectionAsync(GetCollectionUri()).ConfigureAwait(false); var bulkExecutor = new BulkExecutor(_documentClient as Documents.Client.DocumentClient, documentCollection); await bulkExecutor.InitializeAsync().ConfigureAwait(false); var entries = entities.Select(x => new DbEntry <TEntity>(x, _model.Analyzer, _jsonSerializerSettings)).ToList(); /* materialized once so retries do not re-project the sequence and the count stays stable */ BulkImportResponse bulkImportResponse = null; do { bulkImportResponse = await bulkExecutor .BulkImportAsync( entries, enableUpsert : true, disableAutomaticIdGeneration : true) .ConfigureAwait(false); } while (bulkImportResponse.NumberOfDocumentsImported < entries.Count); }
public async Task ReadUploadInBatches(IDocumentQuery <dynamic> query) { #region batchVariables //initialize Batch Process variables int batchCount = 0; TotalRecordsRetrieved = 0; TotalRecordsSent = 0; var badEntities = new List <Object>(); Stopwatch stopwatch = new Stopwatch(); stopwatch.Start(); #endregion while (query.HasMoreResults) { batchCount++; logger.LogInfo($"BatchNumber : {batchCount} begins "); List <dynamic> entities = await GetCommonEntitiesinBatch(query); TotalRecordsRetrieved += entities.Count(); List <object> objEntities = new List <object>(); objEntities.AddRange((IEnumerable <object>)entities); List <string> strEntities = new List <string>(); foreach (var obj in objEntities) { strEntities.Add(JsonConvert.SerializeObject(obj)); } BulkImportResponse uploadResponse = new BulkImportResponse(); var scrubbedEntities = strEntities; if (entities.Any()) { if (noFilterScrubRules == null || noFilterScrubRules.Count == 0) { uploadResponse = await cosmosBulkImporter.BulkSendToNewCollection <dynamic>(entities); } else { var jEntities = new List <JToken>(); foreach (var sRule in noFilterScrubRules) { jEntities = objectScrubber.ScrubObjectList(scrubbedEntities, sRule); var nentities = new List <string>(); foreach (var jobj in jEntities) { nentities.Add(JsonConvert.SerializeObject(jobj)); } scrubbedEntities = nentities; sRule.RecordsUpdated += jEntities.Count; } var objDocuments = jEntities.Cast <Object>().ToList(); uploadResponse = await cosmosBulkImporter.BulkSendToNewCollection <dynamic>(objDocuments); } } else { logger.LogInfo("No Entities retrieved from query"); continue; } badEntities = uploadResponse.BadInputDocuments; TotalRecordsSent += uploadResponse.NumberOfDocumentsImported; logger.LogInfo($"Summary of Batch {batchCount} records retrieved {entities.Count()}. Records Uploaded: {uploadResponse.NumberOfDocumentsImported}"); logger.LogInfo($"Total records retrieved {TotalRecordsRetrieved}. Total records uploaded {TotalRecordsSent}"); logger.LogInfo($"Time elapsed : {stopwatch.Elapsed} "); } SetCompleteOnNoFilterRules(); stopwatch.Stop(); logger.LogInfo("Document Migration completed"); }
public static async Task RunBulkImportAsync(DocumentClient _client, IOptions <CosmosConfig> _cosmosConfig) { // Cleanup on start if set in config. DocumentCollection dataCollection = await SetupCosmosCollection(_client, _cosmosConfig); // Prepare for bulk import. // Creating documents with simple partition key here. string partitionKeyProperty = dataCollection.PartitionKey.Paths[0].Replace("/", ""); int numberOfDocumentsToGenerate = _cosmosConfig.Value.NumberOfDocumentsToImport; int numberOfBatches = _cosmosConfig.Value.NumberOfBatches; long numberOfDocumentsPerBatch = (long)Math.Floor(((double)numberOfDocumentsToGenerate) / numberOfBatches); // Set retry options high for initialization (default values). _client.ConnectionPolicy.RetryOptions.MaxRetryWaitTimeInSeconds = 30; _client.ConnectionPolicy.RetryOptions.MaxRetryAttemptsOnThrottledRequests = 9; IBulkExecutor bulkExecutor = new BulkExecutor(_client, dataCollection); await bulkExecutor.InitializeAsync(); // Set retries to 0 to pass control to bulk executor. _client.ConnectionPolicy.RetryOptions.MaxRetryWaitTimeInSeconds = 0; _client.ConnectionPolicy.RetryOptions.MaxRetryAttemptsOnThrottledRequests = 0; BulkImportResponse bulkImportResponse = null; long totalNumberOfDocumentsInserted = 0; double totalRequestUnitsConsumed = 0; double totalTimeTakenSec = 0; var tokenSource = new CancellationTokenSource(); var token = tokenSource.Token; for (int i = 0; i < numberOfBatches; i++) { // Generate JSON-serialized documents to import. List <string> documentsToImportInBatch = DocumentBatch(partitionKeyProperty, numberOfDocumentsPerBatch, i); // Invoke bulk import API. var tasks = new List <Task>(); tasks.Add(Task.Run(async() => { Console.WriteLine(String.Format("Executing bulk import for batch {0}", i)); do { try { bulkImportResponse = await bulkExecutor.BulkImportAsync( documents: documentsToImportInBatch, enableUpsert: true, disableAutomaticIdGeneration: true, maxConcurrencyPerPartitionKeyRange: null, maxInMemorySortingBatchSize: null, cancellationToken: token); } catch (DocumentClientException de) { Console.WriteLine("Document _client exception: {0}", de); break; } catch (Exception e) { Console.WriteLine("Exception: {0}", e); break; } } while (bulkImportResponse.NumberOfDocumentsImported < documentsToImportInBatch.Count); Console.WriteLine(String.Format("\nSummary for batch {0}:", i)); Console.WriteLine("--------------------------------------------------------------------- "); Console.WriteLine(String.Format("Inserted {0} docs @ {1} writes/s, {2} RU/s in {3} sec", bulkImportResponse.NumberOfDocumentsImported, Math.Round(bulkImportResponse.NumberOfDocumentsImported / bulkImportResponse.TotalTimeTaken.TotalSeconds), Math.Round(bulkImportResponse.TotalRequestUnitsConsumed / bulkImportResponse.TotalTimeTaken.TotalSeconds), bulkImportResponse.TotalTimeTaken.TotalSeconds)); Console.WriteLine(String.Format("Average RU consumption per document: {0}", (bulkImportResponse.TotalRequestUnitsConsumed / bulkImportResponse.NumberOfDocumentsImported))); Console.WriteLine("---------------------------------------------------------------------\n "); totalNumberOfDocumentsInserted += bulkImportResponse.NumberOfDocumentsImported; totalRequestUnitsConsumed += bulkImportResponse.TotalRequestUnitsConsumed; totalTimeTakenSec += bulkImportResponse.TotalTimeTaken.TotalSeconds; }, token)); /* * tasks.Add(Task.Run(() => * { * char ch = Console.ReadKey(true).KeyChar; * if (ch == 'c' || ch == 'C') * { * tokenSource.Cancel(); * Console.WriteLine("\nTask cancellation 
requested."); * } * })); */ await Task.WhenAll(tasks); } Console.WriteLine("Overall summary:"); Console.WriteLine("--------------------------------------------------------------------- "); Console.WriteLine(String.Format("Inserted {0} docs @ {1} writes/s, {2} RU/s in {3} sec", totalNumberOfDocumentsInserted, Math.Round(totalNumberOfDocumentsInserted / totalTimeTakenSec), Math.Round(totalRequestUnitsConsumed / totalTimeTakenSec), totalTimeTakenSec)); Console.WriteLine(String.Format("Average RU consumption per document: {0}", (totalRequestUnitsConsumed / totalNumberOfDocumentsInserted))); Console.WriteLine("--------------------------------------------------------------------- "); // Cleanup on finish if set in config. if (_cosmosConfig.Value.ShouldCleanupOnFinish) { Console.WriteLine("Deleting Database {0}", _cosmosConfig.Value.DatabaseName); await _client.DeleteDatabaseAsync(UriFactory.CreateDatabaseUri(_cosmosConfig.Value.DatabaseName)); } Console.WriteLine("\nPress any key to exit."); Console.ReadKey(); }
private static async Task MainAsync(string[] args) { try { DocumentClient cosmosDbClient = NewClient(); // Set retry options high during initialization (default values). cosmosDbClient.ConnectionPolicy.RetryOptions.MaxRetryWaitTimeInSeconds = 30; cosmosDbClient.ConnectionPolicy.RetryOptions.MaxRetryAttemptsOnThrottledRequests = 9; DocumentCollection collection = GetCollectionIfExists( cosmosDbClient, DatabaseId, CollectionId); IBulkExecutor bulkExecutor = new BulkExecutor( cosmosDbClient, collection); await bulkExecutor.InitializeAsync(); // Set retries to 0 to pass complete control to bulk executor. cosmosDbClient.ConnectionPolicy.RetryOptions.MaxRetryWaitTimeInSeconds = 0; cosmosDbClient.ConnectionPolicy.RetryOptions.MaxRetryAttemptsOnThrottledRequests = 0; long numberOfDocumentsToGenerate = BatchCount * DocumentCountPerBatch; BulkImportResponse bulkImportResponse = null; long totalNumberOfDocumentsInserted = 0; double totalRequestUnitsConsumed = 0; double totalTimeTakenSec = 0; var tokenSource = new CancellationTokenSource(); CancellationToken token = tokenSource.Token; for (int i = 0; i < BatchCount; i++) { // Generate documents to import. var batch = new Product[DocumentCountPerBatch]; for (int n = 0; n < DocumentCountPerBatch; n++) { batch[n] = Product.NewRandom(); } // Invoke bulk import API. var tasks = new List <Task> { Task.Run(async() => { Console.WriteLine("Executing bulk import for batch {0}", i); do { try { bulkImportResponse = await bulkExecutor.BulkImportAsync( documents: batch, enableUpsert: true, disableAutomaticIdGeneration: true, maxConcurrencyPerPartitionKeyRange: null, maxInMemorySortingBatchSize: null, cancellationToken: token); } catch (DocumentClientException de) { Console.WriteLine("Document client exception: {0}", de); break; } catch (Exception e) { Console.WriteLine("Exception: {0}", e); break; } } while (bulkImportResponse.NumberOfDocumentsImported < DocumentCountPerBatch); Console.WriteLine(String.Format("\nSummary for batch {0}:", i)); Console.WriteLine("--------------------------------------------------------------------- "); Console.WriteLine(String.Format("Inserted {0} docs @ {1} writes/s, {2} RU/s in {3} sec", bulkImportResponse.NumberOfDocumentsImported, Math.Round(bulkImportResponse.NumberOfDocumentsImported / bulkImportResponse.TotalTimeTaken.TotalSeconds), Math.Round(bulkImportResponse.TotalRequestUnitsConsumed / bulkImportResponse.TotalTimeTaken.TotalSeconds), bulkImportResponse.TotalTimeTaken.TotalSeconds)); Console.WriteLine(String.Format("Average RU consumption per document: {0}", (bulkImportResponse.TotalRequestUnitsConsumed / bulkImportResponse.NumberOfDocumentsImported))); Console.WriteLine("---------------------------------------------------------------------\n "); totalNumberOfDocumentsInserted += bulkImportResponse.NumberOfDocumentsImported; totalRequestUnitsConsumed += bulkImportResponse.TotalRequestUnitsConsumed; totalTimeTakenSec += bulkImportResponse.TotalTimeTaken.TotalSeconds; }, token), Task.Run(() => { char ch = Console.ReadKey(true).KeyChar; if (ch == 'c' || ch == 'C') { tokenSource.Cancel(); Console.WriteLine("\nTask cancellation requested."); } }) }; await Task.WhenAll(tasks); } Console.WriteLine("Overall summary:"); Console.WriteLine("--------------------------------------------------------------------- "); Console.WriteLine(String.Format("Inserted {0} docs @ {1} writes/s, {2} RU/s in {3} sec", totalNumberOfDocumentsInserted, Math.Round(totalNumberOfDocumentsInserted / totalTimeTakenSec), Math.Round(totalRequestUnitsConsumed / 
totalTimeTakenSec), totalTimeTakenSec)); Console.WriteLine(String.Format("Average RU consumption per document: {0}", (totalRequestUnitsConsumed / totalNumberOfDocumentsInserted))); Console.WriteLine("--------------------------------------------------------------------- "); Console.WriteLine("\nPress any key to exit."); Console.ReadKey(); } catch (Exception error) { Console.WriteLine("EXCEPTION: {0}", error); } Console.WriteLine("Press any key to quit..."); Console.ReadKey(); }
static void Main(string[] args) { Task.Run(async() => { DocumentClient srcClient = new DocumentClient(new Uri(Program.srcEndpoint), Program.srcAuthKey, Program.ConnectionPolicy); Uri srcCollectionLink = UriFactory.CreateDocumentCollectionUri(Program.srcDatabaseName, Program.srcCollectionName); DocumentCollection srcCollection = Program.ReadCollectionAsync(srcClient, srcDatabaseName, srcCollectionName, false).Result; DocumentClient destClient = new DocumentClient(new Uri(Program.destEndpoint), Program.destAuthKey, Program.ConnectionPolicy); Uri destCollectionLink = UriFactory.CreateDocumentCollectionUri(Program.destDatabaseName, Program.destCollectionName); DocumentCollection destCollection = Program.ReadCollectionAsync(destClient, destDatabaseName, destCollectionName, true).Result; Stopwatch watch = new Stopwatch(); watch.Start(); IBulkExecutor documentBulkImporter = new GraphBulkExecutor(destClient, destCollection); await documentBulkImporter.InitializeAsync(); BulkImportResponse bulkImportResponse = null; IEnumerable <JObject> vertexdocs = GetDocs(srcClient, srcCollection, true); try { bulkImportResponse = await documentBulkImporter.BulkImportAsync( vertexdocs.Select(vertex => ConvertToGremlinVertex(vertex)), enableUpsert: true, maxInMemorySortingBatchSize: 100000); } catch (DocumentClientException de) { Trace.TraceError("Document client exception: {0}", de); throw; } catch (Exception e) { Trace.TraceError("Exception: {0}", e); throw; } Console.WriteLine("Importing edges"); IEnumerable <JObject> edgeDocs = GetDocs(srcClient, srcCollection, false); try { bulkImportResponse = await documentBulkImporter.BulkImportAsync( edgeDocs.Select(edge => ConvertToGremlinEdge(edge)), enableUpsert: true, maxInMemorySortingBatchSize: 100000); } catch (DocumentClientException de) { Trace.TraceError("Document client exception: {0}", de); throw; } catch (Exception e) { Trace.TraceError("Exception: {0}", e); throw; } watch.Stop(); Console.WriteLine("Time Taken: " + watch.ElapsedMilliseconds); }).GetAwaiter().GetResult(); Console.WriteLine("Done, Please press any key to continue..."); Console.ReadLine(); }
private async Task RunBulkScenario(string collectionName) { var connectionPolicy = new ConnectionPolicy { ConnectionMode = ConnectionMode.Direct, ConnectionProtocol = Protocol.Tcp, }; connectionPolicy.PreferredLocations.Add(LocationNames.WestUS2); MongoBulkExecutor mongoBulkExecutor = new MongoBulkExecutor( new Uri(EndpointUrl), AuthorizationKey, DatabaseName, collectionName, connectionPolicy ); await mongoBulkExecutor.InitializeAsync(); BulkImportResponse bulkImportResponse = null; long totalNumberOfDocumentsInserted = 0; double totalRequestUnitsConsumed = 0; double totalTimeTakenSec = 0; var tokenSource = new CancellationTokenSource(); var token = tokenSource.Token; int numberOfBatches = int.Parse(ConfigurationManager.AppSettings["NumberOfBatches"]); long numberOfDocumentsPerBatch = long.Parse(ConfigurationManager.AppSettings["NumberOfDocumentsPerBatch"]); for (int i = 0; i < numberOfBatches; i++) { // Generate JSON-serialized documents to import. List <string> documentsToImportInBatch = new List <string>(); long prefix = i * numberOfDocumentsPerBatch; Trace.TraceInformation(String.Format("Generating {0} documents to import for batch {1}", numberOfDocumentsPerBatch, i)); documentsToImportInBatch = Util.GetSOHData(i, numberOfDocumentsPerBatch); // Invoke bulk import API. var tasks = new List <Task>(); tasks.Add(Task.Run(async() => { Trace.TraceInformation(String.Format("Executing bulk import for batch {0}", i)); do { try { bulkImportResponse = await mongoBulkExecutor.BulkImportAsync( documents: documentsToImportInBatch, enableUpsert: false); } catch (DocumentClientException de) { Trace.TraceError("Document client exception: {0}", de); break; } catch (Exception e) { Trace.TraceError("Exception: {0}", e); break; } } while (bulkImportResponse.NumberOfDocumentsImported < documentsToImportInBatch.Count); totalNumberOfDocumentsInserted += bulkImportResponse.NumberOfDocumentsImported; totalRequestUnitsConsumed += bulkImportResponse.TotalRequestUnitsConsumed; totalTimeTakenSec += bulkImportResponse.TotalTimeTaken.TotalSeconds; // Code to summarize running total: Console.WriteLine("--------------------------------------------------------------------- "); Console.WriteLine(String.Format("Inserted {0} docs @ {1} writes/s, {2} RU/s in {3} sec", totalNumberOfDocumentsInserted, Math.Round(totalNumberOfDocumentsInserted / totalTimeTakenSec), Math.Round(totalRequestUnitsConsumed / totalTimeTakenSec), totalTimeTakenSec)); //Console.WriteLine(String.Format("Average RU consumption per document: {0}", // (totalRequestUnitsConsumed / totalNumberOfDocumentsInserted))); //Console.WriteLine(String.Format("Total RU's consumed: {0}", // (totalRequestUnitsConsumed))); //Console.WriteLine(String.Format("Total # of Documents inserted: {0}", // (totalNumberOfDocumentsInserted))); //Trace.WriteLine(String.Format("\nSummary for batch {0}:", i)); //Trace.WriteLine("--------------------------------------------------------------------- "); //Trace.WriteLine(String.Format("Inserted {0} docs @ {1} writes/s, {2} RU/s in {3} sec", // bulkImportResponse.NumberOfDocumentsImported, // Math.Round(bulkImportResponse.NumberOfDocumentsImported / bulkImportResponse.TotalTimeTaken.TotalSeconds), // Math.Round(bulkImportResponse.TotalRequestUnitsConsumed / bulkImportResponse.TotalTimeTaken.TotalSeconds), // bulkImportResponse.TotalTimeTaken.TotalSeconds)); //Trace.WriteLine(String.Format("Average RU consumption per document: {0}", // (bulkImportResponse.TotalRequestUnitsConsumed / bulkImportResponse.NumberOfDocumentsImported))); 
//Trace.WriteLine("---------------------------------------------------------------------\n "); Console.WriteLine("--------------------------------------------------------------------- "); }, token)); //tasks.Add(Task.Run(() => //{ // char ch = Console.ReadKey(true).KeyChar; // if (ch == 'c' || ch == 'C') // { // tokenSource.Cancel(); // Trace.WriteLine("\nTask cancellation requested."); // Console.WriteLine("\nCancelling import."); // } //})); await Task.WhenAll(tasks); Trace.WriteLine("\nPress any key to exit."); } }
/// <summary> /// Driver function for bulk import. /// </summary> /// <returns></returns> private async Task RunBulkImportAndUpdateAsync() { // Cleanup on start if set in config. DocumentCollection dataCollection = null; try { if (bool.Parse(ConfigurationManager.AppSettings["ShouldCleanupOnStart"])) { Database database = Utils.GetDatabaseIfExists(client, DatabaseName); if (database != null) { await client.DeleteDatabaseAsync(database.SelfLink); } Trace.TraceInformation("Creating database {0}", DatabaseName); database = await client.CreateDatabaseAsync(new Database { Id = DatabaseName }); Trace.TraceInformation(String.Format("Creating collection {0} with {1} RU/s", CollectionName, CollectionThroughput)); dataCollection = await Utils.CreatePartitionedCollectionAsync(client, DatabaseName, CollectionName, CollectionThroughput); } else { dataCollection = Utils.GetCollectionIfExists(client, DatabaseName, CollectionName); if (dataCollection == null) { throw new Exception("The data collection does not exist"); } } } catch (Exception de) { Trace.TraceError("Unable to initialize, exception message: {0}", de.Message); throw; } // Prepare for bulk import. // Creating documents with simple partition key here. string partitionKeyProperty = dataCollection.PartitionKey.Paths[0].Replace("/", ""); long numberOfDocumentsToGenerate = long.Parse(ConfigurationManager.AppSettings["NumberOfDocumentsToUpdate"]); int numberOfBatches = int.Parse(ConfigurationManager.AppSettings["NumberOfBatches"]); long numberOfDocumentsPerBatch = (long)Math.Floor(((double)numberOfDocumentsToGenerate) / numberOfBatches); // Set retry options high for initialization (default values). client.ConnectionPolicy.RetryOptions.MaxRetryWaitTimeInSeconds = 30; client.ConnectionPolicy.RetryOptions.MaxRetryAttemptsOnThrottledRequests = 9; IBulkExecutor bulkExecutor = new BulkExecutor(client, dataCollection); await bulkExecutor.InitializeAsync(); // Set retries to 0 to pass control to bulk executor. client.ConnectionPolicy.RetryOptions.MaxRetryWaitTimeInSeconds = 0; client.ConnectionPolicy.RetryOptions.MaxRetryAttemptsOnThrottledRequests = 0; BulkImportResponse bulkImportResponse = null; long totalNumberOfDocumentsInserted = 0; double totalRequestUnitsConsumed = 0; double totalTimeTakenSec = 0; var tokenSource = new CancellationTokenSource(); var token = tokenSource.Token; for (int i = 0; i < numberOfBatches; i++) { // Generate JSON-serialized documents to import. List <string> documentsToImportInBatch = new List <string>(); long prefix = i * numberOfDocumentsPerBatch; Trace.TraceInformation(String.Format("Generating {0} documents to import for batch {1}", numberOfDocumentsPerBatch, i)); for (int j = 0; j < numberOfDocumentsPerBatch; j++) { string partitionKeyValue = (prefix + j).ToString(); string id = partitionKeyValue; documentsToImportInBatch.Add(Utils.GenerateRandomDocumentString(id, partitionKeyProperty, partitionKeyValue)); } // Invoke bulk import API. 
var tasks = new List <Task>(); tasks.Add(Task.Run(async() => { Trace.TraceInformation(String.Format("Executing bulk import for batch {0}", i)); do { try { bulkImportResponse = await bulkExecutor.BulkImportAsync( documents: documentsToImportInBatch, enableUpsert: true, disableAutomaticIdGeneration: true, maxConcurrencyPerPartitionKeyRange: null, maxInMemorySortingBatchSize: null, cancellationToken: token); } catch (DocumentClientException de) { Trace.TraceError("Document client exception: {0}", de); break; } catch (Exception e) { Trace.TraceError("Exception: {0}", e); break; } } while (bulkImportResponse.NumberOfDocumentsImported < documentsToImportInBatch.Count); Trace.WriteLine(String.Format("\nSummary for batch {0}:", i)); Trace.WriteLine("--------------------------------------------------------------------- "); Trace.WriteLine(String.Format("Inserted {0} docs @ {1} writes/s, {2} RU/s in {3} sec", bulkImportResponse.NumberOfDocumentsImported, Math.Round(bulkImportResponse.NumberOfDocumentsImported / bulkImportResponse.TotalTimeTaken.TotalSeconds), Math.Round(bulkImportResponse.TotalRequestUnitsConsumed / bulkImportResponse.TotalTimeTaken.TotalSeconds), bulkImportResponse.TotalTimeTaken.TotalSeconds)); Trace.WriteLine(String.Format("Average RU consumption per document insert: {0}", (bulkImportResponse.TotalRequestUnitsConsumed / bulkImportResponse.NumberOfDocumentsImported))); Trace.WriteLine("---------------------------------------------------------------------\n "); totalNumberOfDocumentsInserted += bulkImportResponse.NumberOfDocumentsImported; totalRequestUnitsConsumed += bulkImportResponse.TotalRequestUnitsConsumed; totalTimeTakenSec += bulkImportResponse.TotalTimeTaken.TotalSeconds; }, token)); /* * tasks.Add(Task.Run(() => * { * char ch = Console.ReadKey(true).KeyChar; * if (ch == 'c' || ch == 'C') * { * tokenSource.Cancel(); * Trace.WriteLine("\nTask cancellation requested."); * } * })); */ await Task.WhenAll(tasks); } Trace.WriteLine("Overall summary of bulk import:"); Trace.WriteLine("--------------------------------------------------------------------- "); Trace.WriteLine(String.Format("Inserted {0} docs @ {1} writes/s, {2} RU/s in {3} sec", totalNumberOfDocumentsInserted, Math.Round(totalNumberOfDocumentsInserted / totalTimeTakenSec), Math.Round(totalRequestUnitsConsumed / totalTimeTakenSec), totalTimeTakenSec)); Trace.WriteLine(String.Format("Average RU consumption per document insert: {0}", (totalRequestUnitsConsumed / totalNumberOfDocumentsInserted))); Trace.WriteLine("--------------------------------------------------------------------- \n"); //----------------------------------------------------------------------------------------------- // Prepare for bulk update. BulkUpdateResponse bulkUpdateResponse = null; long totalNumberOfDocumentsUpdated = 0; totalRequestUnitsConsumed = 0; totalTimeTakenSec = 0; tokenSource = new CancellationTokenSource(); token = tokenSource.Token; // Generate update operations. List <UpdateOperation> updateOperations = new List <UpdateOperation>(); // Set the name field. updateOperations.Add(new SetUpdateOperation <string>("Name", "UpdatedDoc")); // Unset the description field. updateOperations.Add(new UnsetUpdateOperation("description")); for (int i = 0; i < numberOfBatches; i++) { // Generate update items. 
List <UpdateItem> updateItemsInBatch = new List <UpdateItem>(); long prefix = i * numberOfDocumentsPerBatch; Trace.TraceInformation(String.Format("Generating {0} update items for batch {1}", numberOfDocumentsPerBatch, i)); for (int j = 0; j < numberOfDocumentsPerBatch; j++) { string partitionKeyValue = (prefix + j).ToString(); string id = partitionKeyValue; updateItemsInBatch.Add(new UpdateItem(id, partitionKeyValue, updateOperations)); } // Invoke bulk update API. var tasks = new List <Task>(); tasks.Add(Task.Run(async() => { Trace.TraceInformation(String.Format("Executing bulk update for batch {0}", i)); do { try { bulkUpdateResponse = await bulkExecutor.BulkUpdateAsync( updateItems: updateItemsInBatch, maxConcurrencyPerPartitionKeyRange: null, cancellationToken: token); } catch (DocumentClientException de) { Trace.TraceError("Document client exception: {0}", de); break; } catch (Exception e) { Trace.TraceError("Exception: {0}", e); break; } } while (bulkUpdateResponse.NumberOfDocumentsUpdated < updateItemsInBatch.Count); Trace.WriteLine(String.Format("\nSummary for batch {0}:", i)); Trace.WriteLine("--------------------------------------------------------------------- "); Trace.WriteLine(String.Format("Updated {0} docs @ {1} updates/s, {2} RU/s in {3} sec", bulkUpdateResponse.NumberOfDocumentsUpdated, Math.Round(bulkUpdateResponse.NumberOfDocumentsUpdated / bulkUpdateResponse.TotalTimeTaken.TotalSeconds), Math.Round(bulkUpdateResponse.TotalRequestUnitsConsumed / bulkUpdateResponse.TotalTimeTaken.TotalSeconds), bulkUpdateResponse.TotalTimeTaken.TotalSeconds)); Trace.WriteLine(String.Format("Average RU consumption per document update: {0}", (bulkUpdateResponse.TotalRequestUnitsConsumed / bulkUpdateResponse.NumberOfDocumentsUpdated))); Trace.WriteLine("---------------------------------------------------------------------\n "); totalNumberOfDocumentsUpdated += bulkUpdateResponse.NumberOfDocumentsUpdated; totalRequestUnitsConsumed += bulkUpdateResponse.TotalRequestUnitsConsumed; totalTimeTakenSec += bulkUpdateResponse.TotalTimeTaken.TotalSeconds; }, token)); /* * tasks.Add(Task.Run(() => * { * char ch = Console.ReadKey(true).KeyChar; * if (ch == 'c' || ch == 'C') * { * tokenSource.Cancel(); * Trace.WriteLine("\nTask cancellation requested."); * } * })); */ await Task.WhenAll(tasks); } Trace.WriteLine("Overall summary of bulk update:"); Trace.WriteLine("--------------------------------------------------------------------- "); Trace.WriteLine(String.Format("Updated {0} docs @ {1} update/s, {2} RU/s in {3} sec", totalNumberOfDocumentsUpdated, Math.Round(totalNumberOfDocumentsUpdated / totalTimeTakenSec), Math.Round(totalRequestUnitsConsumed / totalTimeTakenSec), totalTimeTakenSec)); Trace.WriteLine(String.Format("Average RU consumption per document update: {0}", (totalRequestUnitsConsumed / totalNumberOfDocumentsUpdated))); Trace.WriteLine("--------------------------------------------------------------------- \n"); //----------------------------------------------------------------------------------------------- // Cleanup on finish if set in config. if (bool.Parse(ConfigurationManager.AppSettings["ShouldCleanupOnFinish"])) { Trace.TraceInformation("Deleting Database {0}", DatabaseName); await client.DeleteDatabaseAsync(UriFactory.CreateDatabaseUri(DatabaseName)); } Trace.WriteLine("\nPress any key to exit."); Console.ReadKey(); }
public GivenACosmosConnection() { someDummyDocument = new DummyCosmosDocument { Id = someDocumentId, Data = someDocumentData, ETag = someDocumentETag, SelfLink = someSelfLink }; jsonSerializerSettings = new JsonSerializerSettings(); someRequestOptions = new RequestOptions(); someFeedOptions = new FeedOptions(); someDocument = new Document(); someDocument.LoadFrom(new JsonTextReader(new StringReader(JsonConvert.SerializeObject(someDummyDocument))), jsonSerializerSettings); someCollectionUri = UriFactory.CreateDocumentCollectionUri(someDatabaseId, someCollectionId); someDocumentUri = UriFactory.CreateDocumentUri(someDatabaseId, someCollectionId, someDocumentId); connectionPolicy = new ConnectionPolicy(); retryOptions = new Mock <RetryOptions>(); retryOptions.Object.MaxRetryAttemptsOnThrottledRequests = 9; retryOptions.Object.MaxRetryWaitTimeInSeconds = 30; connectionPolicy.RetryOptions = retryOptions.Object; documentClient = new Mock <IDocumentClient>(); documentClient.Setup(x => x.ConnectionPolicy).Returns(connectionPolicy); documentClient.Setup(x => x.CreateDocumentAsync(someCollectionUri, someDummyDocument, It.IsAny <RequestOptions>(), It.IsAny <bool>(), It.IsAny <CancellationToken>())) .Returns(Task.FromResult(new ResourceResponse <Document>(someDocument))); documentClient.Setup(x => x.CreateDocumentQuery <DummyCosmosDocument>(someCollectionUri, It.IsAny <FeedOptions>())) .Returns(new List <DummyCosmosDocument> { someDummyDocument }.AsQueryable().OrderBy(d => d.Id)); documentClient.Setup(x => x.CreateDocumentQuery <DummyCosmosDocument>(someCollectionUri, It.IsAny <SqlQuerySpec>(), It.IsAny <FeedOptions>())) .Returns(new List <DummyCosmosDocument> { someDummyDocument }.AsQueryable().OrderBy(d => d.Id)); databaseFactory = new Mock <IDatabaseFactory>(); databaseFactory.Setup(x => x.DocumentClient()).Returns(documentClient.Object); someDocumentCollection = new DocumentCollection { Id = someCollectionId }; bulkExecutor = new Mock <IBulkExecutor>(); bulkExecutorWrapper = new Mock <IBulkExecutorWrapper>(); someBulkImportResponse = new BulkImportResponse(); bulkExecutor .Setup(b => b.BulkImportAsync(It.Is <IEnumerable <CosmosDocumentBase> >(l => l.Count() == 1), true, false, null, null, default)) .Callback(() => { someBulkImportResponse.GetType().GetProperty("NumberOfDocumentsImported").SetValue(someBulkImportResponse, 1); someBulkImportResponse.GetType().GetProperty("TotalRequestUnitsConsumed").SetValue(someBulkImportResponse, 1); someBulkImportResponse.GetType().GetProperty("TotalTimeTaken").SetValue(someBulkImportResponse, TimeSpan.FromSeconds(10)); }) .Returns(Task.FromResult(someBulkImportResponse)); bulkExecutor .Setup(b => b.BulkImportAsync(It.Is <IEnumerable <CosmosDocumentBase> >(l => l.Count() == 2), true, false, null, null, default)) .Callback(() => { someBulkImportResponse.GetType().GetProperty("NumberOfDocumentsImported").SetValue(someBulkImportResponse, 2); someBulkImportResponse.GetType().GetProperty("TotalRequestUnitsConsumed").SetValue(someBulkImportResponse, 2); someBulkImportResponse.GetType().GetProperty("TotalTimeTaken").SetValue(someBulkImportResponse, TimeSpan.FromSeconds(20)); }) .Returns(Task.FromResult(someBulkImportResponse)); someDatabaseUri = UriFactory.CreateDatabaseUri(someDatabaseId); documentClient.Setup(x => x.CreateDocumentCollectionQuery(someDatabaseUri, It.IsAny <FeedOptions>())) .Returns(new List <DocumentCollection> { someDocumentCollection }.AsQueryable().OrderBy(x => x.PartitionKey)); databaseFactory.Setup(x => 
x.DocumentClient()).Returns(documentClient.Object); databaseFactory.Setup(x => x.BulkExecutor(documentClient.Object, someDocumentCollection)).Returns(bulkExecutor.Object); databaseFactory.Setup(x => x.BulkExecutorWrapper(bulkExecutor.Object)).Returns(bulkExecutorWrapper.Object); sut = new CosmosConnection(databaseFactory.Object, someDatabaseId, someBatchBulkSize); }
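// A hedged example (not part of the original fixture) of how the mocks set up above could be exercised
// from a test in the same class. It assumes the xUnit test framework, that DummyCosmosDocument derives
// from CosmosDocumentBase, and that the fixture fields (bulkExecutor, someDummyDocument) are reachable
// from the test method.
// Requires: using System; using System.Collections.Generic; using System.Threading.Tasks; using Xunit;
[Fact]
public async Task BulkImportAsyncMock_ReportsCountsForASingleDocumentBatch()
{
    // A one-element batch matches the first BulkImportAsync setup in the constructor.
    var batch = new List<CosmosDocumentBase> { someDummyDocument };

    var response = await bulkExecutor.Object.BulkImportAsync(batch, true, false, null, null, default);

    // The Callback mutated the shared response instance, so the counts reflect the batch size.
    Assert.Equal(1, response.NumberOfDocumentsImported);
    Assert.Equal(TimeSpan.FromSeconds(10), response.TotalTimeTaken);
}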
/// <summary> /// Driver function for bulk import. /// </summary> /// <returns></returns> private async Task RunBulkImportAsync() { // Cleanup on start if set in config. DocumentCollection dataCollection = null; try { if (bool.Parse(ConfigurationManager.AppSettings["ShouldCleanupOnStart"])) { Database database = Utils.GetDatabaseIfExists(client, DatabaseName); if (database != null) { await client.DeleteDatabaseAsync(database.SelfLink); } Trace.TraceInformation("Creating database {0}", DatabaseName); database = await client.CreateDatabaseAsync(new Database { Id = DatabaseName }); Trace.TraceInformation(String.Format("Creating collection {0} with {1} RU/s", CollectionName, CollectionThroughput)); dataCollection = await Utils.CreatePartitionedCollectionAsync(client, DatabaseName, CollectionName, CollectionThroughput); } else { dataCollection = Utils.GetCollectionIfExists(client, DatabaseName, CollectionName); if (dataCollection == null) { throw new Exception("The data collection does not exist"); } } } catch (Exception de) { Trace.TraceError("Unable to initialize, exception message: {0}", de.Message); throw; } // Prepare for bulk import. // Creating documents with simple partition key here. string partitionKeyProperty = dataCollection.PartitionKey.Paths[0].Replace("/", ""); long numberOfDocumentsToGenerate = long.Parse(ConfigurationManager.AppSettings["NumberOfDocumentsToImport"]); int numberOfBatches = int.Parse(ConfigurationManager.AppSettings["NumberOfBatches"]); long numberOfDocumentsPerBatch = (long)Math.Floor(((double)numberOfDocumentsToGenerate) / numberOfBatches); // Set retry options high for initialization (default values). client.ConnectionPolicy.RetryOptions.MaxRetryWaitTimeInSeconds = 30; client.ConnectionPolicy.RetryOptions.MaxRetryAttemptsOnThrottledRequests = 9; IBulkExecutor bulkExecutor = new BulkExecutor(client, dataCollection); await bulkExecutor.InitializeAsync(); // Set retries to 0 to pass control to bulk executor. 
client.ConnectionPolicy.RetryOptions.MaxRetryWaitTimeInSeconds = 0; client.ConnectionPolicy.RetryOptions.MaxRetryAttemptsOnThrottledRequests = 0; if (bool.Parse(ConfigurationManager.AppSettings["ShouldConfigureIndex"])) { // Configure indexing policy ResourceResponse <DocumentCollection> containerResponse = await client.ReadDocumentCollectionAsync(UriFactory.CreateDocumentCollectionUri(DatabaseName, CollectionName)); // Set the indexing mode to consistent containerResponse.Resource.IndexingPolicy.IndexingMode = IndexingMode.Consistent; // Set the indexing to automatic containerResponse.Resource.IndexingPolicy.Automatic = true; // Add an included path containerResponse.Resource.IndexingPolicy.IncludedPaths.Add(new IncludedPath { Path = "/partitionKey/*" }); // Add an included path containerResponse.Resource.IndexingPolicy.IncludedPaths.Add(new IncludedPath { Path = "/reportId/*" }); // Add an included path containerResponse.Resource.IndexingPolicy.IncludedPaths.Add(new IncludedPath { Path = "/parameterDateTime/*" }); // Add an included path containerResponse.Resource.IndexingPolicy.IncludedPaths.Add(new IncludedPath { Path = "/parameterId/*" }); // Add an included path containerResponse.Resource.IndexingPolicy.IncludedPaths.Add(new IncludedPath { Path = "/tailNumber/*" }); // Add an included path containerResponse.Resource.IndexingPolicy.IncludedPaths.Add(new IncludedPath { Path = "/flightLegId/*" }); // Add an excluded path //containerResponse.Resource.IndexingPolicy.ExcludedPaths.Add(new ExcludedPath { Path = "/*" }); // Add an excluded path //containerResponse.Resource.IndexingPolicy.ExcludedPaths.Add(new ExcludedPath { Path = "/\"_etag\"/?" }); // Add a composite index //containerResponse.Resource.IndexingPolicy.CompositeIndexes.Add(new Collection<CompositePath> { new CompositePath() { Path = "/parameterDateTime", Order = CompositePathSortOrder.Descending }, new CompositePath() { Path = "/partitionKey", Order = CompositePathSortOrder.Descending } }); // Update container with changes ResourceResponse <DocumentCollection> result = await client.ReplaceDocumentCollectionAsync(containerResponse.Resource); Console.WriteLine("Updated index. Result: " + result.StatusCode); // Stop after the index has been updated; no documents are imported in this mode. return; } BulkImportResponse bulkImportResponse = null; long totalNumberOfDocumentsInserted = 0; double totalRequestUnitsConsumed = 0; double totalTimeTakenSec = 0; var tokenSource = new CancellationTokenSource(); var token = tokenSource.Token; // Import the configured number of batches, then fall through to the overall summary below. for (long i = 0; i < numberOfBatches; i++) { // Generate JSON-serialized documents to import. List <string> documentsToImportInBatch = new List <string>(); long prefix = i * numberOfDocumentsPerBatch; Trace.TraceInformation(String.Format("Generating {0} documents to import for batch {1}", numberOfDocumentsPerBatch, i)); for (int j = 0; j < numberOfDocumentsPerBatch; j++) { documentsToImportInBatch.Add(Utils.GenerateRandomDocumentString()); } // Invoke bulk import API. 
var tasks = new List <Task>(); tasks.Add(Task.Run(async() => { Trace.TraceInformation(String.Format("Executing bulk import for batch {0}", i)); do { try { bulkImportResponse = await bulkExecutor.BulkImportAsync( documents: documentsToImportInBatch, enableUpsert: true, disableAutomaticIdGeneration: true, maxConcurrencyPerPartitionKeyRange: null, maxInMemorySortingBatchSize: null, cancellationToken: token); } catch (DocumentClientException de) { Trace.TraceError("Document client exception: {0}", de); break; } catch (Exception e) { Trace.TraceError("Exception: {0}", e); break; } } while (bulkImportResponse.NumberOfDocumentsImported < documentsToImportInBatch.Count); Trace.WriteLine(String.Format("\nSummary for batch {0}:", i)); Trace.WriteLine("--------------------------------------------------------------------- "); Trace.WriteLine(String.Format("Inserted {0} docs @ {1} writes/s, {2} RU/s in {3} sec", bulkImportResponse.NumberOfDocumentsImported, Math.Round(bulkImportResponse.NumberOfDocumentsImported / bulkImportResponse.TotalTimeTaken.TotalSeconds), Math.Round(bulkImportResponse.TotalRequestUnitsConsumed / bulkImportResponse.TotalTimeTaken.TotalSeconds), bulkImportResponse.TotalTimeTaken.TotalSeconds)); Trace.WriteLine(String.Format("Average RU consumption per document: {0}", (bulkImportResponse.TotalRequestUnitsConsumed / bulkImportResponse.NumberOfDocumentsImported))); Trace.WriteLine("---------------------------------------------------------------------\n "); totalNumberOfDocumentsInserted += bulkImportResponse.NumberOfDocumentsImported; totalRequestUnitsConsumed += bulkImportResponse.TotalRequestUnitsConsumed; totalTimeTakenSec += bulkImportResponse.TotalTimeTaken.TotalSeconds; }, token)); /* * tasks.Add(Task.Run(() => * { * char ch = Console.ReadKey(true).KeyChar; * if (ch == 'c' || ch == 'C') * { * tokenSource.Cancel(); * Trace.WriteLine("\nTask cancellation requested."); * } * })); */ await Task.WhenAll(tasks); } Trace.WriteLine("Overall summary:"); Trace.WriteLine("--------------------------------------------------------------------- "); Trace.WriteLine(String.Format("Inserted {0} docs @ {1} writes/s, {2} RU/s in {3} sec", totalNumberOfDocumentsInserted, Math.Round(totalNumberOfDocumentsInserted / totalTimeTakenSec), Math.Round(totalRequestUnitsConsumed / totalTimeTakenSec), totalTimeTakenSec)); Trace.WriteLine(String.Format("Average RU consumption per document: {0}", (totalRequestUnitsConsumed / totalNumberOfDocumentsInserted))); Trace.WriteLine("--------------------------------------------------------------------- "); // Cleanup on finish if set in config. if (bool.Parse(ConfigurationManager.AppSettings["ShouldCleanupOnFinish"])) { Trace.TraceInformation("Deleting Database {0}", DatabaseName); await client.DeleteDatabaseAsync(UriFactory.CreateDatabaseUri(DatabaseName)); } Trace.WriteLine("\nPress any key to exit."); Console.ReadKey(); }
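// A hedged sketch of what a generator like Utils.GenerateRandomDocumentString used above could return;
// the real helper is not included in this excerpt. The property names ("partitionKey", "payload") are
// assumptions and must match the collection's actual partition key path.
// Requires: using System; using Newtonsoft.Json;
private static string GenerateRandomDocumentString()
{
    string id = Guid.NewGuid().ToString();
    return JsonConvert.SerializeObject(new
    {
        id,                                     // Cosmos DB document id
        partitionKey = id,                      // simple partition key: reuse the id
        payload = Guid.NewGuid().ToString()     // arbitrary content so the document is non-trivial
    });
}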
/// <summary> /// Driver function for bulk import. /// </summary> /// <returns></returns> private async Task RunBulkImportAsync() { // Cleanup on start if set in config. DocumentCollection dataCollection = null; try { if (bool.Parse(ConfigurationManager.AppSettings["ShouldCleanupOnStart"])) { Database database = Utils.GetDatabaseIfExists(client, DatabaseName); if (database != null) { await client.DeleteDatabaseAsync(database.SelfLink); } Trace.TraceInformation("Creating database {0}", DatabaseName); database = await client.CreateDatabaseAsync(new Database { Id = DatabaseName }); Trace.TraceInformation(String.Format("Creating collection {0} with {1} RU/s", CollectionName, CollectionThroughput)); dataCollection = await Utils.CreatePartitionedCollectionAsync(client, DatabaseName, CollectionName, CollectionThroughput); } else { dataCollection = Utils.GetCollectionIfExists(client, DatabaseName, CollectionName); if (dataCollection == null) { throw new Exception("The data collection does not exist"); } } } catch (Exception de) { Trace.TraceError("Unable to initialize, exception message: {0}", de.Message); throw; } // Prepare for bulk import. // Creating documents with simple partition key here. string partitionKeyProperty = dataCollection.PartitionKey.Paths[0].Replace("/", ""); long numberOfDocumentsToGenerate = long.Parse(ConfigurationManager.AppSettings["NumberOfDocumentsToImport"]); // Set retry options high for initialization (default values). client.ConnectionPolicy.RetryOptions.MaxRetryWaitTimeInSeconds = 30; client.ConnectionPolicy.RetryOptions.MaxRetryAttemptsOnThrottledRequests = 9; IBulkExecutor graphbulkExecutor = new GraphBulkExecutor(client, dataCollection); await graphbulkExecutor.InitializeAsync(); // Set retries to 0 to pass control to bulk executor. 
client.ConnectionPolicy.RetryOptions.MaxRetryWaitTimeInSeconds = 0; client.ConnectionPolicy.RetryOptions.MaxRetryAttemptsOnThrottledRequests = 0; var tokenSource = new CancellationTokenSource(); var token = tokenSource.Token; BulkImportResponse vResponse = null; BulkImportResponse eResponse = null; try { vResponse = await graphbulkExecutor.BulkImportAsync( Utils.GenerateVertices(numberOfDocumentsToGenerate), enableUpsert : true, disableAutomaticIdGeneration : true, maxConcurrencyPerPartitionKeyRange : null, maxInMemorySortingBatchSize : null, cancellationToken : token); eResponse = await graphbulkExecutor.BulkImportAsync( Utils.GenerateEdges(numberOfDocumentsToGenerate), enableUpsert : true, disableAutomaticIdGeneration : true, maxConcurrencyPerPartitionKeyRange : null, maxInMemorySortingBatchSize : null, cancellationToken : token); } catch (DocumentClientException de) { Trace.TraceError("Document client exception: {0}", de); } catch (Exception e) { Trace.TraceError("Exception: {0}", e); } Console.WriteLine("\nSummary for batch"); Console.WriteLine("--------------------------------------------------------------------- "); Console.WriteLine( "Inserted {0} graph elements ({1} vertices, {2} edges) @ {3} writes/s, {4} RU/s in {5} sec)", vResponse.NumberOfDocumentsImported + eResponse.NumberOfDocumentsImported, vResponse.NumberOfDocumentsImported, eResponse.NumberOfDocumentsImported, Math.Round( (vResponse.NumberOfDocumentsImported) / (vResponse.TotalTimeTaken.TotalSeconds + eResponse.TotalTimeTaken.TotalSeconds)), Math.Round( (vResponse.TotalRequestUnitsConsumed + eResponse.TotalRequestUnitsConsumed) / (vResponse.TotalTimeTaken.TotalSeconds + eResponse.TotalTimeTaken.TotalSeconds)), vResponse.TotalTimeTaken.TotalSeconds + eResponse.TotalTimeTaken.TotalSeconds); Console.WriteLine( "Average RU consumption per insert: {0}", (vResponse.TotalRequestUnitsConsumed + eResponse.TotalRequestUnitsConsumed) / (vResponse.NumberOfDocumentsImported + eResponse.NumberOfDocumentsImported)); Console.WriteLine("---------------------------------------------------------------------\n "); if (vResponse.BadInputDocuments.Count > 0 || eResponse.BadInputDocuments.Count > 0) { using (System.IO.StreamWriter file = new System.IO.StreamWriter(@".\BadVertices.txt", true)) { foreach (object doc in vResponse.BadInputDocuments) { file.WriteLine(doc); } } using (System.IO.StreamWriter file = new System.IO.StreamWriter(@".\BadEdges.txt", true)) { foreach (object doc in eResponse.BadInputDocuments) { file.WriteLine(doc); } } } // Cleanup on finish if set in config. if (bool.Parse(ConfigurationManager.AppSettings["ShouldCleanupOnFinish"])) { Trace.TraceInformation("Deleting Database {0}", DatabaseName); await client.DeleteDatabaseAsync(UriFactory.CreateDatabaseUri(DatabaseName)); } //Trace.WriteLine("\nPress any key to exit."); //Console.ReadKey(); }
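// A hedged sketch of a vertex generator in the spirit of Utils.GenerateVertices called above; the real
// helper is not part of this excerpt. The label "demoVertex" and the partition key property name "pk"
// are assumptions and must match the graph collection's definition.
// Requires: using System.Collections.Generic; using Microsoft.Azure.CosmosDB.BulkExecutor.Graph.Element;
private static IEnumerable<GremlinVertex> GenerateVertices(long count)
{
    for (long i = 0; i < count; i++)
    {
        GremlinVertex v = new GremlinVertex(i.ToString(), "demoVertex");
        v.AddProperty("pk", i.ToString());     // partition key property required for import
        v.AddProperty("name", "vertex_" + i);  // arbitrary payload property
        yield return v;
    }
}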
/// <summary> /// Bulk import from a file. /// </summary> /// <returns></returns> public static async Task RunBulkImportAsync(string file, string orderId) { // Read the source file. try { DocumentCollection dataCollection = null; if (client == null) { client = new DocumentClient(new Uri(ConfigurationManager.AppSettings["endpoint"]), ConfigurationManager.AppSettings["authKey"], ConnectionPolicy); } dataCollection = Utils.GetCollectionIfExists(client, DatabaseId, qrcodeTable); if (dataCollection == null) { throw new Exception("The data collection does not exist"); } // Prepare for bulk import. // Creating documents with simple partition key here. string partitionKeyProperty = dataCollection.PartitionKey.Paths[0].Replace("/", ""); // Set retry options high for initialization (default values). client.ConnectionPolicy.RetryOptions.MaxRetryWaitTimeInSeconds = 30; client.ConnectionPolicy.RetryOptions.MaxRetryAttemptsOnThrottledRequests = 9; IBulkExecutor bulkExecutor = new BulkExecutor(client, dataCollection); await bulkExecutor.InitializeAsync(); // Set retries to 0 to pass control to bulk executor. client.ConnectionPolicy.RetryOptions.MaxRetryWaitTimeInSeconds = 0; client.ConnectionPolicy.RetryOptions.MaxRetryAttemptsOnThrottledRequests = 0; BulkImportResponse bulkImportResponse = null; long totalNumberOfDocumentsInserted = 0; double totalRequestUnitsConsumed = 0; double totalTimeTakenSec = 0; var tokenSource = new CancellationTokenSource(); var token = tokenSource.Token; StreamReader sourceFileStream = new StreamReader(file, Encoding.Default); for (int i = 0; i < numberOfBatches && sourceFileStream != null; i++) { // Generate JSON-serialized documents to import. List <string> documentsToImportInBatch = new List <string>(); long prefix = i * numberOfDocumentsPerBatch; // Read the next batch of documents from the file and add them to the bulk batch. documentsToImportInBatch = Utils.AddDocumentFromFile(sourceFileStream, numberOfDocumentsPerBatch, orderId, out sourceFileStream); // Invoke bulk import API. var tasks = new List <Task>(); tasks.Add(Task.Run(async() => { //Trace.TraceInformation(String.Format("Executing bulk import for batch {0}", i)); do { try { bulkImportResponse = await bulkExecutor.BulkImportAsync( documents: documentsToImportInBatch, enableUpsert: true, disableAutomaticIdGeneration: true, maxConcurrencyPerPartitionKeyRange: null, maxInMemorySortingBatchSize: null, cancellationToken: token); } catch (DocumentClientException de) { //Trace.TraceError("Document client exception: {0}", de); break; } catch (Exception e) { //Trace.TraceError("Exception: {0}", e); break; } } while (bulkImportResponse.NumberOfDocumentsImported < documentsToImportInBatch.Count); totalNumberOfDocumentsInserted += bulkImportResponse.NumberOfDocumentsImported; totalRequestUnitsConsumed += bulkImportResponse.TotalRequestUnitsConsumed; totalTimeTakenSec += bulkImportResponse.TotalTimeTaken.TotalSeconds; logger.InfoFormat("fileName:{0} orderId:{1} ", file, orderId); logger.InfoFormat("totalNumberOfDocumentsInserted:{0} totalRequestUnitsConsumed:{1} totalTimeTakenSec:{2}", totalNumberOfDocumentsInserted, totalRequestUnitsConsumed, totalTimeTakenSec); }, token)); await Task.WhenAll(tasks); } } catch (Exception de) { logger.InfoFormat("Insert Error: {0}", de); } }
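// A hedged sketch of what a helper like Utils.AddDocumentFromFile referenced above might do; the real
// implementation is not shown. It assumes one JSON document per line, stamps each document with the
// order id (the property name "orderId" is an assumption), and returns null through the out parameter
// at end of file, which is the condition the calling loop checks.
// Requires: using System.Collections.Generic; using System.IO; using Newtonsoft.Json; using Newtonsoft.Json.Linq;
private static List<string> AddDocumentFromFile(StreamReader reader, long batchSize, string orderId, out StreamReader remaining)
{
    var documents = new List<string>();
    string line;
    while (documents.Count < batchSize && (line = reader.ReadLine()) != null)
    {
        // Tag each document so the imported data can be traced back to this order/run.
        JObject doc = JObject.Parse(line);
        doc["orderId"] = orderId;
        documents.Add(doc.ToString(Formatting.None));
    }
    // Hand the reader back, or null once the file has been fully consumed.
    remaining = reader.EndOfStream ? null : reader;
    return documents;
}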