/// <inheritdoc/>
async Task<IGraphLoader> IGraph.CreateBulkLoader()
{
    // The executor is built on a client obtained from CloneClient() and the Container member.
    var clonedClient = CloneClient();
    var bulkExecutor = new GraphBulkExecutor(clonedClient, Container);

    // The executor is initialized before it is handed to the importer.
    await bulkExecutor.InitializeAsync();

    return new BulkImporter(bulkExecutor, _serializer, _logger);
}
/// <summary>
/// Generates random edges for each source vertex index and bulk-imports them,
/// issuing one import call per source vertex.
/// </summary>
/// <param name="graphbulkExecutor">Executor used to perform the import calls.</param>
/// <param name="spec">Edge specification: label, source/target vertex labels, edge-count range and property count.</param>
/// <param name="fromCount">Number of source vertices; indices 0..fromCount-1 are iterated.</param>
/// <param name="toCount">Number of candidate target vertices; destinations are drawn from 0..toCount-1.</param>
/// <returns>A task that completes when every source vertex has been processed.</returns>
/// <remarks>
/// Import failures are logged through <see cref="Trace"/> and do not stop the loop.
/// </remarks>
private async Task RunBulkImportEdgesAsync(
    GraphBulkExecutor graphbulkExecutor,
    EdgeSpec spec,
    int fromCount,
    int toCount)
{
    using (var tokenSource = new CancellationTokenSource())
    {
        var token = tokenSource.Token;
        Random r = new Random();

        for (int i = 0; i < fromCount; i++)
        {
            string sourceVertexId = createVertexId(spec.from, i);

            int edgeCount = r.Next(spec.maxCount - spec.minCount) + spec.minCount;

            // Destinations are unique per source vertex, so no more than toCount
            // edges can exist; without this cap the loop below could never finish.
            edgeCount = Math.Min(edgeCount, toCount);

            // The set must live for the whole source vertex. Re-creating it per edge
            // (as the previous code did) made the duplicate check a no-op and produced
            // repeated edge ids for the same (source, destination) pair.
            var neighbors = new HashSet<int>();
            var edges = new List<GremlinEdge>();

            while (neighbors.Count < edgeCount)
            {
                int destination = r.Next(toCount);
                if (!neighbors.Add(destination))
                {
                    // Already linked to this destination; draw again.
                    continue;
                }

                string destinationVertexId = createVertexId(spec.to, destination);
                string id = createEdgeId(i, spec.from, destination, spec.to);

                // The vertex ids are passed a second time as the last two arguments,
                // exactly as the original call did.
                GremlinEdge edge = new GremlinEdge(
                    id,
                    spec.label,
                    sourceVertexId,
                    destinationVertexId,
                    spec.from,
                    spec.to,
                    sourceVertexId,
                    destinationVertexId);

                for (int k = 0; k < spec.numberOfProperties; k++)
                {
                    edge.AddProperty("property_" + k, Guid.NewGuid().ToString());
                }

                edges.Add(edge);
            }

            try
            {
                await graphbulkExecutor.BulkImportAsync(
                    edges,
                    enableUpsert: true,
                    disableAutomaticIdGeneration: true,
                    maxConcurrencyPerPartitionKeyRange: null,
                    maxInMemorySortingBatchSize: null,
                    cancellationToken: token);
            }
            catch (DocumentClientException de)
            {
                Trace.TraceError("Document client exception: {0}", de);
            }
            catch (Exception e)
            {
                Trace.TraceError("Exception: {0}", e);
            }
        }
    }
}
/// <summary>
/// Reads the graph specification from <c>./spec.json</c>, initializes a
/// <see cref="GraphBulkExecutor"/> for the configured collection, then imports
/// all vertices followed by all edges described by the specification.
/// </summary>
/// <returns>A task that completes when all vertex and edge imports have been attempted.</returns>
private async Task RunAsync()
{
    GraphSpec spec = JsonConvert.DeserializeObject<GraphSpec>(System.IO.File.ReadAllText("./spec.json"));

    // Awaited instead of blocking on .Result: avoids tying up a thread and surfaces
    // the original exception rather than an AggregateException wrapper.
    var dataCollection = (await client.ReadDocumentCollectionAsync(
        UriFactory.CreateDocumentCollectionUri(DatabaseName, CollectionName))).Resource;

    // Retry options are raised while the executor initializes.
    client.ConnectionPolicy.RetryOptions.MaxRetryWaitTimeInSeconds = 30;
    client.ConnectionPolicy.RetryOptions.MaxRetryAttemptsOnThrottledRequests = 9;

    GraphBulkExecutor graphbulkExecutor = new GraphBulkExecutor(client, dataCollection);
    await graphbulkExecutor.InitializeAsync();

    // Client-side retries are then switched off for the import calls.
    client.ConnectionPolicy.RetryOptions.MaxRetryWaitTimeInSeconds = 0;
    client.ConnectionPolicy.RetryOptions.MaxRetryAttemptsOnThrottledRequests = 0;

    foreach (var vertexSpec in spec.vertices)
    {
        await RunBulkImportVerticesAsync(graphbulkExecutor, vertexSpec);
    }

    foreach (var edgeSpec in spec.edges)
    {
        // NOTE(review): FindLast yields the default value when no vertex spec carries the
        // requested label; the .count access would then fail if the spec is a reference
        // type. Confirm spec.json is validated before it reaches this point.
        int sourceCount = spec.vertices.FindLast(vs => vs.label.Equals(edgeSpec.from)).count;
        int targetCount = spec.vertices.FindLast(vs => vs.label.Equals(edgeSpec.to)).count;

        await RunBulkImportEdgesAsync(graphbulkExecutor, edgeSpec, sourceCount, targetCount);
    }

    Console.WriteLine("Finished importing events");
}
/// <summary>
/// Creates the vertices described by <paramref name="vertexSpec"/> and bulk-imports
/// them in batches of at most 100000 vertices.
/// </summary>
/// <param name="graphbulkExecutor">Executor used to perform the import calls.</param>
/// <param name="vertexSpec">Vertex specification: label, vertex count and property count.</param>
/// <returns>A task that completes when every batch has been attempted.</returns>
private async Task RunBulkImportVerticesAsync(GraphBulkExecutor graphbulkExecutor, VertexSpec vertexSpec)
{
    var cancellationSource = new CancellationTokenSource();
    var cancellationToken = cancellationSource.Token;

    int nextVertexIndex = 0;
    while (nextVertexIndex < vertexSpec.count)
    {
        // Fill one batch: stop at the batch limit or when all vertices are generated.
        var batch = new List<GremlinVertex>();
        while (batch.Count < 100000 && nextVertexIndex < vertexSpec.count)
        {
            string vertexId = createVertexId(vertexSpec.label, nextVertexIndex);
            var vertex = new GremlinVertex(vertexId, vertexSpec.label);

            int propertyIndex = 0;
            while (propertyIndex < vertexSpec.numberOfProperties)
            {
                vertex.AddProperty("property_" + propertyIndex, Guid.NewGuid().ToString());
                propertyIndex++;
            }

            // The vertex id is also stored under the pkPropertyName property.
            vertex.AddProperty(pkPropertyName, vertexId);

            nextVertexIndex++;
            batch.Add(vertex);
        }

        // A failed batch is logged and the loop moves on to the next one.
        try
        {
            await graphbulkExecutor.BulkImportAsync(
                batch,
                enableUpsert: true,
                disableAutomaticIdGeneration: true,
                maxConcurrencyPerPartitionKeyRange: null,
                maxInMemorySortingBatchSize: null,
                cancellationToken: cancellationToken);
        }
        catch (DocumentClientException clientError)
        {
            Trace.TraceError("Document client exception: {0}", clientError);
        }
        catch (Exception error)
        {
            Trace.TraceError("Exception: {0}", error);
        }
    }
}
/// <summary>
/// Driver function for bulk import: prepares the database/collection according to
/// configuration, imports generated vertices and edges, prints a summary, and
/// optionally deletes the database afterwards.
/// </summary>
/// <returns>A task that completes when the import run (and optional cleanup) is done.</returns>
/// <remarks>
/// Reads the app settings <c>ShouldCleanupOnStart</c>, <c>NumberOfDocumentsToImport</c>
/// and <c>ShouldCleanupOnFinish</c>. Initialization failures are logged and rethrown.
/// Import failures are logged; the summary is then skipped, but cleanup still runs.
/// </remarks>
private async Task RunBulkImportAsync()
{
    // Cleanup on start if set in config.
    DocumentCollection dataCollection = null;
    try
    {
        if (bool.Parse(ConfigurationManager.AppSettings["ShouldCleanupOnStart"]))
        {
            Database database = Utils.GetDatabaseIfExists(client, DatabaseName);
            if (database != null)
            {
                await client.DeleteDatabaseAsync(database.SelfLink);
            }

            Trace.TraceInformation("Creating database {0}", DatabaseName);
            database = await client.CreateDatabaseAsync(new Database { Id = DatabaseName });

            Trace.TraceInformation(String.Format("Creating collection {0} with {1} RU/s", CollectionName, CollectionThroughput));
            dataCollection = await Utils.CreatePartitionedCollectionAsync(client, DatabaseName, CollectionName, CollectionThroughput);
        }
        else
        {
            dataCollection = Utils.GetCollectionIfExists(client, DatabaseName, CollectionName);
            if (dataCollection == null)
            {
                throw new Exception("The data collection does not exist");
            }
        }
    }
    catch (Exception de)
    {
        Trace.TraceError("Unable to initialize, exception message: {0}", de.Message);
        throw;
    }

    // Prepare for bulk import.
    long numberOfDocumentsToGenerate = long.Parse(ConfigurationManager.AppSettings["NumberOfDocumentsToImport"]);

    // Set retry options high for initialization (default values).
    client.ConnectionPolicy.RetryOptions.MaxRetryWaitTimeInSeconds = 30;
    client.ConnectionPolicy.RetryOptions.MaxRetryAttemptsOnThrottledRequests = 9;

    IBulkExecutor graphbulkExecutor = new GraphBulkExecutor(client, dataCollection);
    await graphbulkExecutor.InitializeAsync();

    // Set retries to 0 to pass control to bulk executor.
    client.ConnectionPolicy.RetryOptions.MaxRetryWaitTimeInSeconds = 0;
    client.ConnectionPolicy.RetryOptions.MaxRetryAttemptsOnThrottledRequests = 0;

    BulkImportResponse vResponse = null;
    BulkImportResponse eResponse = null;

    using (var tokenSource = new CancellationTokenSource())
    {
        var token = tokenSource.Token;
        try
        {
            vResponse = await graphbulkExecutor.BulkImportAsync(
                Utils.GenerateVertices(numberOfDocumentsToGenerate),
                enableUpsert: true,
                disableAutomaticIdGeneration: true,
                maxConcurrencyPerPartitionKeyRange: null,
                maxInMemorySortingBatchSize: null,
                cancellationToken: token);

            eResponse = await graphbulkExecutor.BulkImportAsync(
                Utils.GenerateEdges(numberOfDocumentsToGenerate),
                enableUpsert: true,
                disableAutomaticIdGeneration: true,
                maxConcurrencyPerPartitionKeyRange: null,
                maxInMemorySortingBatchSize: null,
                cancellationToken: token);
        }
        catch (DocumentClientException de)
        {
            Trace.TraceError("Document client exception: {0}", de);
        }
        catch (Exception e)
        {
            Trace.TraceError("Exception: {0}", e);
        }
    }

    // If either import threw, its response is still null. The summary previously
    // dereferenced both unconditionally and crashed with a NullReferenceException.
    if (vResponse != null && eResponse != null)
    {
        var totalImported = vResponse.NumberOfDocumentsImported + eResponse.NumberOfDocumentsImported;
        var totalRequestUnits = vResponse.TotalRequestUnitsConsumed + eResponse.TotalRequestUnitsConsumed;
        var totalSeconds = vResponse.TotalTimeTaken.TotalSeconds + eResponse.TotalTimeTaken.TotalSeconds;

        Console.WriteLine("\nSummary for batch");
        Console.WriteLine("--------------------------------------------------------------------- ");
        Console.WriteLine(
            "Inserted {0} graph elements ({1} vertices, {2} edges) @ {3} writes/s, {4} RU/s in {5} sec)",
            totalImported,
            vResponse.NumberOfDocumentsImported,
            eResponse.NumberOfDocumentsImported,
            // Throughput covers vertices AND edges, matching the combined duration
            // in the denominator (the previous code counted vertices only).
            Math.Round(totalImported / totalSeconds),
            Math.Round(totalRequestUnits / totalSeconds),
            totalSeconds);
        Console.WriteLine(
            "Average RU consumption per insert: {0}",
            // Avoid reporting NaN when nothing was imported.
            totalImported > 0 ? totalRequestUnits / totalImported : 0);
        Console.WriteLine("---------------------------------------------------------------------\n ");

        if (vResponse.BadInputDocuments.Count > 0 || eResponse.BadInputDocuments.Count > 0)
        {
            // Both files are opened in append mode.
            using (System.IO.StreamWriter file = new System.IO.StreamWriter(@".\BadVertices.txt", true))
            {
                foreach (object doc in vResponse.BadInputDocuments)
                {
                    file.WriteLine(doc);
                }
            }

            using (System.IO.StreamWriter file = new System.IO.StreamWriter(@".\BadEdges.txt", true))
            {
                foreach (object doc in eResponse.BadInputDocuments)
                {
                    file.WriteLine(doc);
                }
            }
        }
    }
    else
    {
        Trace.TraceError("Bulk import did not complete; skipping the import summary.");
    }

    // Cleanup on finish if set in config.
    if (bool.Parse(ConfigurationManager.AppSettings["ShouldCleanupOnFinish"]))
    {
        Trace.TraceInformation("Deleting Database {0}", DatabaseName);
        await client.DeleteDatabaseAsync(UriFactory.CreateDatabaseUri(DatabaseName));
    }
}
/// <summary>
/// Entry point: copies graph data from the source collection to the destination
/// collection by bulk-importing vertices first and then edges, and prints the
/// elapsed time in milliseconds.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    Task.Run(async () =>
    {
        // Both clients are disposed when the migration finishes or fails.
        using (DocumentClient srcClient = new DocumentClient(new Uri(Program.srcEndpoint), Program.srcAuthKey, Program.ConnectionPolicy))
        using (DocumentClient destClient = new DocumentClient(new Uri(Program.destEndpoint), Program.destAuthKey, Program.ConnectionPolicy))
        {
            // Awaited rather than read through .Result, so the lambda does not block
            // a thread and failures are not wrapped in an AggregateException.
            DocumentCollection srcCollection = await Program.ReadCollectionAsync(srcClient, srcDatabaseName, srcCollectionName, false);
            DocumentCollection destCollection = await Program.ReadCollectionAsync(destClient, destDatabaseName, destCollectionName, true);

            Stopwatch watch = new Stopwatch();
            watch.Start();

            IBulkExecutor documentBulkImporter = new GraphBulkExecutor(destClient, destCollection);
            await documentBulkImporter.InitializeAsync();

            // Vertices are imported before edges.
            IEnumerable<JObject> vertexdocs = GetDocs(srcClient, srcCollection, true);
            try
            {
                await documentBulkImporter.BulkImportAsync(
                    vertexdocs.Select(vertex => ConvertToGremlinVertex(vertex)),
                    enableUpsert: true,
                    maxInMemorySortingBatchSize: 100000);
            }
            catch (DocumentClientException de)
            {
                Trace.TraceError("Document client exception: {0}", de);
                throw;
            }
            catch (Exception e)
            {
                Trace.TraceError("Exception: {0}", e);
                throw;
            }

            Console.WriteLine("Importing edges");

            IEnumerable<JObject> edgeDocs = GetDocs(srcClient, srcCollection, false);
            try
            {
                await documentBulkImporter.BulkImportAsync(
                    edgeDocs.Select(edge => ConvertToGremlinEdge(edge)),
                    enableUpsert: true,
                    maxInMemorySortingBatchSize: 100000);
            }
            catch (DocumentClientException de)
            {
                Trace.TraceError("Document client exception: {0}", de);
                throw;
            }
            catch (Exception e)
            {
                Trace.TraceError("Exception: {0}", e);
                throw;
            }

            watch.Stop();
            Console.WriteLine("Time Taken: " + watch.ElapsedMilliseconds);
        }
    }).GetAwaiter().GetResult();

    Console.WriteLine("Done, Please press any key to continue...");
    Console.ReadLine();
}