/**
 * ==== Multiple documents with `BulkAllObservable` helper
 *
 * Using the `BulkAllObservable` helper allows you to focus on the overall objective of indexing,
 * without having to concern yourself with retry, backoff or chunking mechanics.
 * Multiple documents can be indexed using the `BulkAll` method and the `Wait()` extension method.
 *
 * This helper exposes functionality to automatically retry and back off in the event of an indexing failure,
 * and to control the number of documents indexed in a single HTTP request. In the example below, each request
 * contains 1000 documents, chunked from the original input. For a large number of documents this can result in
 * many HTTP requests, each containing 1000 documents (the last request may contain fewer, depending on the total number).
 *
 * The helper will also lazily enumerate an `IEnumerable<T>` collection, allowing you to index a large number of documents easily.
 */
public async Task BulkDocumentsWithObservableHelper()
{
    // hide
    var people = new[]
    {
        new Person { Id = 1, FirstName = "Martijn", LastName = "Laarman" },
        new Person { Id = 2, FirstName = "Stuart", LastName = "Cam" },
        new Person { Id = 3, FirstName = "Russ", LastName = "Cam" }
        // snip
    };

    var bulkAllObservable = client.BulkAll(people, b => b
        .Index("people")
        .BackOffTime("30s") // <1> how long to wait between retries
        .BackOffRetries(2) // <2> how many retries are attempted if a failure occurs
        .RefreshOnCompleted()
        .MaxDegreeOfParallelism(Environment.ProcessorCount)
        .Size(1000) // <3> items per bulk request
    )
    .Wait(TimeSpan.FromMinutes(15), next => // <4> perform the indexing and wait up to 15 minutes; whilst the BulkAll calls are asynchronous, this is a blocking operation
    {
        // do something e.g. write number of pages to console
    });
}
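The example above assumes a simple `Person` document type that isn't shown. A minimal sketch, using only the three properties that actually appear in the snippet:

// Minimal sketch of the Person document type assumed by the example above;
// only Id, FirstName and LastName appear in the snippet, anything else is omitted.
public class Person
{
    public int Id { get; set; }
    public string FirstName { get; set; }
    public string LastName { get; set; }
}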
public void BulkAdd(List<LogEvent> logEvents)
{
    var bulkAllObservable = _client.BulkAll(logEvents, l => l
        .Index(IndexName)
        .BackOffTime("30s")
        .BackOffRetries(2)
        .RefreshOnCompleted()
        .MaxDegreeOfParallelism(Environment.ProcessorCount)
        .Size(1000))
        .Wait(TimeSpan.FromMinutes(15), next => { });
}
public bool BulkAll<T>(string indexName, IEnumerable<T> docs) where T : class
{
    const int size = 1000;
    var tokenSource = new CancellationTokenSource();

    var observableBulk = B2BElasticClient.BulkAll(docs, b => b
        .Index(indexName)
        .BackOffTime(TimeSpan.FromSeconds(10))
        .BackOffRetries(2)
        .RefreshOnCompleted()
        .MaxDegreeOfParallelism(Environment.ProcessorCount)
        .Size(size)
        .BufferToBulk((r, buffer) => r.IndexMany(buffer)),
        tokenSource.Token);

    var countdownEvent = new CountdownEvent(1);
    Exception exception = null;

    void OnCompleted()
    {
        Logger.Info("BulkAll Finished");
        countdownEvent.Signal();
    }

    var bulkAllObserver = new BulkAllObserver(
        onNext: response => Logger.Info($"Indexed {response.Page * size} with {response.Retries} retries"),
        onError: ex =>
        {
            Logger.Info("BulkAll Error : {0}", ex);
            exception = ex;
            countdownEvent.Signal();
        },
        OnCompleted);

    observableBulk.Subscribe(bulkAllObserver);
    countdownEvent.Wait(tokenSource.Token);

    if (exception != null)
    {
        Logger.Info("BulkHotelGeo Error : {0}", exception);
        return false;
    }

    return true;
}
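Since this wrapper logs the failure itself and reports it through its return value rather than throwing, a caller only needs to check the `bool`. A hypothetical call site (`indexer` and `hotels` are made up for illustration):

// Hypothetical call site: the wrapper has already logged the underlying exception.
var succeeded = indexer.BulkAll("hotels", hotels);
if (!succeeded)
{
    Logger.Info("Bulk indexing of hotels failed; see the logs for details");
}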
public void BulkAllGeneric<T>(List<T> elementList, string indexName) where T : class
{
    var elementCount = elementList.Count;
    var count = 0;
    var waitHandle = new CountdownEvent(1);

    var bulkAll = _client.BulkAll(elementList, b => b
        .Index(indexName)
        .BackOffRetries(5)
        .BackOffTime(TimeSpan.FromSeconds(15))
        .RefreshOnCompleted(true)
        .MaxDegreeOfParallelism(2)
        .BulkResponseCallback(c =>
        {
            if (!c.IsValid)
            {
                _logger.Debug($"Bulk Response was invalid: {c.DebugInformation}");
            }
        })
        .Size(1000));

    bulkAll.Subscribe(observer: new BulkAllObserver(
        onNext: b =>
        {
            count = count + 1000;
            if (count > elementCount)
            {
                count = elementCount;
            }
            _logger.Debug($"Indexed group of {typeof(T)}: {count} of {elementCount}");
        },
        onError: e =>
        {
            _logger.Error(e, e.Message);
            waitHandle.Signal();
        },
        onCompleted: () => waitHandle.Signal()));

    waitHandle.Wait();
}
public void BulkAll<T>(IEnumerable<T> documents, string indexName, int batchSize) where T : class
{
    var count = 0;
    var elementCount = documents.Count(); // may enumerate the source once up front, purely for progress logging
    var timer = Stopwatch.StartNew();
    var waitHandle = new ManualResetEvent(false);
    Exception bulkAllException = null;

    var bulkAll = _client.BulkAll(documents, b => b
        .Index(indexName)
        .BackOffRetries(15)
        .BackOffTime(TimeSpan.FromSeconds(30))
        .RefreshOnCompleted(true)
        .MaxDegreeOfParallelism(4)
        .Size(batchSize));

    bulkAll.Subscribe(new BulkAllObserver(
        b =>
        {
            count = count + batchSize;
            if (count > elementCount)
            {
                count = elementCount;
            }
            _logger.Debug($"Indexed group of Document: {count} of {elementCount}");
        },
        e =>
        {
            _logger.Error(e, e.Message);
            bulkAllException = e;
            waitHandle.Set();
        },
        () => waitHandle.Set()));

    waitHandle.WaitOne();

    if (bulkAllException != null)
    {
        throw bulkAllException;
    }

    SendLog(null, null, timer.ElapsedMilliseconds, "Bulk completed for Document");
}
public void BulkAll(List<RoatpProviderDocument> documents, string indexName)
{
    var count = 0;
    var elementCount = documents.Count;
    var batchSize = 1000;
    var timer = Stopwatch.StartNew();
    var waitHandle = new CountdownEvent(1);
    Exception bulkAllException = null;

    var bulkAll = _client.BulkAll(documents, b => b
        .Index(indexName)
        .BackOffRetries(15)
        .BackOffTime(TimeSpan.FromSeconds(30))
        .RefreshOnCompleted(true)
        .MaxDegreeOfParallelism(4)
        .Size(batchSize));

    bulkAll.Subscribe(observer: new BulkAllObserver(
        onNext: b =>
        {
            count = count + batchSize;
            if (count > elementCount)
            {
                count = elementCount;
            }
            _logger.Debug($"Indexed group of RoatpProviderDocument: {count} of {elementCount}");
        },
        onError: e =>
        {
            // capture and signal rather than throwing from the observer callback;
            // throwing here would leave waitHandle unsignalled and block Wait() forever
            _logger.Error(e, e.Message);
            bulkAllException = e;
            waitHandle.Signal();
        },
        onCompleted: () => waitHandle.Signal()));

    waitHandle.Wait();

    if (bulkAllException != null)
    {
        throw bulkAllException;
    }

    SendLog(null, null, timer.ElapsedMilliseconds, "Bulk completed for RoatpProviderDocument");
}
private async Task IndexMockData(IElasticClient c, int requestsPerIteration)
{
    var tokenSource = new CancellationTokenSource();
    await c.Indices.DeleteAsync(Index<Project>(), ct: tokenSource.Token);

    var observableBulk = c.BulkAll(MockDataGenerator(100000), f => f
        .MaxDegreeOfParallelism(10)
        .BackOffTime(TimeSpan.FromSeconds(10))
        .BackOffRetries(2)
        .Size(1000)
        .RefreshOnCompleted(),
        tokenSource.Token);

    await observableBulk.ForEachAsync(x => { }, tokenSource.Token);

    var statsRequest = new NodesStatsRequest(NodesStatsMetric.Http);
    var nodeStats = await c.Nodes.StatsAsync(statsRequest, tokenSource.Token);
    AssertHttpStats(c, nodeStats, -1, requestsPerIteration);
}
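`MockDataGenerator` isn't shown in the example. Assuming it lazily yields `Project` documents with a `Name` property (the laziness is what lets `BulkAll` enumerate and chunk a large sequence without materialising it, as the helper's documentation above notes), a hypothetical sketch:

// Hypothetical sketch of the MockDataGenerator helper used above: a lazy
// IEnumerable<Project> that BulkAll can enumerate and chunk on demand.
private static IEnumerable<Project> MockDataGenerator(int numDocuments)
{
    for (var i = 0; i < numDocuments; i++)
        yield return new Project { Name = $"project-{i}" };
}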
private void Flush(object source, ElapsedEventArgs ea)
{
    // TODO use a sync object to avoid concurrency issues instead of copying the list and clearing...
    if (_toBeAdded.Any())
    {
        var docs = _toBeAdded.ToList();
        _toBeAdded.Clear();

        var waitHandle = new CountdownEvent(1);

        var bulkAll = _elasticClient.BulkAll(docs, b => b
            .Index(_indexName)
            .Type<ZeroTask>()
            .BackOffRetries(2)
            .BackOffTime("30s")
            .RefreshOnCompleted(true)
            .MaxDegreeOfParallelism(4)
            .Size(1000));

        bulkAll.Subscribe(new BulkAllObserver(
            //onNext: (b) => { Console.Write("."); },
            onError: e => throw new Exception("There is a problem with ElasticSearch", e),
            onCompleted: () => waitHandle.Signal()));

        waitHandle.Wait();
    }

    if (!_toBeDeleted.Any())
    {
        return;
    }

    var toBeDeleted = _toBeDeleted.ToList();
    _toBeDeleted.Clear();

    foreach (var id in toBeDeleted)
    {
        _elasticClient.Delete<T>(id, d => d.Index(_indexName));
    }
}
private async Task<int> PushAllImpl(IEnumerable<IDictionary<string, string>> lazyEnumerable, string mappingName)
{
    var seenPages = 0;
    var tcs = new TaskCompletionSource<int>();

    var observableBulk = _client.BulkAll(lazyEnumerable, bulkDescriptor =>
    {
        bulkDescriptor
            .BufferToBulk((x, batch) => x
                .IndexMany(batch, (bd, d) => bd.Id(d["PartitionKey"] + d["RowKey"]))
                .Index(_indexNamer.BuildName(batch[0]["@timestamp"], mappingName)))
            .Type(mappingName);

        if (_setPipeline)
        {
            bulkDescriptor.Pipeline(mappingName.ToLower());
        }

        return bulkDescriptor
            .MaxDegreeOfParallelism(5)
            .Size(_batchSize);
    });

    var observer = new BulkAllObserver(
        onNext: b => Interlocked.Increment(ref seenPages),
        onCompleted: () => tcs.SetResult(seenPages),
        onError: e =>
        {
            TheTrace.TraceWarning(e.ToString());
            tcs.SetException(e);
        });

    observableBulk.Subscribe(observer);

    return await tcs.Task.ConfigureAwait(false);
}
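Because the observer completes a `TaskCompletionSource<int>`, callers can simply await the returned task instead of juggling wait handles. A hypothetical call site (`rows` and the mapping name are made up):

// Hypothetical call site: the task completes with the number of bulk pages sent,
// or faults with the exception the observer captured.
var pages = await PushAllImpl(rows, "LogEntry");
Console.WriteLine($"Pushed {pages} pages");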
public async Task StoreBulk<T>(IList<T> documents, int size, string indexName = "") where T : class
{
    //string index = string.IsNullOrEmpty(indexName) ? GenerateIndexName<T>() : indexName;
    //await CreateIndex<T>(index);

    var bulkAllObservable = _elasticClient.BulkAll(documents, b => b
            .Index("news")
            .BackOffTime("30s") // how long to wait between retries
            .BackOffRetries(2) // how many retries are attempted if a failure occurs
            .RefreshOnCompleted() // refresh the index once the bulk operation completes
            .MaxDegreeOfParallelism(Environment.ProcessorCount) // how many concurrent bulk requests to make
            .Size(size)) // number of items per bulk request
        // Perform the indexing, waiting up to 15 minutes.
        // Whilst the BulkAll calls are asynchronous this is a blocking operation
        .Wait(TimeSpan.FromMinutes(15), next =>
        {
            // do something on each response e.g. write number of batches indexed to console
        });
}
protected override BulkAllObservable<T> ExecuteCore(IElasticClient client, string index)
{
    return client.BulkAll(_documents, desc => BuildQueryCore(desc, index, _refreshOnSave));
}
public void IndexUsers(string usersPath, string badgesPath)
{
    CreateUsersIndexIfNotExists();

    _client.Indices.UpdateSettings(UsersIndex, u => u
        .IndexSettings(i => i
            .RefreshInterval("-1")
        )
    );

    var size = 1000;
    var seenPages = 0;
    var indexedDocs = 0;
    var totalDocs = 0;
    var handle = new ManualResetEvent(false);

    var users = StackOverflowData.GetUsers(usersPath);
    var observableBulk = _client.BulkAll(users, f => f
        .MaxDegreeOfParallelism(16)
        .BackOffTime(TimeSpan.FromSeconds(10))
        .BackOffRetries(2)
        .Size(size)
        .RefreshOnCompleted()
        .Index(UsersIndex)
    );

    Exception exception = null;
    var bulkObserver = new BulkAllObserver(
        onError: e =>
        {
            exception = e;
            handle.Set();
        },
        onCompleted: () => handle.Set(),
        onNext: b =>
        {
            Interlocked.Add(ref indexedDocs, b.Items.Count(i => i.IsValid));
            Interlocked.Add(ref totalDocs, b.Items.Count);
            Interlocked.Increment(ref seenPages);
            Log.WriteLine($"indexed users page {seenPages}, {indexedDocs} out of {totalDocs}");
        }
    );

    var stopWatch = Stopwatch.StartNew();
    observableBulk.Subscribe(bulkObserver);
    handle.WaitOne();

    if (exception != null)
    {
        throw exception;
    }

    Log.WriteLine($"Time taken to index users: {stopWatch.Elapsed}");

    // update user badges
    seenPages = 0;
    indexedDocs = 0;
    totalDocs = 0;
    handle = new ManualResetEvent(false);

    var badgeMetas = StackOverflowData.GetBadgeMetas(badgesPath);
    var observableBadgeBulk = _client.BulkAll(badgeMetas, f => f
        .Index<User>()
        .MaxDegreeOfParallelism(8)
        .Size(size)
        .BufferToBulk((bulk, badges) =>
        {
            foreach (var badge in badges)
            {
                bulk.Update<User>(u => u
                    .Script(s => s
                        .Source(@"if (ctx._source.badges == null) {
                                      ctx._source.badges = [params.badge];
                                  } else if (ctx._source.badges.any(b -> b.name == params.badge.name) == false) {
                                      ctx._source.badges.add(params.badge);
                                  }")
                        .Params(d => d
                            .Add("badge", badge.Badge)
                        )
                    )
                    .Id(badge.UserId)
                    .RetriesOnConflict(10)
                );
            }
        })
        .RefreshOnCompleted()
    );

    bulkObserver = new BulkAllObserver(
        onError: e =>
        {
            exception = e;
            handle.Set();
        },
        onCompleted: () => handle.Set(),
        onNext: b =>
        {
            Interlocked.Add(ref indexedDocs, b.Items.Count(i => i.IsValid));
            Interlocked.Add(ref totalDocs, b.Items.Count);
            Interlocked.Increment(ref seenPages);
            Log.WriteLine($"indexed badges page {seenPages}, {indexedDocs} out of {totalDocs}");
        }
    );

    stopWatch.Restart();
    observableBadgeBulk.Subscribe(bulkObserver);
    handle.WaitOne();

    if (exception != null)
    {
        throw exception;
    }

    Log.WriteLine($"Time taken to index badges: {stopWatch.Elapsed}");

    _client.Indices.UpdateSettings(UsersIndex, u => u
        .IndexSettings(i => i
            .RefreshInterval("30s")
        )
    );
}
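The badges pass above references a couple of types that aren't shown. Based only on the members it actually uses (`badge.UserId`, `badge.Badge`, and `params.badge.name` in the Painless script), a hypothetical sketch of their shape:

// Hypothetical shapes inferred from the badges pass: the observer reads
// badge.UserId and badge.Badge, and the update script compares params.badge.name
// (NEST's default camelCase serialization maps the Name property to "name").
public class Badge
{
    public string Name { get; set; }
}

public class BadgeMeta
{
    public int UserId { get; set; }
    public Badge Badge { get; set; }
}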