/// <summary>
/// Copies the documents of <c>workItem.OldIndex</c> into <c>workItem.NewIndex</c> one scroll page at a time
/// using bulk indexing, and reports progress through <paramref name="context"/> after every page.
/// When a bulk request fails, each document of that page is indexed individually; a document that still
/// fails is written to the "<c>{NewIndex}-error</c>" index instead.
/// </summary>
/// <param name="workItem">Supplies the old and new index names, the optional timestamp field and the parent maps.</param>
/// <param name="context">Work item context that receives the progress reports.</param>
/// <param name="startProgress">Forwarded to <c>CalculateProgress</c> together with <paramref name="endProgress"/>.</param>
/// <param name="endProgress">Forwarded to <c>CalculateProgress</c> together with <paramref name="startProgress"/>.</param>
/// <param name="startTime">When set, only documents whose timestamp field is greater than this value are copied.</param>
/// <returns>
/// The total hit count and the number of completed documents, or an empty <c>ReindexResult</c>
/// when the initial scan search is invalid or returns no scroll id.
/// </returns>
/// <exception cref="ReindexException">
/// A document could be written neither to the new index nor to the error index.
/// </exception>
private async Task<ReindexResult> ReindexAsync(ReindexWorkItem workItem, WorkItemContext context, int startProgress = 0, int endProgress = 100, DateTime? startTime = null) {
    const int pageSize = 100;
    const string scroll = "5m";
    string timestampField = workItem.TimestampField ?? "_timestamp";

    long completed = 0;

    var scanResults = await _client.SearchAsync<JObject>(s => s
        .Index(workItem.OldIndex)
        .AllTypes()
        .Filter(f => startTime.HasValue
            ? f.Range(r => r.OnField(timestampField).Greater(startTime.Value))
            : f.MatchAll())
        .From(0).Take(pageSize)
        .SearchType(SearchType.Scan)
        .Scroll(scroll)).AnyContext();

    if (!scanResults.IsValid || scanResults.ScrollId == null) {
        Logger.Error().Message("Invalid search result: message={0}", scanResults.GetErrorMessage()).Write();
        return new ReindexResult();
    }

    long totalHits = scanResults.Total;

    var parentMap = workItem.ParentMaps?.ToDictionary(p => p.Type, p => p.ParentPath) ?? new Dictionary<string, string>();

    // Resolves the parent id of a document from the path configured for its type.
    // Returns null when the type has no parent mapping or no usable parent id could be read.
    Func<string, string, JObject, string> resolveParentId = (type, id, source) => {
        // A null type cannot be used as a dictionary key (the lookup would throw).
        string parentPath;
        if (type == null || !parentMap.TryGetValue(type, out parentPath))
            return null;

        if (String.IsNullOrEmpty(parentPath)) {
            // Do not evaluate an empty path: it would select the whole document as the "parent id".
            Logger.Error().Message("Parent map has empty value. id={0} type={1}", id, type).Write();
            return null;
        }

        string parentId = source.SelectToken(parentPath)?.ToString();
        if (!String.IsNullOrEmpty(parentId))
            return parentId;

        Logger.Error().Message("Unable to get parent id. id={0} path={1}", id, parentPath).Write();
        return null;
    };

    var results = await _client.ScrollAsync<JObject>(scroll, scanResults.ScrollId).AnyContext();
    while (results.Documents.Any()) {
        var bulkDescriptor = new BulkDescriptor();
        foreach (var hit in results.Hits) {
            var h = hit;
            // TODO: Add support for doing JObject based schema migrations
            bulkDescriptor.Index<JObject>(idx => {
                idx
                    .Index(workItem.NewIndex)
                    .Type(h.Type)
                    .Id(h.Id)
                    .Document(h.Source);

                if (String.IsNullOrEmpty(h.Type))
                    Logger.Error().Message("Hit type empty. id={0}", h.Id).Write();

                string parentId = resolveParentId(h.Type, h.Id, h.Source);
                if (parentId != null)
                    idx.Parent(parentId);

                return idx;
            });
        }

        var bulkResponse = await _client.BulkAsync(bulkDescriptor).AnyContext();
        if (!bulkResponse.IsValid) {
            string message = $"Reindex bulk error: old={workItem.OldIndex} new={workItem.NewIndex} completed={completed} message={bulkResponse.GetErrorMessage()}";
            Logger.Warn().Message(message).Write();

            // try each doc individually so we can see which doc is breaking us
            foreach (var hit in results.Hits) {
                var h = hit;
                var response = await _client.IndexAsync<JObject>(h.Source, d => {
                    d
                        .Index(workItem.NewIndex)
                        .Type(h.Type)
                        .Id(h.Id);

                    string parentId = resolveParentId(h.Type, h.Id, h.Source);
                    if (parentId != null)
                        d.Parent(parentId);

                    return d;
                }).AnyContext();

                if (response.IsValid)
                    continue;

                message = $"Reindex error: old={workItem.OldIndex} new={workItem.NewIndex} id={hit.Id} completed={completed} message={response.GetErrorMessage()}";
                Logger.Error().Message(message).Write();

                // FromObject serializes the anonymous object into properties; the JObject
                // constructor would treat it as content to add and reject it.
                var errorDoc = JObject.FromObject(new {
                    h.Type,
                    Content = h.Source.ToString(Formatting.Indented)
                });

                string errorParentId = resolveParentId(h.Type, h.Id, h.Source);
                if (errorParentId != null)
                    errorDoc["ParentId"] = errorParentId;

                // put the document into an error index
                response = await _client.IndexAsync<JObject>(errorDoc, d => {
                    d
                        .Index(workItem.NewIndex + "-error")
                        .Id(h.Id);
                    return d;
                }).AnyContext();

                if (response.IsValid)
                    continue;

                throw new ReindexException(response.ConnectionStatus, message);
            }
        }

        completed += bulkResponse.Items.Count();
        int progress = CalculateProgress(totalHits, completed, startProgress, endProgress);
        await context.ReportProgressAsync(progress, $"Total: {totalHits} Completed: {completed}").AnyContext();
        Logger.Info().Message($"Reindex Progress: {progress} Completed: {completed} Total: {totalHits}").Write();

        results = await _client.ScrollAsync<JObject>(scroll, results.ScrollId).AnyContext();
    }

    return new ReindexResult {
        Total = totalHits,
        Completed = completed
    };
}
/// <summary>
/// For each index: creates the index (or puts its template) when needed, makes sure its alias exists,
/// and enqueues a reindex work item when the version reported by <c>GetAliasVersion</c> is at least 1
/// but lower than the index's own version.
/// </summary>
/// <param name="client">Elasticsearch client used for all index, template and alias operations.</param>
/// <param name="indexes">Indexes to configure; when <c>null</c>, the result of <c>GetIndexes()</c> is used.</param>
public virtual void ConfigureIndexes(IElasticClient client, IEnumerable<IElasticIndex> indexes = null) {
    if (indexes == null)
        indexes = GetIndexes();

    foreach (var idx in indexes) {
        // Read the version before any alias is created below.
        int currentVersion = GetAliasVersion(client, idx.AliasName);

        IIndicesOperationResponse response = null;
        var templatedIndex = idx as ITemplatedElasticIndex;
        if (templatedIndex != null)
            response = client.PutTemplate(idx.VersionedName, template => templatedIndex.CreateTemplate(template).AddAlias(idx.AliasName));
        else if (!client.IndexExists(idx.VersionedName).Exists)
            response = client.CreateIndex(idx.VersionedName, descriptor => idx.CreateIndex(descriptor).AddAlias(idx.AliasName));

        Debug.Assert(response == null || response.IsValid, response?.ServerError != null ? response.ServerError.Error : "An error occurred creating the index or template.");

        // Add existing indexes to the alias.
        if (!client.AliasExists(idx.AliasName).Exists) {
            if (templatedIndex != null) {
                // Index names are machine identifiers, so match the prefix ordinally
                // rather than with the current culture's rules.
                var indices = client.IndicesStats().Indices
                    .Where(kvp => kvp.Key.StartsWith(idx.VersionedName, StringComparison.Ordinal))
                    .Select(kvp => kvp.Key)
                    .ToList();

                if (indices.Count > 0) {
                    var descriptor = new AliasDescriptor();
                    foreach (string name in indices)
                        descriptor.Add(add => add.Index(name).Alias(idx.AliasName));

                    response = client.Alias(descriptor);
                }
            } else {
                response = client.Alias(a => a.Add(add => add.Index(idx.VersionedName).Alias(idx.AliasName)));
            }

            Debug.Assert(response != null && response.IsValid, response?.ServerError != null ? response.ServerError.Error : "An error occurred creating the alias.");
        }

        // already on current version
        if (currentVersion >= idx.Version || currentVersion < 1)
            continue;

        var reindexWorkItem = new ReindexWorkItem {
            OldIndex = String.Concat(idx.AliasName, "-v", currentVersion),
            NewIndex = idx.VersionedName,
            Alias = idx.AliasName,
            DeleteOld = true,
            // Only types that declare a parent path need a parent map entry.
            ParentMaps = idx.GetIndexTypes()
                .Select(kvp => new ParentMap { Type = kvp.Value.Name, ParentPath = kvp.Value.ParentPath })
                .Where(m => !String.IsNullOrEmpty(m.ParentPath))
                .ToList()
        };

        // This method is synchronous, so the async lock check is awaited by blocking.
        bool isReindexing = _lockProvider.IsLockedAsync(String.Concat("reindex:", reindexWorkItem.Alias, reindexWorkItem.OldIndex, reindexWorkItem.NewIndex)).Result;
        // already reindexing
        if (isReindexing)
            continue;

        // enqueue reindex to new version
        _lockProvider.TryUsingAsync("enqueue-reindex", () => _workItemQueue.EnqueueAsync(reindexWorkItem), TimeSpan.Zero, CancellationToken.None).Wait();
    }
}
/// <summary>
/// Copies the documents of <c>workItem.OldIndex</c> into <c>workItem.NewIndex</c> one scroll page at a time
/// using bulk indexing, and reports progress through <paramref name="context"/> after every page.
/// When a bulk request fails, each document of that page is indexed individually; a document that still
/// fails is written to the "<c>{NewIndex}-error</c>" index instead.
/// </summary>
/// <param name="workItem">Supplies the old and new index names, the optional timestamp field and the parent maps.</param>
/// <param name="context">Work item context that receives the progress reports.</param>
/// <param name="startProgress">Forwarded to <c>CalculateProgress</c> together with <paramref name="endProgress"/>.</param>
/// <param name="endProgress">Forwarded to <c>CalculateProgress</c> together with <paramref name="startProgress"/>.</param>
/// <param name="startTime">When set, only documents whose timestamp field is greater than this value are copied.</param>
/// <returns>
/// The total hit count and the number of completed documents, or an empty <c>ReindexResult</c>
/// when the initial scan search is invalid or returns no scroll id.
/// </returns>
/// <exception cref="ReindexException">
/// A document could be written neither to the new index nor to the error index.
/// </exception>
private async Task<ReindexResult> ReindexAsync(ReindexWorkItem workItem, WorkItemContext context, int startProgress = 0, int endProgress = 100, DateTime? startTime = null) {
    const int pageSize = 100;
    const string scroll = "5m";
    string timestampField = workItem.TimestampField ?? "_timestamp";

    long completed = 0;

    var scanResults = await _client.SearchAsync<JObject>(s => s
        .Index(workItem.OldIndex)
        .AllTypes()
        .Filter(f => startTime.HasValue
            ? f.Range(r => r.OnField(timestampField).Greater(startTime.Value))
            : f.MatchAll())
        .From(0).Take(pageSize)
        .SearchType(SearchType.Scan)
        .Scroll(scroll)).AnyContext();

    if (!scanResults.IsValid || scanResults.ScrollId == null) {
        Log.Error().Message("Invalid search result: message={0}", scanResults.GetErrorMessage()).Write();
        return new ReindexResult();
    }

    long totalHits = scanResults.Total;

    var parentMap = workItem.ParentMaps?.ToDictionary(p => p.Type, p => p.ParentPath) ?? new Dictionary<string, string>();

    // Resolves the parent id of a document from the path configured for its type.
    // Returns null when the type has no parent mapping or no usable parent id could be read.
    Func<string, string, JObject, string> resolveParentId = (type, id, source) => {
        // A null type cannot be used as a dictionary key (the lookup would throw).
        string parentPath;
        if (type == null || !parentMap.TryGetValue(type, out parentPath))
            return null;

        if (String.IsNullOrEmpty(parentPath)) {
            // Do not evaluate an empty path: it would select the whole document as the "parent id".
            Log.Error().Message("Parent map has empty value. id={0} type={1}", id, type).Write();
            return null;
        }

        string parentId = source.SelectToken(parentPath)?.ToString();
        if (!String.IsNullOrEmpty(parentId))
            return parentId;

        Log.Error().Message("Unable to get parent id. id={0} path={1}", id, parentPath).Write();
        return null;
    };

    var results = await _client.ScrollAsync<JObject>(scroll, scanResults.ScrollId).AnyContext();
    while (results.Documents.Any()) {
        var bulkDescriptor = new BulkDescriptor();
        foreach (var hit in results.Hits) {
            var h = hit;
            // TODO: Add support for doing JObject based schema migrations
            bulkDescriptor.Index<JObject>(idx => {
                idx
                    .Index(workItem.NewIndex)
                    .Type(h.Type)
                    .Id(h.Id)
                    .Document(h.Source);

                if (String.IsNullOrEmpty(h.Type)) {
                    Log.Error().Message("Hit type empty. id={0}", h.Id).Write();
                }

                string parentId = resolveParentId(h.Type, h.Id, h.Source);
                if (parentId != null) {
                    idx.Parent(parentId);
                }

                return idx;
            });
        }

        var bulkResponse = await _client.BulkAsync(bulkDescriptor).AnyContext();
        if (!bulkResponse.IsValid) {
            Log.Warn().Message("Reindex bulk error: old={0} new={1} completed={2} message={3}", workItem.OldIndex, workItem.NewIndex, completed, bulkResponse.GetErrorMessage()).Write();

            // try each doc individually so we can see which doc is breaking us
            foreach (var hit in results.Hits) {
                var h = hit;
                var response = await _client.IndexAsync<JObject>(h.Source, d => {
                    d.Index(workItem.NewIndex).Type(h.Type).Id(h.Id);

                    string parentId = resolveParentId(h.Type, h.Id, h.Source);
                    if (parentId != null) {
                        d.Parent(parentId);
                    }

                    return d;
                }).AnyContext();

                if (response.IsValid) {
                    continue;
                }

                Log.Error().Message("Reindex error: old={0} new={1} id={2} completed={3} message={4}", workItem.OldIndex, workItem.NewIndex, hit.Id, completed, response.GetErrorMessage()).Write();

                // FromObject serializes the anonymous object into properties; the JObject
                // constructor would treat it as content to add and reject it.
                var errorDoc = JObject.FromObject(new {
                    h.Type,
                    Content = h.Source.ToString(Formatting.Indented)
                });

                string errorParentId = resolveParentId(h.Type, h.Id, h.Source);
                if (errorParentId != null) {
                    errorDoc["ParentId"] = errorParentId;
                }

                // put the document into an error index
                response = await _client.IndexAsync<JObject>(errorDoc, d => {
                    d.Index(workItem.NewIndex + "-error").Id(h.Id);
                    return d;
                }).AnyContext();

                if (response.IsValid) {
                    continue;
                }

                throw new ReindexException(response.ConnectionStatus, $"Reindex error: old={workItem.OldIndex} new={workItem.NewIndex} id={hit.Id} completed={completed} message={response.GetErrorMessage()}");
            }
        }

        completed += bulkResponse.Items.Count();
        int progress = CalculateProgress(totalHits, completed, startProgress, endProgress);
        await context.ReportProgressAsync(progress, $"Total: {totalHits} Completed: {completed}").AnyContext();
        Log.Info().Message("Reindex Progress: {0} Completed: {1} Total: {2}", progress, completed, totalHits).Write();

        results = await _client.ScrollAsync<JObject>(scroll, results.ScrollId).AnyContext();
    }

    return new ReindexResult {
        Total = totalHits,
        Completed = completed
    };
}