/// <summary>
/// Builds one <see cref="IndexingGroup"/> per etag range returned by the configured indexing
/// classifier and assigns a prefetcher to each group.
/// </summary>
/// <param name="indexes">Indexes to classify into etag-range groups.</param>
/// <param name="usedPrefetchers">Receives the set that was handed to <c>GetPrefetcherFor</c> for every group.</param>
/// <param name="indexingGroups">Receives the created groups, ordered by <c>LastQueryTime</c> descending.</param>
/// <returns>
/// <c>true</c> when the classifier returned no groups (both out collections are then empty);
/// <c>false</c> when at least one group was created.
/// </returns>
private bool GenerateIndexingGroupsByEtagRanges(IList<IndexToWorkOn> indexes, out ConcurrentSet<PrefetchingBehavior> usedPrefetchers, out List<IndexingGroup> indexingGroups)
{
    indexingGroups = new List<IndexingGroup>();
    usedPrefetchers = new ConcurrentSet<PrefetchingBehavior>();

    var groupedIndexesByEtagRange = context.Configuration.IndexingClassifier.GroupMapIndexes(indexes);
    if (groupedIndexesByEtagRange.Count == 0)
        return true;

    // Walk the sorted sequence directly. Copying it back into a dictionary would make the order in
    // which prefetchers are handed out depend on Dictionary enumeration order, which is undefined.
    foreach (var etagRange in groupedIndexesByEtagRange.OrderByDescending(x => x.Key))
    {
        var result = new IndexingGroup
        {
            Indexes = etagRange.Value,
            LastIndexedEtag = etagRange.Key,
            LastQueryTime = etagRange.Value.Max(y => y.Index.LastQueryTime),
            PrefetchingBehavior = GetPrefetcherFor(etagRange.Key, usedPrefetchers)
        };

        // Diagnostic text stored on the prefetcher: whether it is the default one and which group it serves.
        result.PrefetchingBehavior.AdditionalInfo = string.Format("Default prefetcher: {0}. For indexing group: [Indexes: {1}, LastIndexedEtag: {2}]",
            result.PrefetchingBehavior == defaultPrefetchingBehavior,
            string.Join(", ", result.Indexes.Select(y => y.Index.PublicName)),
            result.LastIndexedEtag);

        indexingGroups.Add(result);
    }

    // Groups whose indexes were queried most recently come first.
    indexingGroups = indexingGroups.OrderByDescending(x => x.LastQueryTime).ToList();
    return false;
}
/// <summary>
/// Picks a prefetcher for <paramref name="groupIndex"/> and binds the two together.
/// </summary>
/// <param name="groupIndex">Group that receives the prefetcher; its indexes and etag are copied onto the prefetcher.</param>
/// <param name="usedPrefetchers">Set passed through to the prefetcher lookup helpers.</param>
private void SetPrefetcherForIndexingGroup(IndexingGroup groupIndex, ConcurrentSet<PrefetchingBehavior> usedPrefetchers)
{
    var fromEtag = groupIndex.LastIndexedEtag;

    // Lookup order: TryGetPrefetcherFor, then TryGetDefaultPrefetcher, then GetPrefetcherFor.
    // Each later helper is only called when the previous one returned null.
    PrefetchingBehavior prefetcher = TryGetPrefetcherFor(fromEtag, usedPrefetchers);
    prefetcher = prefetcher ?? TryGetDefaultPrefetcher(fromEtag, usedPrefetchers);
    prefetcher = prefetcher ?? GetPrefetcherFor(fromEtag, usedPrefetchers);

    groupIndex.PrefetchingBehavior = prefetcher;
    groupIndex.PrefetchingBehavior.Indexes = groupIndex.Indexes;
    groupIndex.PrefetchingBehavior.LastIndexedEtag = groupIndex.LastIndexedEtag;
}
/// <summary>
/// Finishes a processed indexing group: when the group holds documents, the prefetcher is cleaned
/// up, its auto throttler is updated and the batch is reported as completed; the group is then
/// released in every case.
/// </summary>
/// <param name="indexingGroup">The group whose batch has been processed.</param>
/// <param name="ellapsedTimeSpan">Duration passed to the prefetcher's auto throttler.</param>
private void ReleasePrefethersAndUpdateStatistics(IndexingGroup indexingGroup, TimeSpan ellapsedTimeSpan)
{
    var nothingWasFetched = indexingGroup.JsonDocs == null || indexingGroup.JsonDocs.Count <= 0;
    if (nothingWasFetched)
    {
        // No documents: skip the prefetcher bookkeeping and just release the group.
        indexingGroup.ReleaseIndexingGroupFinished();
        return;
    }

    var prefetcher = indexingGroup.PrefetchingBehavior;
    prefetcher.CleanupDocuments(indexingGroup.LastIndexedEtag);
    prefetcher.UpdateAutoThrottler(indexingGroup.JsonDocs, ellapsedTimeSpan);
    prefetcher.BatchProcessingComplete();

    context.ReportIndexingBatchCompleted(indexingGroup.BatchInfo);

    indexingGroup.ReleaseIndexingGroupFinished();
}
/// <summary>
/// Groups <paramref name="indexes"/> by etag range through the configured indexing classifier,
/// gives every group a prefetcher and passes the groups to
/// <c>BackgroundTaskExecuter.Instance.ExecuteAll</c>, where each group fetches its document batch
/// and indexes it.
/// </summary>
/// <param name="indexes">Indexes to classify and index.</param>
protected override void ExecuteIndexingWork(IList<IndexToWorkOn> indexes)
{
    var indexingGroups = context.Configuration.IndexingClassifier.GroupMapIndexes(indexes);

    // Nothing to do: bail out before spending time on sorting.
    if (indexingGroups.Count == 0)
    {
        return;
    }

    var usedPrefetchers = new ConcurrentSet<PrefetchingBehavior>();

    // Prefetchers are handed out in descending etag order. The sorted sequence is consumed directly
    // because a dictionary rebuilt from it would not guarantee that enumeration order.
    var groupedIndexes = indexingGroups
        .OrderByDescending(x => x.Key)
        .Select(x =>
        {
            var result = new IndexingGroup
            {
                LastIndexedEtag = x.Key,
                Indexes = x.Value,
                LastQueryTime = x.Value.Max(y => y.Index.LastQueryTime),
                PrefetchingBehavior = GetPrefetcherFor(x.Key, usedPrefetchers)
            };

            // Diagnostic text stored on the prefetcher: whether it is the default one and which group it serves.
            result.PrefetchingBehavior.AdditionalInfo = string.Format("Default prefetcher: {0}. For indexing group: [Indexes: {1}, LastIndexedEtag: {2}]",
                result.PrefetchingBehavior == defaultPrefetchingBehavior,
                string.Join(", ", result.Indexes.Select(y => y.Index.PublicName)),
                result.LastIndexedEtag);

            return result;
        })
        // Groups whose indexes were queried most recently are processed first.
        .OrderByDescending(x => x.LastQueryTime)
        .ToList();

    var maxIndexOutputsPerDoc = groupedIndexes.Max(x => x.Indexes.Max(y => y.Index.MaxIndexOutputsPerDocument));
    var containsMapReduceIndexes = groupedIndexes.Any(x => x.Indexes.Any(y => y.Index.IsMapReduce));

    // May return null; otherwise the returned delegate must be invoked once the batch is over.
    var recoverTunerState = ((IndexBatchSizeAutoTuner)autoTuner).ConsiderLimitingNumberOfItemsToProcessForThisBatch(maxIndexOutputsPerDoc, containsMapReduceIndexes);

    try
    {
        BackgroundTaskExecuter.Instance.ExecuteAll(context, groupedIndexes, (indexingGroup, i) =>
        {
            context.CancellationToken.ThrowIfCancellationRequested();

            using (LogContext.WithDatabase(context.DatabaseName))
            {
                var prefetchingBehavior = indexingGroup.PrefetchingBehavior;
                var indexesToWorkOn = indexingGroup.Indexes;

                var operationCancelled = false;
                TimeSpan indexingDuration = TimeSpan.Zero;
                var lastEtag = Etag.Empty;

                List<JsonDocument> jsonDocs;
                IndexingBatchInfo batchInfo = null;

                using (MapIndexingInProgress(indexesToWorkOn))
                using (prefetchingBehavior.DocumentBatchFrom(indexingGroup.LastIndexedEtag, out jsonDocs))
                {
                    try
                    {
                        if (Log.IsDebugEnabled)
                        {
                            Log.Debug("Found a total of {0} documents that requires indexing since etag: {1}: ({2})",
                                jsonDocs.Count, indexingGroup.LastIndexedEtag, string.Join(", ", jsonDocs.Select(x => x.Key)));
                        }

                        batchInfo = context.ReportIndexingBatchStarted(jsonDocs.Count, jsonDocs.Sum(x => x.SerializedSizeOnDisk), indexesToWorkOn.Select(x => x.Index.PublicName).ToList());

                        context.CancellationToken.ThrowIfCancellationRequested();

                        // The finally block below still runs and closes the (empty) batch.
                        if (jsonDocs.Count <= 0)
                        {
                            return;
                        }

                        var sw = Stopwatch.StartNew();
                        lastEtag = DoActualIndexing(indexesToWorkOn, jsonDocs, batchInfo);
                        indexingDuration = sw.Elapsed;
                    }
                    catch (InvalidDataException e)
                    {
                        // Recorded as an index error for every index in the group; not rethrown.
                        Log.ErrorException("Failed to index because of data corruption. ", e);
                        indexesToWorkOn.ForEach(index =>
                            context.AddError(index.IndexId, index.Index.PublicName, null, string.Format("Failed to index because of data corruption. Reason: {0}", e.Message)));
                    }
                    catch (OperationCanceledException)
                    {
                        operationCancelled = true;
                    }
                    finally
                    {
                        // Prefetcher cleanup and throttling only happen for non-cancelled, non-empty batches.
                        if (operationCancelled == false && jsonDocs != null && jsonDocs.Count > 0)
                        {
                            prefetchingBehavior.CleanupDocuments(lastEtag);
                            prefetchingBehavior.UpdateAutoThrottler(jsonDocs, indexingDuration);
                        }

                        prefetchingBehavior.BatchProcessingComplete();

                        if (batchInfo != null)
                        {
                            context.ReportIndexingBatchCompleted(batchInfo);
                        }
                    }
                }
            }
        });
    }
    finally
    {
        // Undo the per-batch tuner limit even when ExecuteAll throws (e.g. on cancellation),
        // otherwise the limit applied for this batch would outlive it.
        if (recoverTunerState != null)
        {
            recoverTunerState();
        }
    }

    RemoveUnusedPrefetchers(usedPrefetchers);
}
/// <summary>
/// Selects the prefetcher that will serve <paramref name="groupIndex"/> and copies the group's
/// indexes and last indexed etag onto it.
/// </summary>
/// <param name="groupIndex">Group to attach a prefetcher to.</param>
/// <param name="usedPrefetchers">Set forwarded to each prefetcher lookup helper.</param>
private void SetPrefetcherForIndexingGroup(IndexingGroup groupIndex, ConcurrentSet<PrefetchingBehavior> usedPrefetchers)
{
    var startEtag = groupIndex.LastIndexedEtag;

    // Fallback chain: each helper is consulted only if the one before it yielded null.
    PrefetchingBehavior selected = TryGetPrefetcherFor(startEtag, usedPrefetchers);
    selected = selected ?? TryGetDefaultPrefetcher(startEtag, usedPrefetchers);
    selected = selected ?? GetPrefetcherFor(startEtag, usedPrefetchers);

    groupIndex.PrefetchingBehavior = selected;
    groupIndex.PrefetchingBehavior.Indexes = groupIndex.Indexes;
    groupIndex.PrefetchingBehavior.LastIndexedEtag = groupIndex.LastIndexedEtag;
}
/// <summary>
/// Groups <paramref name="indexes"/> by etag range through the configured indexing classifier,
/// attaches a prefetcher to every group and passes the groups to
/// <c>BackgroundTaskExecuter.Instance.ExecuteAll</c>, where each group fetches its document batch
/// and indexes it.
/// </summary>
/// <param name="indexes">Indexes to classify and index.</param>
protected override void ExecuteIndexingWork(IList<IndexToWorkOn> indexes)
{
    var indexingGroups = context.Configuration.IndexingClassifier.GroupMapIndexes(indexes);

    // Nothing to do: bail out before spending time on sorting.
    if (indexingGroups.Count == 0)
        return;

    var usedPrefetchers = new ConcurrentSet<PrefetchingBehavior>();

    // Prefetchers are assigned in descending etag order. The sorted sequence is consumed directly
    // because a dictionary rebuilt from it would not guarantee that enumeration order.
    var groupedIndexes = indexingGroups
        .OrderByDescending(x => x.Key)
        .Select(x =>
        {
            var result = new IndexingGroup
            {
                LastIndexedEtag = x.Key,
                Indexes = x.Value,
                LastQueryTime = x.Value.Max(y => y.Index.LastQueryTime),
            };

            SetPrefetcherForIndexingGroup(result, usedPrefetchers);

            return result;
        })
        // Groups whose indexes were queried most recently are processed first.
        .OrderByDescending(x => x.LastQueryTime)
        .ToList();

    var maxIndexOutputsPerDoc = groupedIndexes.Max(x => x.Indexes.Max(y => y.Index.MaxIndexOutputsPerDocument));
    var containsMapReduceIndexes = groupedIndexes.Any(x => x.Indexes.Any(y => y.Index.IsMapReduce));

    // May return null; otherwise the returned delegate must be invoked once the batch is over.
    var recoverTunerState = ((IndexBatchSizeAutoTuner)autoTuner).ConsiderLimitingNumberOfItemsToProcessForThisBatch(maxIndexOutputsPerDoc, containsMapReduceIndexes);

    try
    {
        BackgroundTaskExecuter.Instance.ExecuteAll(context, groupedIndexes, (indexingGroup, i) =>
        {
            context.CancellationToken.ThrowIfCancellationRequested();

            using (LogContext.WithDatabase(context.DatabaseName))
            {
                var prefetchingBehavior = indexingGroup.PrefetchingBehavior;
                var indexesToWorkOn = indexingGroup.Indexes;

                var operationCanceled = false;
                TimeSpan indexingDuration = TimeSpan.Zero;
                var lastEtag = Etag.Empty;

                List<JsonDocument> jsonDocs;
                IndexingBatchInfo batchInfo = null;

                using (MapIndexingInProgress(indexesToWorkOn))
                using (prefetchingBehavior.DocumentBatchFrom(indexingGroup.LastIndexedEtag, out jsonDocs))
                {
                    try
                    {
                        if (Log.IsDebugEnabled)
                        {
                            Log.Debug("Found a total of {0} documents that requires indexing since etag: {1}: ({2})",
                                jsonDocs.Count, indexingGroup.LastIndexedEtag, string.Join(", ", jsonDocs.Select(x => x.Key)));
                        }

                        batchInfo = context.ReportIndexingBatchStarted(jsonDocs.Count, jsonDocs.Sum(x => x.SerializedSizeOnDisk), indexesToWorkOn.Select(x => x.Index.PublicName).ToList());

                        context.CancellationToken.ThrowIfCancellationRequested();

                        // The finally block below still runs and closes the (empty) batch.
                        if (jsonDocs.Count <= 0)
                        {
                            return;
                        }

                        var sw = Stopwatch.StartNew();
                        lastEtag = DoActualIndexing(indexesToWorkOn, jsonDocs, batchInfo);
                        indexingDuration = sw.Elapsed;
                    }
                    catch (InvalidDataException e)
                    {
                        // Recorded as an index error for every index in the group; not rethrown.
                        Log.ErrorException("Failed to index because of data corruption. ", e);
                        indexesToWorkOn.ForEach(index =>
                            context.AddError(index.IndexId, index.Index.PublicName, null, string.Format("Failed to index because of data corruption. Reason: {0}", e.Message)));
                    }
                    catch (OperationCanceledException)
                    {
                        operationCanceled = true;
                    }
                    catch (AggregateException e)
                    {
                        // Only a pure cancellation may be swallowed. If a single inner exception is a
                        // real failure the aggregate has to propagate, otherwise that failure is lost.
                        var allOperationsCanceled = e.InnerExceptions.Count > 0 &&
                                                    e.InnerExceptions.All(x => x is OperationCanceledException);

                        if (allOperationsCanceled == false)
                            throw;

                        operationCanceled = true;
                    }
                    finally
                    {
                        // Prefetcher cleanup and throttling only happen for non-cancelled, non-empty batches.
                        if (operationCanceled == false && jsonDocs != null && jsonDocs.Count > 0)
                        {
                            prefetchingBehavior.CleanupDocuments(lastEtag);
                            prefetchingBehavior.UpdateAutoThrottler(jsonDocs, indexingDuration);
                        }

                        prefetchingBehavior.BatchProcessingComplete();

                        if (batchInfo != null)
                            context.ReportIndexingBatchCompleted(batchInfo);
                    }
                }
            }
        });
    }
    finally
    {
        // Undo the per-batch tuner limit even when ExecuteAll throws, otherwise the limit
        // applied for this batch would outlive it.
        if (recoverTunerState != null)
            recoverTunerState();
    }

    RemoveUnusedPrefetchers(usedPrefetchers);
}
/// <summary>
/// Groups <paramref name="indexes"/> by etag range through the configured indexing classifier,
/// attaches a prefetcher to every group and passes the groups to
/// <c>BackgroundTaskExecuter.Instance.ExecuteAll</c>, where each group fetches its document batch
/// and indexes it. Unexpected failures are logged and recorded as index errors.
/// </summary>
/// <param name="indexes">Indexes to classify and index.</param>
protected override void ExecuteIndexingWork(IList<IndexToWorkOn> indexes)
{
    var indexingGroups = context.Configuration.IndexingClassifier.GroupMapIndexes(indexes);

    // Nothing to do: bail out before spending time on sorting.
    if (indexingGroups.Count == 0)
        return;

    var usedPrefetchers = new ConcurrentSet<PrefetchingBehavior>();

    // Prefetchers are assigned in descending etag order. The sorted sequence is consumed directly
    // because a dictionary rebuilt from it would not guarantee that enumeration order.
    var groupedIndexes = indexingGroups
        .OrderByDescending(x => x.Key)
        .Select(x =>
        {
            var result = new IndexingGroup
            {
                LastIndexedEtag = x.Key,
                Indexes = x.Value,
                LastQueryTime = x.Value.Max(y => y.Index.LastQueryTime),
            };

            SetPrefetcherForIndexingGroup(result, usedPrefetchers);

            return result;
        })
        // Groups whose indexes were queried most recently are processed first.
        .OrderByDescending(x => x.LastQueryTime)
        .ToList();

    var maxIndexOutputsPerDoc = groupedIndexes.Max(x => x.Indexes.Max(y => y.Index.MaxIndexOutputsPerDocument));
    var containsMapReduceIndexes = groupedIndexes.Any(x => x.Indexes.Any(y => y.Index.IsMapReduce));

    // May return null; otherwise the returned delegate must be invoked once the batch is over.
    var recoverTunerState = ((IndexBatchSizeAutoTuner)autoTuner).ConsiderLimitingNumberOfItemsToProcessForThisBatch(maxIndexOutputsPerDoc, containsMapReduceIndexes);

    try
    {
        BackgroundTaskExecuter.Instance.ExecuteAll(context, groupedIndexes, (indexingGroup, i) =>
        {
            context.CancellationToken.ThrowIfCancellationRequested();

            using (LogContext.WithDatabase(context.DatabaseName))
            {
                var prefetchingBehavior = indexingGroup.PrefetchingBehavior;
                var indexesToWorkOn = indexingGroup.Indexes;

                // Also set for unexpected failures: in both cases the prefetcher must not be cleaned up.
                var operationCanceled = false;
                TimeSpan indexingDuration = TimeSpan.Zero;
                var lastEtag = Etag.Empty;

                IndexingBatchInfo batchInfo = null;

                try
                {
                    using (MapIndexingInProgress(indexesToWorkOn))
                    {
                        List<JsonDocument> jsonDocs;
                        using (prefetchingBehavior.DocumentBatchFrom(indexingGroup.LastIndexedEtag, out jsonDocs))
                        {
                            try
                            {
                                if (Log.IsDebugEnabled)
                                {
                                    Log.Debug("Found a total of {0} documents that requires indexing since etag: {1}: ({2})",
                                        jsonDocs.Count, indexingGroup.LastIndexedEtag, string.Join(", ", jsonDocs.Select(x => x.Key)));
                                }

                                batchInfo = context.ReportIndexingBatchStarted(jsonDocs.Count, jsonDocs.Sum(x => x.SerializedSizeOnDisk), indexesToWorkOn.Select(x => x.Index.PublicName).ToList());

                                context.CancellationToken.ThrowIfCancellationRequested();

                                // The finally block below still runs and closes the (empty) batch.
                                if (jsonDocs.Count <= 0)
                                {
                                    return;
                                }

                                var sw = Stopwatch.StartNew();
                                lastEtag = DoActualIndexing(indexesToWorkOn, jsonDocs, batchInfo);
                                indexingDuration = sw.Elapsed;
                            }
                            catch (InvalidDataException e)
                            {
                                // Recorded as an index error for every index in the group; not rethrown.
                                Log.ErrorException("Failed to index because of data corruption. ", e);
                                indexesToWorkOn.ForEach(index =>
                                    context.AddError(index.IndexId, index.Index.PublicName, null, string.Format("Failed to index because of data corruption. Reason: {0}", e.Message)));
                            }
                            catch (OperationCanceledException)
                            {
                                operationCanceled = true;
                            }
                            catch (AggregateException e)
                            {
                                var allOperationsCanceled = e
                                    .InnerExceptions
                                    .All(x => x is OperationCanceledException);

                                // NOTE(review): on this rethrow operationCanceled stays false, so the finally
                                // block still cleans the prefetcher, unlike the generic catch below. Confirm intent.
                                if (allOperationsCanceled == false)
                                    throw;

                                operationCanceled = true;
                            }
                            catch (Exception)
                            {
                                //we should not clean docs from prefetching behavior because something
                                //unexpected has thrown
                                //logging will be done in catch in the outer scope
                                operationCanceled = true;

                                // ReSharper disable once ThrowingSystemException
                                throw;
                            }
                            finally
                            {
                                if (operationCanceled == false && jsonDocs != null && jsonDocs.Count > 0)
                                {
                                    prefetchingBehavior.CleanupDocuments(lastEtag);
                                    prefetchingBehavior.UpdateAutoThrottler(jsonDocs, indexingDuration);
                                }

                                prefetchingBehavior.BatchProcessingComplete();

                                if (batchInfo != null)
                                    context.ReportIndexingBatchCompleted(batchInfo);
                            }
                        }
                    }
                }
                catch (IndexDoesNotExistsException)
                {
                    Log.Info("Indexing batch was aborted because index was deleted, indexing batch will be retried");
                    throw;
                }
                catch (OperationCanceledException)
                {
                    //expected exception here, nothing to do
                }
                catch (AggregateException e)
                {
                    var allOperationsCanceled = e
                        .InnerExceptions
                        .All(x => x is OperationCanceledException);

                    // A non-cancellation aggregate is logged and recorded per index, but not rethrown.
                    if (allOperationsCanceled == false)
                    {
                        var message = string.Format("Unexpected AggregateException happened during execution of indexing batch...this is not supposed to happen. Reason: {0}", e);
                        Log.Error(message, e);

                        indexesToWorkOn.ForEach(index =>
                            context.AddError(index.IndexId, index.Index.PublicName, null, message));
                    }
                }
                catch (Exception e)
                {
                    //this is a precaution, no exception should happen at this point
                    var message = string.Format("Unexpected exception happened during execution of indexing batch...this is not supposed to happen. Reason: {0}", e);
                    Log.Error(message, e);

                    indexesToWorkOn.ForEach(index =>
                        context.AddError(index.IndexId, index.Index.PublicName, null, message));

                    //rethrow because we do not want to interrupt the existing exception flow
                    // ReSharper disable once ThrowingSystemException
                    throw;
                }
            }
        });
    }
    finally
    {
        // The callback rethrows unexpected exceptions, so ExecuteAll can fail. Undo the per-batch
        // tuner limit in that case too, otherwise it would outlive this batch.
        if (recoverTunerState != null)
            recoverTunerState();
    }

    RemoveUnusedPrefetchers(usedPrefetchers);
}
/// <summary>
/// Groups <paramref name="indexes"/> by etag range through the configured indexing classifier,
/// attaches a prefetcher to every group and passes the groups to
/// <c>BackgroundTaskExecuter.Instance.ExecuteAll</c>, where each group fetches its document batch
/// and indexes it. Unexpected failures are logged and recorded as index errors.
/// </summary>
/// <param name="indexes">Indexes to classify and index.</param>
protected override void ExecuteIndexingWork(IList<IndexToWorkOn> indexes)
{
    var indexingGroups = context.Configuration.IndexingClassifier.GroupMapIndexes(indexes);

    // Nothing to do: bail out before spending time on sorting.
    if (indexingGroups.Count == 0)
    {
        return;
    }

    var usedPrefetchers = new ConcurrentSet<PrefetchingBehavior>();

    // Prefetchers are assigned in descending etag order. The sorted sequence is consumed directly
    // because a dictionary rebuilt from it would not guarantee that enumeration order.
    var groupedIndexes = indexingGroups
        .OrderByDescending(x => x.Key)
        .Select(x =>
        {
            var result = new IndexingGroup
            {
                LastIndexedEtag = x.Key,
                Indexes = x.Value,
                LastQueryTime = x.Value.Max(y => y.Index.LastQueryTime),
            };

            SetPrefetcherForIndexingGroup(result, usedPrefetchers);

            return result;
        })
        // Groups whose indexes were queried most recently are processed first.
        .OrderByDescending(x => x.LastQueryTime)
        .ToList();

    var maxIndexOutputsPerDoc = groupedIndexes.Max(x => x.Indexes.Max(y => y.Index.MaxIndexOutputsPerDocument));
    var containsMapReduceIndexes = groupedIndexes.Any(x => x.Indexes.Any(y => y.Index.IsMapReduce));

    // May return null; otherwise the returned delegate must be invoked once the batch is over.
    var recoverTunerState = ((IndexBatchSizeAutoTuner)autoTuner).ConsiderLimitingNumberOfItemsToProcessForThisBatch(maxIndexOutputsPerDoc, containsMapReduceIndexes);

    try
    {
        BackgroundTaskExecuter.Instance.ExecuteAll(context, groupedIndexes, (indexingGroup, i) =>
        {
            context.CancellationToken.ThrowIfCancellationRequested();

            using (LogContext.WithDatabase(context.DatabaseName))
            {
                var prefetchingBehavior = indexingGroup.PrefetchingBehavior;
                var indexesToWorkOn = indexingGroup.Indexes;

                // Also set for unexpected failures: in both cases the prefetcher must not be cleaned up.
                var operationCanceled = false;
                TimeSpan indexingDuration = TimeSpan.Zero;
                var lastEtag = Etag.Empty;

                IndexingBatchInfo batchInfo = null;

                try
                {
                    using (MapIndexingInProgress(indexesToWorkOn))
                    {
                        List<JsonDocument> jsonDocs;
                        using (prefetchingBehavior.DocumentBatchFrom(indexingGroup.LastIndexedEtag, out jsonDocs))
                        {
                            try
                            {
                                if (Log.IsDebugEnabled)
                                {
                                    Log.Debug("Found a total of {0} documents that requires indexing since etag: {1}: ({2})",
                                        jsonDocs.Count, indexingGroup.LastIndexedEtag, string.Join(", ", jsonDocs.Select(x => x.Key)));
                                }

                                batchInfo = context.ReportIndexingBatchStarted(jsonDocs.Count, jsonDocs.Sum(x => x.SerializedSizeOnDisk), indexesToWorkOn.Select(x => x.Index.PublicName).ToList());

                                context.CancellationToken.ThrowIfCancellationRequested();

                                // The finally block below still runs and closes the (empty) batch.
                                if (jsonDocs.Count <= 0)
                                {
                                    return;
                                }

                                var sw = Stopwatch.StartNew();
                                lastEtag = DoActualIndexing(indexesToWorkOn, jsonDocs, batchInfo);
                                indexingDuration = sw.Elapsed;
                            }
                            catch (InvalidDataException e)
                            {
                                // Recorded as an index error for every index in the group; not rethrown.
                                Log.ErrorException("Failed to index because of data corruption. ", e);
                                indexesToWorkOn.ForEach(index =>
                                    context.AddError(index.IndexId, index.Index.PublicName, null, string.Format("Failed to index because of data corruption. Reason: {0}", e.Message)));
                            }
                            catch (OperationCanceledException)
                            {
                                operationCanceled = true;
                            }
                            catch (AggregateException e)
                            {
                                var allOperationsCanceled = e
                                    .InnerExceptions
                                    .All(x => x is OperationCanceledException);

                                // NOTE(review): on this rethrow operationCanceled stays false, so the finally
                                // block still cleans the prefetcher, unlike the generic catch below. Confirm intent.
                                if (allOperationsCanceled == false)
                                {
                                    throw;
                                }

                                operationCanceled = true;
                            }
                            catch (Exception)
                            {
                                //we should not clean docs from prefetching behavior because something
                                //unexpected has thrown
                                //logging will be done in catch in the outer scope
                                operationCanceled = true;

                                // ReSharper disable once ThrowingSystemException
                                throw;
                            }
                            finally
                            {
                                if (operationCanceled == false && jsonDocs != null && jsonDocs.Count > 0)
                                {
                                    prefetchingBehavior.CleanupDocuments(lastEtag);
                                    prefetchingBehavior.UpdateAutoThrottler(jsonDocs, indexingDuration);
                                }

                                prefetchingBehavior.BatchProcessingComplete();

                                if (batchInfo != null)
                                {
                                    context.ReportIndexingBatchCompleted(batchInfo);
                                }
                            }
                        }
                    }
                }
                catch (OperationCanceledException)
                {
                    //expected exception here, nothing to do
                }
                catch (AggregateException e)
                {
                    var allOperationsCanceled = e
                        .InnerExceptions
                        .All(x => x is OperationCanceledException);

                    // A non-cancellation aggregate is logged and recorded per index, but not rethrown.
                    if (allOperationsCanceled == false)
                    {
                        var message = string.Format("Unexpected AggregateException happened during execution of indexing batch...this is not supposed to happen. Reason: {0}", e);
                        Log.Error(message, e);

                        indexesToWorkOn.ForEach(index =>
                            context.AddError(index.IndexId, index.Index.PublicName, null, message));
                    }
                }
                catch (Exception e)
                {
                    //this is a precaution, no exception should happen at this point
                    var message = string.Format("Unexpected exception happened during execution of indexing batch...this is not supposed to happen. Reason: {0}", e);
                    Log.Error(message, e);

                    indexesToWorkOn.ForEach(index =>
                        context.AddError(index.IndexId, index.Index.PublicName, null, message));

                    //rethrow because we do not want to interrupt the existing exception flow
                    // ReSharper disable once ThrowingSystemException
                    throw;
                }
            }
        });
    }
    finally
    {
        // The callback rethrows unexpected exceptions, so ExecuteAll can fail. Undo the per-batch
        // tuner limit in that case too, otherwise it would outlive this batch.
        if (recoverTunerState != null)
        {
            recoverTunerState();
        }
    }

    RemoveUnusedPrefetchers(usedPrefetchers);
}