public void IndexPrecomputedBatch(PrecomputedIndexingBatch precomputedBatch) { context.MetricsCounters.IndexedPerSecond.Mark(precomputedBatch.Documents.Count); var indexToWorkOn = new IndexToWorkOn() { Index = precomputedBatch.Index, IndexId = precomputedBatch.Index.indexId, LastIndexedEtag = Etag.Empty }; var indexingBatchForIndex = FilterIndexes(new List <IndexToWorkOn>() { indexToWorkOn }, precomputedBatch.Documents, precomputedBatch.LastIndexed).FirstOrDefault(); if (indexingBatchForIndex == null) { return; } if (Log.IsDebugEnabled) { Log.Debug("Going to index precomputed documents for a new index {0}. Count of precomputed docs {1}", precomputedBatch.Index.PublicName, precomputedBatch.Documents.Count); } HandleIndexingFor(indexingBatchForIndex, precomputedBatch.LastIndexed, precomputedBatch.LastModified); }
protected void HandleReduceForIndex(IndexToWorkOn indexToWorkOn) { var viewGenerator = context.IndexDefinitionStorage.GetViewGenerator(indexToWorkOn.IndexName); if (viewGenerator == null) { return; } bool operationCanceled = false; var itemsToDelete = new List <object>(); IList <ReduceTypePerKey> mappedResultsInfo = null; transactionalStorage.Batch(actions => { mappedResultsInfo = actions.MapReduce.GetReduceTypesPerKeys(indexToWorkOn.IndexName, context.CurrentNumberOfItemsToReduceInSingleBatch, context.NumberOfItemsToExecuteReduceInSingleStep).ToList(); }); var singleStepReduceKeys = mappedResultsInfo.Where(x => x.OperationTypeToPerform == ReduceType.SingleStep).Select(x => x.ReduceKey).ToArray(); var multiStepsReduceKeys = mappedResultsInfo.Where(x => x.OperationTypeToPerform == ReduceType.MultiStep).Select(x => x.ReduceKey).ToArray(); try { if (singleStepReduceKeys.Length > 0) { SingleStepReduce(indexToWorkOn, singleStepReduceKeys, viewGenerator, itemsToDelete); } if (multiStepsReduceKeys.Length > 0) { MultiStepReduce(indexToWorkOn, multiStepsReduceKeys, viewGenerator, itemsToDelete); } } catch (OperationCanceledException) { operationCanceled = true; } finally { if (operationCanceled == false) { // whatever we succeeded in indexing or not, we have to update this // because otherwise we keep trying to re-index failed mapped results transactionalStorage.Batch(actions => { var latest = actions.MapReduce.DeleteScheduledReduction(itemsToDelete); if (latest == null) { return; } actions.Indexing.UpdateLastReduced(indexToWorkOn.IndexName, latest.Etag, latest.Timestamp); }); } } }
protected void HandleReduceForIndex(IndexToWorkOn indexToWorkOn) { var viewGenerator = context.IndexDefinitionStorage.GetViewGenerator(indexToWorkOn.IndexId); if (viewGenerator == null) return; bool operationCanceled = false; var itemsToDelete = new ConcurrentSet<object>(); IList<ReduceTypePerKey> mappedResultsInfo = null; transactionalStorage.Batch(actions => { mappedResultsInfo = actions.MapReduce.GetReduceTypesPerKeys(indexToWorkOn.IndexId, context.CurrentNumberOfItemsToReduceInSingleBatch, context.NumberOfItemsToExecuteReduceInSingleStep).ToList(); }); var singleStepReduceKeys = mappedResultsInfo.Where(x => x.OperationTypeToPerform == ReduceType.SingleStep).Select(x => x.ReduceKey).ToArray(); var multiStepsReduceKeys = mappedResultsInfo.Where(x => x.OperationTypeToPerform == ReduceType.MultiStep).Select(x => x.ReduceKey).ToArray(); currentlyProcessedIndexes.TryAdd(indexToWorkOn.IndexId, indexToWorkOn.Index); try { if (singleStepReduceKeys.Length > 0) { Log.Debug("SingleStep reduce for keys: {0}", string.Join(",", singleStepReduceKeys)); SingleStepReduce(indexToWorkOn, singleStepReduceKeys, viewGenerator, itemsToDelete); } if (multiStepsReduceKeys.Length > 0) { Log.Debug("MultiStep reduce for keys: {0}", string.Join(",", multiStepsReduceKeys)); MultiStepReduce(indexToWorkOn, multiStepsReduceKeys, viewGenerator, itemsToDelete); } } catch (OperationCanceledException) { operationCanceled = true; } finally { if (operationCanceled == false) { // whatever we succeeded in indexing or not, we have to update this // because otherwise we keep trying to re-index failed mapped results transactionalStorage.Batch(actions => { var latest = actions.MapReduce.DeleteScheduledReduction(itemsToDelete); if (latest == null) return; actions.Indexing.UpdateLastReduced(indexToWorkOn.Index.indexId, latest.Etag, latest.Timestamp); }); } Index _; currentlyProcessedIndexes.TryRemove(indexToWorkOn.IndexId, out _); } }
protected void HandleReduceForIndex(IndexToWorkOn indexToWorkOn) { List <MappedResultInfo> reduceKeyAndEtags = null; try { transactionalStorage.Batch(actions => { reduceKeyAndEtags = actions.MappedResults.GetMappedResultsReduceKeysAfter ( indexToWorkOn.IndexName, indexToWorkOn.LastIndexedEtag, loadData: false, // for reduce operations, we use the smaller value, rather than tuning stuff on the fly // the reason for that is that we may have large number of map values to reduce anyway, // so we don't want to try to load too much all at once. take: context.Configuration.InitialNumberOfItemsToIndexInSingleBatch ) .ToList(); if (log.IsDebugEnabled) { if (reduceKeyAndEtags.Count > 0) { log.Debug(() => string.Format("Found {0} mapped results for keys [{1}] for index {2}", reduceKeyAndEtags.Count, string.Join(", ", reduceKeyAndEtags.Select(x => x.ReduceKey).Distinct()), indexToWorkOn.IndexName)); } else { log.Debug("No reduce keys found for {0}", indexToWorkOn.IndexName); } } new ReduceTask { Index = indexToWorkOn.IndexName, ReduceKeys = reduceKeyAndEtags.Select(x => x.ReduceKey).Distinct().ToArray(), }.Execute(context); }); } finally { if (reduceKeyAndEtags != null && reduceKeyAndEtags.Count > 0) { var lastByEtag = GetLastByEtag(reduceKeyAndEtags); var lastEtag = lastByEtag.Etag; var lastIndexedEtag = new ComparableByteArray(lastEtag.ToByteArray()); // whatever we succeeded in indexing or not, we have to update this // because otherwise we keep trying to re-index failed mapped results transactionalStorage.Batch(actions => { if (new ComparableByteArray(indexToWorkOn.LastIndexedEtag.ToByteArray()).CompareTo(lastIndexedEtag) <= 0) { actions.Indexing.UpdateLastReduced(indexToWorkOn.IndexName, lastByEtag.Etag, lastByEtag.Timestamp); } }); } } }
private void MarkIndexes(IndexToWorkOn indexToWorkOn, ComparableByteArray lastIndexedEtag, IStorageActionsAccessor actions, Guid lastEtag, DateTime lastModified) { if (new ComparableByteArray(indexToWorkOn.LastIndexedEtag.ToByteArray()).CompareTo(lastIndexedEtag) > 0) { return; } actions.Indexing.UpdateLastIndexed(indexToWorkOn.IndexName, lastEtag, lastModified); }
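The comparison in MarkIndexes above only ever moves the stored last-indexed etag forward. A minimal sketch of that guard, assuming ComparableByteArray orders etags by a byte-wise lexicographic comparison; the Compare helper and the byte values below are illustrative stand-ins, not the actual type:

using System;

static class EtagGuardSketch
{
    // Byte-wise comparison, mirroring what ComparableByteArray.CompareTo is assumed to do.
    static int Compare(byte[] a, byte[] b)
    {
        for (var i = 0; i < Math.Min(a.Length, b.Length); i++)
        {
            var cmp = a[i].CompareTo(b[i]);
            if (cmp != 0) return cmp;
        }
        return a.Length.CompareTo(b.Length);
    }

    static void Main()
    {
        byte[] stored    = { 0, 0, 0, 9 }; // etag already recorded for the index
        byte[] candidate = { 0, 0, 0, 5 }; // etag of the batch that just finished

        // Same shape as the check in MarkIndexes: if the stored etag is already ahead,
        // skip the update so a late or retried batch cannot move the marker backwards.
        if (Compare(stored, candidate) > 0)
            Console.WriteLine("stored etag is newer - skip UpdateLastIndexed");
        else
            Console.WriteLine("advance last-indexed etag to the candidate");
    }
}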
protected void HandleReduceForIndex(IndexToWorkOn indexToWorkOn) { List <MappedResultInfo> reduceKeyAndEtags = null; try { transactionalStorage.Batch(actions => { reduceKeyAndEtags = actions.MappedResults.GetMappedResultsReduceKeysAfter ( indexToWorkOn.IndexName, indexToWorkOn.LastIndexedEtag, loadData: false, take: autoTuner.NumberOfItemsToIndexInSingleBatch ) .ToList(); if (log.IsDebugEnabled) { if (reduceKeyAndEtags.Count > 0) { log.Debug(() => string.Format("Found {0} mapped results for keys [{1}] for index {2}", reduceKeyAndEtags.Count, string.Join(", ", reduceKeyAndEtags.Select(x => x.ReduceKey).Distinct()), indexToWorkOn.IndexName)); } else { log.Debug("No reduce keys found for {0}", indexToWorkOn.IndexName); } } new ReduceTask { Index = indexToWorkOn.IndexName, ReduceKeys = reduceKeyAndEtags.Select(x => x.ReduceKey).Distinct().ToArray(), }.Execute(context); }); } finally { if (reduceKeyAndEtags != null && reduceKeyAndEtags.Count > 0) { var lastByEtag = GetLastByEtag(reduceKeyAndEtags); var lastEtag = lastByEtag.Etag; var lastIndexedEtag = new ComparableByteArray(lastEtag.ToByteArray()); // whatever we succeeded in indexing or not, we have to update this // because otherwise we keep trying to re-index failed mapped results transactionalStorage.Batch(actions => { if (new ComparableByteArray(indexToWorkOn.LastIndexedEtag.ToByteArray()).CompareTo(lastIndexedEtag) <= 0) { actions.Indexing.UpdateLastReduced(indexToWorkOn.IndexName, lastByEtag.Etag, lastByEtag.Timestamp); } }); autoTuner.AutoThrottleBatchSize(reduceKeyAndEtags.Count, reduceKeyAndEtags.Sum(x => x.Size)); } } }
protected void HandleReduceForIndex(IndexToWorkOn indexToWorkOn) { TimeSpan reduceDuration= TimeSpan.Zero; List<MappedResultInfo> reduceKeyAndEtags = null; try { transactionalStorage.Batch(actions => { reduceKeyAndEtags = actions.MappedResults.GetMappedResultsReduceKeysAfter ( indexToWorkOn.IndexName, indexToWorkOn.LastIndexedEtag, loadData: false, take: autoTuner.NumberOfItemsToIndexInSingleBatch ) .ToList(); if(log.IsDebugEnabled) { if (reduceKeyAndEtags.Count > 0) log.Debug(() => string.Format("Found {0} mapped results for keys [{1}] for index {2}", reduceKeyAndEtags.Count, string.Join(", ", reduceKeyAndEtags.Select(x => x.ReduceKey).Distinct()), indexToWorkOn.IndexName)); else log.Debug("No reduce keys found for {0}", indexToWorkOn.IndexName); } var sw = Stopwatch.StartNew(); new ReduceTask { Index = indexToWorkOn.IndexName, ReduceKeys = reduceKeyAndEtags.Select(x => x.ReduceKey).Distinct().ToArray(), }.Execute(context); reduceDuration = sw.Elapsed; }); } finally { if (reduceKeyAndEtags != null && reduceKeyAndEtags.Count > 0) { var lastByEtag = GetLastByEtag(reduceKeyAndEtags); var lastEtag = lastByEtag.Etag; var lastIndexedEtag = new ComparableByteArray(lastEtag.ToByteArray()); // whatever we succeeded in indexing or not, we have to update this // because otherwise we keep trying to re-index failed mapped results transactionalStorage.Batch(actions => { if (new ComparableByteArray(indexToWorkOn.LastIndexedEtag.ToByteArray()).CompareTo(lastIndexedEtag) <= 0) { actions.Indexing.UpdateLastReduced(indexToWorkOn.IndexName, lastByEtag.Etag, lastByEtag.Timestamp); } }); autoTuner.AutoThrottleBatchSize(reduceKeyAndEtags.Count, reduceKeyAndEtags.Sum(x => x.Size), reduceDuration); } } }
protected void HandleReduceForIndex(IndexToWorkOn indexToWorkOn) { List<MappedResultInfo> reduceKeyAndEtags = null; try { transactionalStorage.Batch(actions => { reduceKeyAndEtags = actions.MappedResults.GetMappedResultsReduceKeysAfter ( indexToWorkOn.IndexName, indexToWorkOn.LastIndexedEtag, loadData: false, // for reduce operations, we use the smaller value, rather than tuning stuff on the fly // the reason for that is that we may have large number of map values to reduce anyway, // so we don't want to try to load too much all at once. take: context.Configuration.InitialNumberOfItemsToIndexInSingleBatch ) .ToList(); if(log.IsDebugEnabled) { if (reduceKeyAndEtags.Count > 0) log.Debug(() => string.Format("Found {0} mapped results for keys [{1}] for index {2}", reduceKeyAndEtags.Count, string.Join(", ", reduceKeyAndEtags.Select(x => x.ReduceKey).Distinct()), indexToWorkOn.IndexName)); else log.Debug("No reduce keys found for {0}", indexToWorkOn.IndexName); } new ReduceTask { Index = indexToWorkOn.IndexName, ReduceKeys = reduceKeyAndEtags.Select(x => x.ReduceKey).Distinct().ToArray(), }.Execute(context); }); } finally { if (reduceKeyAndEtags != null && reduceKeyAndEtags.Count > 0) { var lastByEtag = GetLastByEtag(reduceKeyAndEtags); var lastEtag = lastByEtag.Etag; var lastIndexedEtag = new ComparableByteArray(lastEtag.ToByteArray()); // whatever we succeeded in indexing or not, we have to update this // because otherwise we keep trying to re-index failed mapped results transactionalStorage.Batch(actions => { if (new ComparableByteArray(indexToWorkOn.LastIndexedEtag.ToByteArray()).CompareTo(lastIndexedEtag) <= 0) { actions.Indexing.UpdateLastReduced(indexToWorkOn.IndexName, lastByEtag.Etag, lastByEtag.Timestamp); } }); } } }
protected void HandleReduceForIndex(IndexToWorkOn indexToWorkOn) { List<MappedResultInfo> reduceKeyAndEtags = null; try { transactionalStorage.Batch(actions => { reduceKeyAndEtags = actions.MappedResults.GetMappedResultsReduceKeysAfter ( indexToWorkOn.IndexName, indexToWorkOn.LastIndexedEtag ).ToList(); if(log.IsDebugEnabled) { if (reduceKeyAndEtags.Count > 0) log.Debug(() => string.Format("Found {0} mapped results for keys [{1}] for index {2}", reduceKeyAndEtags.Count, string.Join(", ", reduceKeyAndEtags.Select(x => x.ReduceKey).Distinct()), indexToWorkOn.IndexName)); else log.Debug("No reduce keys found for {0}", indexToWorkOn.IndexName); } new ReduceTask { Index = indexToWorkOn.IndexName, ReduceKeys = reduceKeyAndEtags.Select(x => x.ReduceKey).Distinct().ToArray(), }.Execute(context); }); } finally { if (reduceKeyAndEtags != null && reduceKeyAndEtags.Count > 0) { var lastByEtag = GetLastByEtag(reduceKeyAndEtags); var lastEtag = lastByEtag.Etag; var lastIndexedEtag = new ComparableByteArray(lastEtag.ToByteArray()); // whatever we succeeded in indexing or not, we have to update this // because otherwise we keep trying to re-index failed mapped results transactionalStorage.Batch(actions => { if (new ComparableByteArray(indexToWorkOn.LastIndexedEtag.ToByteArray()).CompareTo(lastIndexedEtag) <= 0) { actions.Indexing.UpdateLastReduced(indexToWorkOn.IndexName, lastByEtag.Etag, lastByEtag.Timestamp); } }); } } }
protected ReducingPerformanceStats[] HandleReduceForIndex(IndexToWorkOn indexToWorkOn, CancellationToken token) { var viewGenerator = context.IndexDefinitionStorage.GetViewGenerator(indexToWorkOn.IndexId); if (viewGenerator == null) { return(null); } bool operationCanceled = false; var itemsToDelete = new ConcurrentSet <object>(); var singleStepReduceKeys = new List <string>(); var multiStepsReduceKeys = new List <string>(); transactionalStorage.Batch(actions => { var mappedResultsInfo = actions.MapReduce.GetReduceTypesPerKeys(indexToWorkOn.IndexId, context.CurrentNumberOfItemsToReduceInSingleBatch, context.NumberOfItemsToExecuteReduceInSingleStep, token); foreach (var key in mappedResultsInfo) { token.ThrowIfCancellationRequested(); switch (key.OperationTypeToPerform) { case ReduceType.SingleStep: singleStepReduceKeys.Add(key.ReduceKey); break; case ReduceType.MultiStep: multiStepsReduceKeys.Add(key.ReduceKey); break; } } }); currentlyProcessedIndexes.TryAdd(indexToWorkOn.IndexId, indexToWorkOn.Index); var performanceStats = new List <ReducingPerformanceStats>(); try { if (singleStepReduceKeys.Count > 0) { if (Log.IsDebugEnabled) { Log.Debug("SingleStep reduce for keys: {0}", string.Join(",", singleStepReduceKeys)); } var singleStepStats = SingleStepReduce(indexToWorkOn, singleStepReduceKeys, viewGenerator, itemsToDelete, token); performanceStats.Add(singleStepStats); } if (multiStepsReduceKeys.Count > 0) { if (Log.IsDebugEnabled) { Log.Debug("MultiStep reduce for keys: {0}", string.Join(",", multiStepsReduceKeys)); } var multiStepStats = MultiStepReduce(indexToWorkOn, multiStepsReduceKeys, viewGenerator, itemsToDelete, token); performanceStats.Add(multiStepStats); } } catch (OperationCanceledException) { operationCanceled = true; } catch (AggregateException e) { var anyOperationsCanceled = e .InnerExceptions .OfType <OperationCanceledException>() .Any(); if (anyOperationsCanceled == false) { throw; } operationCanceled = true; } finally { var postReducingOperations = new ReduceLevelPeformanceStats { Level = -1, Started = SystemTime.UtcNow }; if (operationCanceled == false) { var deletingScheduledReductionsDuration = new Stopwatch(); var storageCommitDuration = new Stopwatch(); // whatever we succeeded in indexing or not, we have to update this // because otherwise we keep trying to re-index failed mapped results transactionalStorage.Batch(actions => { actions.BeforeStorageCommit += storageCommitDuration.Start; actions.AfterStorageCommit += storageCommitDuration.Stop; ScheduledReductionInfo latest; using (StopwatchScope.For(deletingScheduledReductionsDuration)) { latest = actions.MapReduce.DeleteScheduledReduction(itemsToDelete); } if (latest == null) { return; } actions.Indexing.UpdateLastReduced(indexToWorkOn.IndexId, latest.Etag, latest.Timestamp); }); postReducingOperations.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_DeleteScheduledReductions, deletingScheduledReductionsDuration.ElapsedMilliseconds)); postReducingOperations.Operations.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds)); } postReducingOperations.Completed = SystemTime.UtcNow; postReducingOperations.Duration = postReducingOperations.Completed - postReducingOperations.Started; performanceStats.Add(new ReducingPerformanceStats(ReduceType.None) { LevelStats = new List <ReduceLevelPeformanceStats> { postReducingOperations } }); Index _; currentlyProcessedIndexes.TryRemove(indexToWorkOn.IndexId, out _); } return(performanceStats.ToArray()); }
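The GetReduceTypesPerKeys call above decides, per reduce key, whether the work goes down the single-step or the multi-step path. A rough sketch of that split, assuming the decision is driven by the NumberOfItemsToExecuteReduceInSingleStep threshold; the in-memory dictionary, key names, and threshold value are illustrative stand-ins, not the storage engine's API:

using System;
using System.Collections.Generic;
using System.Linq;

class ReduceTypeSplitSketch
{
    static void Main()
    {
        const int numberOfItemsToExecuteReduceInSingleStep = 1024; // illustrative value

        // hypothetical pending mapped-result counts per reduce key
        var pendingMappedResultsPerKey = new Dictionary<string, int>
        {
            ["orders/small-customer"] = 12,
            ["orders/big-customer"] = 50_000,
        };

        var singleStepReduceKeys = pendingMappedResultsPerKey
            .Where(kvp => kvp.Value <= numberOfItemsToExecuteReduceInSingleStep)
            .Select(kvp => kvp.Key)
            .ToList();

        var multiStepsReduceKeys = pendingMappedResultsPerKey
            .Where(kvp => kvp.Value > numberOfItemsToExecuteReduceInSingleStep)
            .Select(kvp => kvp.Key)
            .ToList();

        Console.WriteLine("SingleStep reduce for keys: {0}", string.Join(",", singleStepReduceKeys));
        Console.WriteLine("MultiStep reduce for keys: {0}", string.Join(",", multiStepsReduceKeys));
    }
}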
private ReducingPerformanceStats MultiStepReduce(IndexToWorkOn index, List <string> keysToReduce, AbstractViewGenerator viewGenerator, ConcurrentSet <object> itemsToDelete, CancellationToken token) { var needToMoveToMultiStep = new HashSet <string>(); transactionalStorage.Batch(actions => { foreach (var localReduceKey in keysToReduce) { token.ThrowIfCancellationRequested(); var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexId, localReduceKey); if (lastPerformedReduceType != ReduceType.MultiStep) { needToMoveToMultiStep.Add(localReduceKey); } if (lastPerformedReduceType != ReduceType.SingleStep) { continue; } // we exceeded the limit of items to reduce in single step // now we need to schedule reductions at level 0 for all map results with given reduce key var mappedItems = actions.MapReduce.GetMappedBuckets(index.IndexId, localReduceKey, token).ToList(); foreach (var result in mappedItems.Select(x => new ReduceKeyAndBucket(x, localReduceKey))) { actions.MapReduce.ScheduleReductions(index.IndexId, 0, result); } } }); var reducePerformance = new ReducingPerformanceStats(ReduceType.MultiStep); for (int i = 0; i < 3; i++) { var level = i; var reduceLevelStats = new ReduceLevelPeformanceStats() { Level = level, Started = SystemTime.UtcNow, }; var reduceParams = new GetItemsToReduceParams( index.IndexId, new HashSet <string>(keysToReduce), level, true, itemsToDelete); var gettingItemsToReduceDuration = new Stopwatch(); var scheduleReductionsDuration = new Stopwatch(); var removeReduceResultsDuration = new Stopwatch(); var storageCommitDuration = new Stopwatch(); bool retry = true; while (retry && reduceParams.ReduceKeys.Count > 0) { var reduceBatchAutoThrottlerId = Guid.NewGuid(); try { transactionalStorage.Batch(actions => { token.ThrowIfCancellationRequested(); actions.BeforeStorageCommit += storageCommitDuration.Start; actions.AfterStorageCommit += storageCommitDuration.Stop; var batchTimeWatcher = Stopwatch.StartNew(); reduceParams.Take = context.CurrentNumberOfItemsToReduceInSingleBatch; int size = 0; IList <MappedResultInfo> persistedResults; var reduceKeys = new HashSet <string>(StringComparer.InvariantCultureIgnoreCase); using (StopwatchScope.For(gettingItemsToReduceDuration)) { persistedResults = actions.MapReduce.GetItemsToReduce(reduceParams, token); foreach (var item in persistedResults) { reduceKeys.Add(item.ReduceKey); size += item.Size; } } if (persistedResults.Count == 0) { retry = false; return; } var count = persistedResults.Count; autoTuner.CurrentlyUsedBatchSizesInBytes.GetOrAdd(reduceBatchAutoThrottlerId, size); if (Log.IsDebugEnabled) { if (persistedResults.Count > 0) { Log.Debug(() => string.Format("Found {0} results for keys [{1}] for index {2} at level {3} in {4}", persistedResults.Count, string.Join(", ", persistedResults.Select(x => x.ReduceKey).Distinct()), index.Index.PublicName, level, batchTimeWatcher.Elapsed)); } else { Log.Debug("No reduce keys found for {0}", index.Index.PublicName); } } token.ThrowIfCancellationRequested(); var requiredReduceNextTimeSet = new HashSet <ReduceKeyAndBucket>(persistedResults.Select(x => new ReduceKeyAndBucket(x.Bucket, x.ReduceKey)), ReduceKeyAndBucketEqualityComparer.Instance); using (StopwatchScope.For(removeReduceResultsDuration)) { foreach (var mappedResultInfo in requiredReduceNextTimeSet) { token.ThrowIfCancellationRequested(); actions.MapReduce.RemoveReduceResults(index.IndexId, level + 1, mappedResultInfo.ReduceKey, mappedResultInfo.Bucket); } } if (level != 2) { var 
reduceKeysAndBucketsSet = new HashSet <ReduceKeyAndBucket>(requiredReduceNextTimeSet.Select(x => new ReduceKeyAndBucket(x.Bucket / 1024, x.ReduceKey)), ReduceKeyAndBucketEqualityComparer.Instance); using (StopwatchScope.For(scheduleReductionsDuration)) { foreach (var reduceKeysAndBucket in reduceKeysAndBucketsSet) { token.ThrowIfCancellationRequested(); actions.MapReduce.ScheduleReductions(index.IndexId, level + 1, reduceKeysAndBucket); } } } token.ThrowIfCancellationRequested(); var reduceTimeWatcher = Stopwatch.StartNew(); var results = persistedResults.Where(x => x.Data != null) .GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data)) .ToList(); var performance = context.IndexStorage.Reduce(index.IndexId, viewGenerator, results, level, context, actions, reduceKeys, persistedResults.Count); context.MetricsCounters.ReducedPerSecond.Mark(results.Count()); reduceLevelStats.Add(performance); var batchDuration = batchTimeWatcher.Elapsed; if (Log.IsDebugEnabled) { Log.Debug("Indexed {0} reduce keys in {1} with {2} results for index {3} in {4} on level {5}", reduceKeys.Count, batchDuration, performance.ItemsCount, index.Index.PublicName, reduceTimeWatcher.Elapsed, level); } autoTuner.AutoThrottleBatchSize(count, size, batchDuration); }); } finally { long _; autoTuner.CurrentlyUsedBatchSizesInBytes.TryRemove(reduceBatchAutoThrottlerId, out _); } } reduceLevelStats.Completed = SystemTime.UtcNow; reduceLevelStats.Duration = reduceLevelStats.Completed - reduceLevelStats.Started; reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_GetItemsToReduce, gettingItemsToReduceDuration.ElapsedMilliseconds)); reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_ScheduleReductions, scheduleReductionsDuration.ElapsedMilliseconds)); reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_RemoveReduceResults, removeReduceResultsDuration.ElapsedMilliseconds)); reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds)); reducePerformance.LevelStats.Add(reduceLevelStats); } foreach (var reduceKey in needToMoveToMultiStep) { token.ThrowIfCancellationRequested(); string localReduceKey = reduceKey; transactionalStorage.Batch(actions => actions.MapReduce.UpdatePerformedReduceType(index.IndexId, localReduceKey, ReduceType.MultiStep)); } return(reducePerformance); }
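The multi-step path above schedules level + 1 work using x.Bucket / 1024, so level-0 buckets roll up 1024-to-1 into level 1 and again into level 2. A small illustrative sketch of that rollup (the bucket value is arbitrary):

using System;

class BucketRollupSketch
{
    // Same divisor as the ScheduleReductions calls above.
    static int ParentBucket(int bucket) => bucket / 1024;

    static void Main()
    {
        var level0Bucket = 1_500_000;
        var level1Bucket = ParentBucket(level0Bucket); // 1464
        var level2Bucket = ParentBucket(level1Bucket); // 1

        Console.WriteLine("{0} -> {1} -> {2}", level0Bucket, level1Bucket, level2Bucket);
    }
}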
private void MultiStepReduce(IndexToWorkOn index, string[] keysToReduce, AbstractViewGenerator viewGenerator, ConcurrentSet <object> itemsToDelete) { var needToMoveToMultiStep = new HashSet <string>(); transactionalStorage.Batch(actions => { foreach (var localReduceKey in keysToReduce) { var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexName, localReduceKey); if (lastPerformedReduceType != ReduceType.MultiStep) { needToMoveToMultiStep.Add(localReduceKey); } if (lastPerformedReduceType != ReduceType.SingleStep) { continue; } // we exceeded the limit of items to reduce in single step // now we need to schedule reductions at level 0 for all map results with given reduce key var mappedItems = actions.MapReduce.GetMappedBuckets(index.IndexName, localReduceKey).ToList(); foreach (var result in mappedItems.Select(x => new ReduceKeyAndBucket(x, localReduceKey))) { actions.MapReduce.ScheduleReductions(index.IndexName, 0, result); } } }); for (int i = 0; i < 3; i++) { var level = i; var reduceParams = new GetItemsToReduceParams( index.IndexName, keysToReduce, level, true, itemsToDelete); bool retry = true; while (retry && reduceParams.ReduceKeys.Count > 0) { var reduceBatchAutoThrottlerId = Guid.NewGuid(); try { transactionalStorage.Batch(actions => { context.CancellationToken.ThrowIfCancellationRequested(); var batchTimeWatcher = Stopwatch.StartNew(); reduceParams.Take = context.CurrentNumberOfItemsToReduceInSingleBatch; var persistedResults = actions.MapReduce.GetItemsToReduce(reduceParams).ToList(); if (persistedResults.Count == 0) { retry = false; return; } var count = persistedResults.Count; var size = persistedResults.Sum(x => x.Size); autoTuner.CurrentlyUsedBatchSizes.GetOrAdd(reduceBatchAutoThrottlerId, size); if (Log.IsDebugEnabled) { if (persistedResults.Count > 0) { Log.Debug(() => string.Format("Found {0} results for keys [{1}] for index {2} at level {3} in {4}", persistedResults.Count, string.Join(", ", persistedResults.Select(x => x.ReduceKey).Distinct()), index.IndexName, level, batchTimeWatcher.Elapsed)); } else { Log.Debug("No reduce keys found for {0}", index.IndexName); } } context.CancellationToken.ThrowIfCancellationRequested(); var requiredReduceNextTime = persistedResults.Select(x => new ReduceKeyAndBucket(x.Bucket, x.ReduceKey)) .OrderBy(x => x.Bucket) .Distinct() .ToArray(); foreach (var mappedResultInfo in requiredReduceNextTime) { actions.MapReduce.RemoveReduceResults(index.IndexName, level + 1, mappedResultInfo.ReduceKey, mappedResultInfo.Bucket); } if (level != 2) { var reduceKeysAndBuckets = requiredReduceNextTime .Select(x => new ReduceKeyAndBucket(x.Bucket / 1024, x.ReduceKey)) .Distinct() .ToArray(); foreach (var reduceKeysAndBucket in reduceKeysAndBuckets) { actions.MapReduce.ScheduleReductions(index.IndexName, level + 1, reduceKeysAndBucket); } } var results = persistedResults .Where(x => x.Data != null) .GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data)) .ToArray(); var reduceKeys = new HashSet <string>(persistedResults.Select(x => x.ReduceKey), StringComparer.InvariantCultureIgnoreCase); context.PerformanceCounters.ReducedPerSecond.IncrementBy(results.Length); context.CancellationToken.ThrowIfCancellationRequested(); var reduceTimeWatcher = Stopwatch.StartNew(); context.IndexStorage.Reduce(index.IndexName, viewGenerator, results, level, context, actions, reduceKeys, persistedResults.Count); var batchDuration = batchTimeWatcher.Elapsed; Log.Debug("Indexed {0} reduce keys in {1} with {2} results for index {3} in {4} on 
level {5}", reduceKeys.Count, batchDuration, results.Length, index.IndexName, reduceTimeWatcher.Elapsed, level); autoTuner.AutoThrottleBatchSize(count, size, batchDuration); }); } finally { long _; autoTuner.CurrentlyUsedBatchSizes.TryRemove(reduceBatchAutoThrottlerId, out _); } } } foreach (var reduceKey in needToMoveToMultiStep) { string localReduceKey = reduceKey; transactionalStorage.Batch(actions => actions.MapReduce.UpdatePerformedReduceType(index.IndexName, localReduceKey, ReduceType.MultiStep)); } }
public void IndexPrecomputedBatch(PrecomputedIndexingBatch precomputedBatch) { context.MetricsCounters.IndexedPerSecond.Mark(precomputedBatch.Documents.Count); var indexToWorkOn = new IndexToWorkOn() { Index = precomputedBatch.Index, IndexId = precomputedBatch.Index.indexId, LastIndexedEtag = Etag.Empty }; using (MapIndexingInProgress(new List<IndexToWorkOn> {indexToWorkOn})) { var indexingBatchForIndex = FilterIndexes(new List<IndexToWorkOn>() { indexToWorkOn }, precomputedBatch.Documents, precomputedBatch.LastIndexed).FirstOrDefault(); if (indexingBatchForIndex == null) return; IndexingBatchInfo batchInfo = null; try { context.ReportIndexingBatchStarted(precomputedBatch.Documents.Count, precomputedBatch.Documents.Sum(x => x.SerializedSizeOnDisk), new List<string>() { indexToWorkOn.Index.PublicName }, out batchInfo); batchInfo.BatchType = BatchType.Precomputed; if (Log.IsDebugEnabled) { Log.Debug("Going to index precomputed documents for a new index {0}. Count of precomputed docs {1}", precomputedBatch.Index.PublicName, precomputedBatch.Documents.Count); } HandleIndexingFor(indexingBatchForIndex, precomputedBatch.LastIndexed, precomputedBatch.LastModified); } finally { var performance = indexingBatchForIndex.Batch.GetIndexingPerformance(); if (batchInfo != null) { if (performance != null) batchInfo.PerformanceStats.TryAdd(indexingBatchForIndex.Index.PublicName, performance); batchInfo.BatchCompleted(); } } } }
private void MultiStepReduce(IndexToWorkOn index, string[] keysToReduce, AbstractViewGenerator viewGenerator, ConcurrentSet<object> itemsToDelete) { var needToMoveToMultiStep = new HashSet<string>(); transactionalStorage.Batch(actions => { foreach (var localReduceKey in keysToReduce) { var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexName, localReduceKey); if (lastPerformedReduceType != ReduceType.MultiStep) needToMoveToMultiStep.Add(localReduceKey); if (lastPerformedReduceType != ReduceType.SingleStep) continue; // we exceeded the limit of items to reduce in single step // now we need to schedule reductions at level 0 for all map results with given reduce key var mappedItems = actions.MapReduce.GetMappedBuckets(index.IndexName, localReduceKey).ToList(); foreach (var result in mappedItems.Select(x => new ReduceKeyAndBucket(x, localReduceKey))) { actions.MapReduce.ScheduleReductions(index.IndexName, 0, result); } } }); for (int i = 0; i < 3; i++) { var level = i; var reduceParams = new GetItemsToReduceParams( index.IndexName, keysToReduce, level, true, itemsToDelete); bool retry = true; while (retry && reduceParams.ReduceKeys.Count > 0) { var reduceBatchAutoThrottlerId = Guid.NewGuid(); try { transactionalStorage.Batch(actions => { context.CancellationToken.ThrowIfCancellationRequested(); var batchTimeWatcher = Stopwatch.StartNew(); reduceParams.Take = context.CurrentNumberOfItemsToReduceInSingleBatch; var persistedResults = actions.MapReduce.GetItemsToReduce(reduceParams).ToList(); if (persistedResults.Count == 0) { retry = false; return; } var count = persistedResults.Count; var size = persistedResults.Sum(x => x.Size); autoTuner.CurrentlyUsedBatchSizes.GetOrAdd(reduceBatchAutoThrottlerId, size); if (Log.IsDebugEnabled) { if (persistedResults.Count > 0) Log.Debug(() => string.Format("Found {0} results for keys [{1}] for index {2} at level {3} in {4}", persistedResults.Count, string.Join(", ", persistedResults.Select(x => x.ReduceKey).Distinct()), index.IndexName, level, batchTimeWatcher.Elapsed)); else Log.Debug("No reduce keys found for {0}", index.IndexName); } context.CancellationToken.ThrowIfCancellationRequested(); var requiredReduceNextTime = persistedResults.Select(x => new ReduceKeyAndBucket(x.Bucket, x.ReduceKey)) .OrderBy(x => x.Bucket) .Distinct() .ToArray(); foreach (var mappedResultInfo in requiredReduceNextTime) { actions.MapReduce.RemoveReduceResults(index.IndexName, level + 1, mappedResultInfo.ReduceKey, mappedResultInfo.Bucket); } if (level != 2) { var reduceKeysAndBuckets = requiredReduceNextTime .Select(x => new ReduceKeyAndBucket(x.Bucket / 1024, x.ReduceKey)) .Distinct() .ToArray(); foreach (var reduceKeysAndBucket in reduceKeysAndBuckets) { actions.MapReduce.ScheduleReductions(index.IndexName, level + 1, reduceKeysAndBucket); } } var results = persistedResults .Where(x => x.Data != null) .GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data)) .ToArray(); var reduceKeys = new HashSet<string>(persistedResults.Select(x => x.ReduceKey), StringComparer.InvariantCultureIgnoreCase); context.PerformanceCounters.ReducedPerSecond.IncrementBy(results.Length); context.CancellationToken.ThrowIfCancellationRequested(); var reduceTimeWatcher = Stopwatch.StartNew(); context.IndexStorage.Reduce(index.IndexName, viewGenerator, results, level, context, actions, reduceKeys, persistedResults.Count); var batchDuration = batchTimeWatcher.Elapsed; Log.Debug("Indexed {0} reduce keys in {1} with {2} results for index {3} in {4} on level {5}", 
reduceKeys.Count, batchDuration, results.Length, index.IndexName, reduceTimeWatcher.Elapsed, level); autoTuner.AutoThrottleBatchSize(count, size, batchDuration); }); } finally { long _; autoTuner.CurrentlyUsedBatchSizes.TryRemove(reduceBatchAutoThrottlerId, out _); } } } foreach (var reduceKey in needToMoveToMultiStep) { string localReduceKey = reduceKey; transactionalStorage.Batch(actions => actions.MapReduce.UpdatePerformedReduceType(index.IndexName, localReduceKey, ReduceType.MultiStep)); } }
private ReduceResultStats MultiStepReduce(IndexToWorkOn index, string[] keysToReduce, AbstractViewGenerator viewGenerator, List<object> itemsToDelete) { var result = new ReduceResultStats(); var needToMoveToMultiStep = new HashSet<string>(); transactionalStorage.Batch(actions => { foreach (var localReduceKey in keysToReduce) { var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexName, localReduceKey); if (lastPerformedReduceType != ReduceType.MultiStep) needToMoveToMultiStep.Add(localReduceKey); if (lastPerformedReduceType != ReduceType.SingleStep) continue; // we exceeded the limit of items to reduce in single step // now we need to schedule reductions at level 0 for all map results with given reduce key var mappedItems = actions.MapReduce.GetMappedBuckets(index.IndexName, localReduceKey).ToList(); actions.MapReduce.ScheduleReductions(index.IndexName, 0, mappedItems.Select(x => new ReduceKeyAndBucket(x, localReduceKey))); } }); for (int i = 0; i < 3; i++) { var level = i; transactionalStorage.Batch(actions => { context.CancellationToken.ThrowIfCancellationRequested(); var persistedResults = actions.MapReduce.GetItemsToReduce ( level: level, reduceKeys: keysToReduce, index: index.IndexName, itemsToDelete: itemsToDelete, loadData: true ).ToList(); var sp = Stopwatch.StartNew(); result.count += persistedResults.Count; result.size += persistedResults.Sum(x => x.Size); if (Log.IsDebugEnabled) { if (persistedResults.Count > 0) Log.Debug(() => string.Format("Found {0} results for keys [{1}] for index {2} at level {3} in {4}", persistedResults.Count, string.Join(", ", persistedResults.Select(x => x.ReduceKey).Distinct()), index.IndexName, level, sp.Elapsed)); else Log.Debug("No reduce keys found for {0}", index.IndexName); } context.CancellationToken.ThrowIfCancellationRequested(); var requiredReduceNextTime = persistedResults.Select(x => new ReduceKeyAndBucket(x.Bucket, x.ReduceKey)) .OrderBy(x => x.Bucket) .Distinct() .ToArray(); foreach (var mappedResultInfo in requiredReduceNextTime) { actions.MapReduce.RemoveReduceResults(index.IndexName, level + 1, mappedResultInfo.ReduceKey, mappedResultInfo.Bucket); } if (level != 2) { var reduceKeysAndBuckets = requiredReduceNextTime .Select(x => new ReduceKeyAndBucket(x.Bucket/1024, x.ReduceKey)) .Distinct() .ToArray(); actions.MapReduce.ScheduleReductions(index.IndexName, level + 1, reduceKeysAndBuckets); } var results = persistedResults .Where(x => x.Data != null) .GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data)) .ToArray(); var reduceKeys = new HashSet<string>(persistedResults.Select(x => x.ReduceKey), StringComparer.InvariantCultureIgnoreCase); context.ReducedPerSecIncreaseBy(results.Length); context.CancellationToken.ThrowIfCancellationRequested(); sp = Stopwatch.StartNew(); context.IndexStorage.Reduce(index.IndexName, viewGenerator, results, level, context, actions, reduceKeys); Log.Debug("Indexed {0} reduce keys in {1} with {2} results for index {3} in {4}", reduceKeys.Count, sp.Elapsed, results.Length, index.IndexName, sp.Elapsed); }); } foreach (var reduceKey in needToMoveToMultiStep) { string localReduceKey = reduceKey; transactionalStorage.Batch(actions => actions.MapReduce.UpdatePerformedReduceType(index.IndexName, localReduceKey, ReduceType.MultiStep)); } return result; }
private ReducingPerformanceStats MultiStepReduce(IndexToWorkOn index, List<string> keysToReduce, AbstractViewGenerator viewGenerator, ConcurrentSet<object> itemsToDelete, CancellationToken token) { var needToMoveToMultiStep = new HashSet<string>(); transactionalStorage.Batch(actions => { foreach (var localReduceKey in keysToReduce) { token.ThrowIfCancellationRequested(); var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexId, localReduceKey); if (lastPerformedReduceType != ReduceType.MultiStep) needToMoveToMultiStep.Add(localReduceKey); if (lastPerformedReduceType != ReduceType.SingleStep) continue; // we exceeded the limit of items to reduce in single step // now we need to schedule reductions at level 0 for all map results with given reduce key var mappedItems = actions.MapReduce.GetMappedBuckets(index.IndexId, localReduceKey, token).ToList(); foreach (var result in mappedItems.Select(x => new ReduceKeyAndBucket(x, localReduceKey))) { actions.MapReduce.ScheduleReductions(index.IndexId, 0, result); } } }); var reducePerformance = new ReducingPerformanceStats(ReduceType.MultiStep); var keysToReduceSet = new HashSet<string>(keysToReduce); for (int i = 0; i < 3; i++) { var level = i; var reduceLevelStats = new ReduceLevelPeformanceStats() { Level = level, Started = SystemTime.UtcNow, }; var reduceParams = new GetItemsToReduceParams( index.IndexId, keysToReduceSet, level, true, itemsToDelete); var gettingItemsToReduceDuration = new Stopwatch(); var scheduleReductionsDuration = new Stopwatch(); var removeReduceResultsDuration = new Stopwatch(); var storageCommitDuration = new Stopwatch(); bool retry = true; while (retry && reduceParams.ReduceKeys.Count > 0) { var reduceBatchAutoThrottlerId = Guid.NewGuid(); try { transactionalStorage.Batch(actions => { token.ThrowIfCancellationRequested(); actions.BeforeStorageCommit += storageCommitDuration.Start; actions.AfterStorageCommit += storageCommitDuration.Stop; var batchTimeWatcher = Stopwatch.StartNew(); reduceParams.Take = context.CurrentNumberOfItemsToReduceInSingleBatch; int size = 0; IList<MappedResultInfo> persistedResults; var reduceKeys = new HashSet<string>(StringComparer.InvariantCultureIgnoreCase); using (StopwatchScope.For(gettingItemsToReduceDuration)) { persistedResults = actions.MapReduce.GetItemsToReduce(reduceParams, token); foreach (var item in persistedResults) { reduceKeys.Add(item.ReduceKey); size += item.Size; } } if (persistedResults.Count == 0) { retry = false; return; } var count = persistedResults.Count; autoTuner.CurrentlyUsedBatchSizesInBytes.GetOrAdd(reduceBatchAutoThrottlerId, size); if (Log.IsDebugEnabled) { if (persistedResults.Count > 0) { Log.Debug(() => string.Format("Found {0} results for keys [{1}] for index {2} at level {3} in {4}", persistedResults.Count, string.Join(", ", persistedResults.Select(x => x.ReduceKey).Distinct()), index.IndexId, level, batchTimeWatcher.Elapsed)); } else { Log.Debug("No reduce keys found for {0}", index.IndexId); } } token.ThrowIfCancellationRequested(); var requiredReduceNextTimeSet = new HashSet<ReduceKeyAndBucket>(persistedResults.Select(x => new ReduceKeyAndBucket(x.Bucket, x.ReduceKey)), ReduceKeyAndBucketEqualityComparer.Instance); using (StopwatchScope.For(removeReduceResultsDuration)) { foreach (var mappedResultInfo in requiredReduceNextTimeSet) { token.ThrowIfCancellationRequested(); actions.MapReduce.RemoveReduceResults(index.IndexId, level + 1, mappedResultInfo.ReduceKey, mappedResultInfo.Bucket); } } if (level != 2) { var 
reduceKeysAndBucketsSet = new HashSet<ReduceKeyAndBucket>(requiredReduceNextTimeSet.Select(x => new ReduceKeyAndBucket(x.Bucket / 1024, x.ReduceKey)), ReduceKeyAndBucketEqualityComparer.Instance); using (StopwatchScope.For(scheduleReductionsDuration)) { foreach (var reduceKeysAndBucket in reduceKeysAndBucketsSet) { token.ThrowIfCancellationRequested(); actions.MapReduce.ScheduleReductions(index.IndexId, level + 1, reduceKeysAndBucket); } } } token.ThrowIfCancellationRequested(); var reduceTimeWatcher = Stopwatch.StartNew(); var results = persistedResults.Where(x => x.Data != null) .GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data)); var performance = context.IndexStorage.Reduce(index.IndexId, viewGenerator, results, level, context, actions, reduceKeys, persistedResults.Count); context.MetricsCounters.ReducedPerSecond.Mark(results.Count()); reduceLevelStats.Add(performance); var batchDuration = batchTimeWatcher.Elapsed; if ( Log.IsDebugEnabled ) { Log.Debug("Indexed {0} reduce keys in {1} with {2} results for index {3} in {4} on level {5}", reduceKeys.Count, batchDuration, performance.ItemsCount, index.IndexId, reduceTimeWatcher.Elapsed, level); } autoTuner.AutoThrottleBatchSize(count, size, batchDuration); }); } finally { long _; autoTuner.CurrentlyUsedBatchSizesInBytes.TryRemove(reduceBatchAutoThrottlerId, out _); } } reduceLevelStats.Completed = SystemTime.UtcNow; reduceLevelStats.Duration = reduceLevelStats.Completed - reduceLevelStats.Started; reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_GetItemsToReduce, gettingItemsToReduceDuration.ElapsedMilliseconds)); reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_ScheduleReductions, scheduleReductionsDuration.ElapsedMilliseconds)); reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_RemoveReduceResults, removeReduceResultsDuration.ElapsedMilliseconds)); reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds)); reducePerformance.LevelStats.Add(reduceLevelStats); } foreach (var reduceKey in needToMoveToMultiStep) { token.ThrowIfCancellationRequested(); string localReduceKey = reduceKey; transactionalStorage.Batch(actions => actions.MapReduce.UpdatePerformedReduceType(index.IndexId, localReduceKey, ReduceType.MultiStep)); } return reducePerformance; }
protected void HandleReduceForIndex(IndexToWorkOn indexToWorkOn) { var viewGenerator = context.IndexDefinitionStorage.GetViewGenerator(indexToWorkOn.IndexName); if (viewGenerator == null) { return; } TimeSpan reduceDuration = TimeSpan.Zero; int totalCount = 0; int totalSize = 0; bool operationCanceled = false; var itemsToDelete = new List <object>(); IList <ReduceTypePerKey> mappedResultsInfo = null; transactionalStorage.Batch(actions => { mappedResultsInfo = actions.MapReduce.GetReduceTypesPerKeys(indexToWorkOn.IndexName, context.NumberOfItemsToExecuteReduceInSingleStep).ToList(); }); var singleStepReduceKeys = mappedResultsInfo.Where(x => x.OperationTypeToPerform == ReduceType.SingleStep).Select(x => x.ReduceKey).ToArray(); var multiStepsReduceKeys = mappedResultsInfo.Where(x => x.OperationTypeToPerform == ReduceType.MultiStep).Select(x => x.ReduceKey).ToArray(); var sw = Stopwatch.StartNew(); try { if (singleStepReduceKeys.Length > 0) { var reduceCounters = SingleStepReduce(indexToWorkOn, singleStepReduceKeys, viewGenerator, itemsToDelete); totalCount += reduceCounters.count; totalSize += reduceCounters.size; } if (multiStepsReduceKeys.Length > 0) { var reduceCounters = MultiStepReduce(indexToWorkOn, multiStepsReduceKeys, viewGenerator, itemsToDelete); totalCount += reduceCounters.count; totalSize += reduceCounters.size; } reduceDuration = sw.Elapsed; } catch (OperationCanceledException) { operationCanceled = true; } finally { if (operationCanceled == false) { // whatever we succeeded in indexing or not, we have to update this // because otherwise we keep trying to re-index failed mapped results transactionalStorage.Batch(actions => { var latest = actions.MapReduce.DeleteScheduledReduction(itemsToDelete); if (latest == null) { return; } actions.Indexing.UpdateLastReduced(indexToWorkOn.IndexName, latest.Etag, latest.Timestamp); }); autoTuner.AutoThrottleBatchSize(totalCount, totalSize, reduceDuration); } } }
protected ReducingPerformanceStats[] HandleReduceForIndex( IndexToWorkOn indexToWorkOn, bool skipIncreasingBatchSize, CancellationToken token) { var viewGenerator = context.IndexDefinitionStorage.GetViewGenerator(indexToWorkOn.IndexId); if (viewGenerator == null) { return(null); } bool operationCanceled = false; var itemsToDelete = new ConcurrentSet <object>(); var singleStepReduceKeys = new List <string>(); var multiStepsReduceKeys = new List <string>(); transactionalStorage.Batch(actions => { var mappedResultsInfo = actions.MapReduce.GetReduceTypesPerKeys(indexToWorkOn.IndexId, context.CurrentNumberOfItemsToReduceInSingleBatch, context.NumberOfItemsToExecuteReduceInSingleStep, token); foreach (var key in mappedResultsInfo) { token.ThrowIfCancellationRequested(); switch (key.OperationTypeToPerform) { case ReduceType.SingleStep: singleStepReduceKeys.Add(key.ReduceKey); break; case ReduceType.MultiStep: multiStepsReduceKeys.Add(key.ReduceKey); break; } } }); var performanceStats = new List <ReducingPerformanceStats>(); try { if (singleStepReduceKeys.Count > 0) { if (Log.IsDebugEnabled) { Log.Debug("SingleStep reduce for keys: {0}", string.Join(",", singleStepReduceKeys)); } var singleStepStats = SingleStepReduce(indexToWorkOn, singleStepReduceKeys, viewGenerator, itemsToDelete, skipIncreasingBatchSize, token); performanceStats.Add(singleStepStats); } if (multiStepsReduceKeys.Count > 0) { if (Log.IsDebugEnabled) { Log.Debug("MultiStep reduce for keys: {0}", string.Join(",", multiStepsReduceKeys)); } var multiStepStats = MultiStepReduce(indexToWorkOn, multiStepsReduceKeys, viewGenerator, itemsToDelete, skipIncreasingBatchSize, token); performanceStats.Add(multiStepStats); } } catch (IndexDoesNotExistsException) { // race condition -> index was deleted // we can ignore this operationCanceled = true; } catch (ObjectDisposedException) { // index was disposed // we can ignore this operationCanceled = true; } catch (Exception e) { if (HandleIfOutOfMemory(e, new OutOfMemoryDetails { Index = indexToWorkOn.Index, FailedItemsToProcessCount = singleStepReduceKeys.Count + multiStepsReduceKeys.Count, IsReducing = true })) { //if we got a OOME we need to decrease the batch size operationCanceled = true; return(null); } if (IsOperationCanceledException(e)) { operationCanceled = true; return(null); } var message = $"Failed to reduce index: {indexToWorkOn.Index.PublicName} (id: {indexToWorkOn.IndexId}) " + $"{singleStepReduceKeys.Count} single step keys and {multiStepsReduceKeys.Count} multi step keys. 
" + "Skipping this batch (it won't be reduced)"; indexToWorkOn.Index.AddIndexingError(e, message); } finally { var postReducingOperations = new ReduceLevelPeformanceStats { Level = -1, Started = SystemTime.UtcNow }; if (operationCanceled == false) { // need to flush the changes made to the map-reduce index // before commiting the deletions of the scheduled reductions context.IndexStorage.FlushIndex(indexToWorkOn.IndexId, onlyAddIndexError: true); var deletingScheduledReductionsDuration = new Stopwatch(); var storageCommitDuration = new Stopwatch(); // whatever we succeeded in indexing or not, we have to update this // because otherwise we keep trying to re-index failed mapped results transactionalStorage.Batch(actions => { actions.BeforeStorageCommit += storageCommitDuration.Start; actions.AfterStorageCommit += storageCommitDuration.Stop; ScheduledReductionInfo latest; using (StopwatchScope.For(deletingScheduledReductionsDuration)) { latest = actions.MapReduce.DeleteScheduledReduction(itemsToDelete, token); } if (latest == null) { return; } actions.Indexing.UpdateLastReduced(indexToWorkOn.IndexId, latest.Etag, latest.Timestamp); }); postReducingOperations.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_DeleteScheduledReductions, deletingScheduledReductionsDuration.ElapsedMilliseconds)); postReducingOperations.Operations.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds)); } postReducingOperations.Completed = SystemTime.UtcNow; postReducingOperations.Duration = postReducingOperations.Completed - postReducingOperations.Started; performanceStats.Add(new ReducingPerformanceStats(ReduceType.None) { LevelStats = new List <ReduceLevelPeformanceStats> { postReducingOperations } }); } return(performanceStats.ToArray()); }
private ReduceResultStats SingleStepReduce(IndexToWorkOn index, string[] keysToReduce, AbstractViewGenerator viewGenerator, List<object> itemsToDelete) { var result = new ReduceResultStats(); var needToMoveToSingleStep = new HashSet<string>(); Log.Debug(() => string.Format("Executing single step reducing for {0} keys [{1}]", keysToReduce.Length, string.Join(", ", keysToReduce))); transactionalStorage.Batch(actions => { var scheduledItems = actions.MapReduce.GetItemsToReduce ( level: 0, reduceKeys: keysToReduce, index: index.IndexName, itemsToDelete: itemsToDelete, loadData: false ).ToList(); // Only look at the scheduled batch for this run, not the entire set of pending reductions. //var batchKeys = scheduledItems.Select(x => x.ReduceKey).ToArray(); foreach (var reduceKey in keysToReduce) { var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexName, reduceKey); if (lastPerformedReduceType != ReduceType.SingleStep) needToMoveToSingleStep.Add(reduceKey); if (lastPerformedReduceType != ReduceType.MultiStep) continue; Log.Debug("Key {0} was moved from multi step to single step reduce, removing existing reduce results records", reduceKey); // now we are in single step but previously multi step reduce was performed for the given key var mappedBuckets = actions.MapReduce.GetMappedBuckets(index.IndexName, reduceKey).ToList(); // add scheduled items too to be sure we will delete reduce results of already deleted documents mappedBuckets.AddRange(scheduledItems.Select(x => x.Bucket)); foreach (var mappedBucket in mappedBuckets.Distinct()) { actions.MapReduce.RemoveReduceResults(index.IndexName, 1, reduceKey, mappedBucket); actions.MapReduce.RemoveReduceResults(index.IndexName, 2, reduceKey, mappedBucket / 1024); } } var mappedResults = actions.MapReduce.GetMappedResults( index.IndexName, keysToReduce, loadData: true ).ToList(); result.count += mappedResults.Count; result.size += mappedResults.Sum(x => x.Size); var reduceKeys = new HashSet<string>(keysToReduce); mappedResults.ApplyIfNotNull(x => x.Bucket = 0); var results = mappedResults .Where(x => x.Data != null) .GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data)) .ToArray(); context.ReducedPerSecIncreaseBy(results.Length); context.IndexStorage.Reduce(index.IndexName, viewGenerator, results, 2, context, actions, reduceKeys); }); foreach (var reduceKey in needToMoveToSingleStep) { string localReduceKey = reduceKey; transactionalStorage.Batch(actions => actions.MapReduce.UpdatePerformedReduceType(index.IndexName, localReduceKey, ReduceType.SingleStep)); } return result; }
protected ReducingPerformanceStats[] HandleReduceForIndex(IndexToWorkOn indexToWorkOn) { var viewGenerator = context.IndexDefinitionStorage.GetViewGenerator(indexToWorkOn.IndexId); if (viewGenerator == null) { return(null); } bool operationCanceled = false; var itemsToDelete = new ConcurrentSet <object>(); IList <ReduceTypePerKey> mappedResultsInfo = null; transactionalStorage.Batch(actions => { mappedResultsInfo = actions.MapReduce.GetReduceTypesPerKeys(indexToWorkOn.IndexId, context.CurrentNumberOfItemsToReduceInSingleBatch, context.NumberOfItemsToExecuteReduceInSingleStep).ToList(); }); var singleStepReduceKeys = mappedResultsInfo.Where(x => x.OperationTypeToPerform == ReduceType.SingleStep).Select(x => x.ReduceKey).ToArray(); var multiStepsReduceKeys = mappedResultsInfo.Where(x => x.OperationTypeToPerform == ReduceType.MultiStep).Select(x => x.ReduceKey).ToArray(); currentlyProcessedIndexes.TryAdd(indexToWorkOn.IndexId, indexToWorkOn.Index); var performanceStats = new List <ReducingPerformanceStats>(); try { if (singleStepReduceKeys.Length > 0) { Log.Debug("SingleStep reduce for keys: {0}", string.Join(",", singleStepReduceKeys)); var singleStepStats = SingleStepReduce(indexToWorkOn, singleStepReduceKeys, viewGenerator, itemsToDelete); performanceStats.Add(singleStepStats); } if (multiStepsReduceKeys.Length > 0) { Log.Debug("MultiStep reduce for keys: {0}", string.Join(",", multiStepsReduceKeys)); var multiStepStats = MultiStepReduce(indexToWorkOn, multiStepsReduceKeys, viewGenerator, itemsToDelete); performanceStats.Add(multiStepStats); } } catch (OperationCanceledException) { operationCanceled = true; } finally { var postReducingOperations = new ReduceLevelPeformanceStats { Level = -1, Started = SystemTime.UtcNow }; if (operationCanceled == false) { var deletingScheduledReductionsDuration = new Stopwatch(); var storageCommitDuration = new Stopwatch(); // whatever we succeeded in indexing or not, we have to update this // because otherwise we keep trying to re-index failed mapped results transactionalStorage.Batch(actions => { actions.BeforeStorageCommit += storageCommitDuration.Start; actions.AfterStorageCommit += storageCommitDuration.Stop; ScheduledReductionInfo latest; using (StopwatchScope.For(deletingScheduledReductionsDuration)) { latest = actions.MapReduce.DeleteScheduledReduction(itemsToDelete); } if (latest == null) { return; } actions.Indexing.UpdateLastReduced(indexToWorkOn.Index.indexId, latest.Etag, latest.Timestamp); }); postReducingOperations.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_DeleteScheduledReductions, deletingScheduledReductionsDuration.ElapsedMilliseconds)); postReducingOperations.Operations.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds)); } postReducingOperations.Completed = SystemTime.UtcNow; postReducingOperations.Duration = postReducingOperations.Completed - postReducingOperations.Started; performanceStats.Add(new ReducingPerformanceStats(ReduceType.None) { LevelStats = new List <ReduceLevelPeformanceStats> { postReducingOperations } }); Index _; currentlyProcessedIndexes.TryRemove(indexToWorkOn.IndexId, out _); } return(performanceStats.ToArray()); }
private ReducingPerformanceStats SingleStepReduce(IndexToWorkOn index, List <string> keysToReduce, AbstractViewGenerator viewGenerator, ConcurrentSet <object> itemsToDelete, CancellationToken token) { var needToMoveToSingleStepQueue = new ConcurrentQueue <HashSet <string> >(); if (Log.IsDebugEnabled) { Log.Debug(() => string.Format("Executing single step reducing for {0} keys [{1}]", keysToReduce.Count, string.Join(", ", keysToReduce))); } var batchTimeWatcher = Stopwatch.StartNew(); var reducingBatchThrottlerId = Guid.NewGuid(); var reducePerformanceStats = new ReducingPerformanceStats(ReduceType.SingleStep); var reduceLevelStats = new ReduceLevelPeformanceStats { Started = SystemTime.UtcNow, Level = 2 }; try { var parallelOperations = new ConcurrentQueue <ParallelBatchStats>(); var parallelProcessingStart = SystemTime.UtcNow; BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, keysToReduce, enumerator => { var parallelStats = new ParallelBatchStats { StartDelay = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds }; var localNeedToMoveToSingleStep = new HashSet <string>(); needToMoveToSingleStepQueue.Enqueue(localNeedToMoveToSingleStep); var localKeys = new HashSet <string>(); while (enumerator.MoveNext()) { token.ThrowIfCancellationRequested(); localKeys.Add(enumerator.Current); } transactionalStorage.Batch(actions => { var getItemsToReduceParams = new GetItemsToReduceParams(index: index.IndexId, reduceKeys: new HashSet <string>(localKeys), level: 0, loadData: false, itemsToDelete: itemsToDelete) { Take = int.MaxValue // just get all, we do the rate limit when we load the number of keys to reduce, anyway }; var getItemsToReduceDuration = Stopwatch.StartNew(); int scheduledItemsSum = 0; int scheduledItemsCount = 0; List <int> scheduledItemsMappedBuckets = new List <int>(); using (StopwatchScope.For(getItemsToReduceDuration)) { foreach (var item in actions.MapReduce.GetItemsToReduce(getItemsToReduceParams, token)) { scheduledItemsMappedBuckets.Add(item.Bucket); scheduledItemsSum += item.Size; scheduledItemsCount++; } } parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_GetItemsToReduce, getItemsToReduceDuration.ElapsedMilliseconds)); autoTuner.CurrentlyUsedBatchSizesInBytes.GetOrAdd(reducingBatchThrottlerId, scheduledItemsSum); if (scheduledItemsCount == 0) { // Here we have an interesting issue. We have scheduled reductions, because GetReduceTypesPerKeys() returned them // and at the same time, we don't have any at level 0. That probably means that we have them at level 1 or 2. // They shouldn't be here, and indeed, we remove them just a little down from here in this function. // That said, they might have smuggled in between versions, or something happened to cause them to be here. // In order to avoid that, we forcibly delete those extra items from the scheduled reductions, and move on Log.Warn("Found single reduce items ({0}) that didn't have any items to reduce. Deleting level 1 & level 2 items for those keys. 
(If you can reproduce this, please contact [email protected])", string.Join(", ", keysToReduce)); var deletingScheduledReductionsDuration = Stopwatch.StartNew(); using (StopwatchScope.For(deletingScheduledReductionsDuration)) { foreach (var reduceKey in keysToReduce) { token.ThrowIfCancellationRequested(); actions.MapReduce.DeleteScheduledReduction(index.IndexId, 1, reduceKey); actions.MapReduce.DeleteScheduledReduction(index.IndexId, 2, reduceKey); } } parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_DeleteScheduledReductions, deletingScheduledReductionsDuration.ElapsedMilliseconds)); } var removeReduceResultsDuration = new Stopwatch(); foreach (var reduceKey in localKeys) { token.ThrowIfCancellationRequested(); var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexId, reduceKey); if (lastPerformedReduceType != ReduceType.SingleStep) { localNeedToMoveToSingleStep.Add(reduceKey); } if (lastPerformedReduceType != ReduceType.MultiStep) { continue; } if (Log.IsDebugEnabled) { Log.Debug("Key {0} was moved from multi step to single step reduce, removing existing reduce results records", reduceKey); } using (StopwatchScope.For(removeReduceResultsDuration)) { // now we are in single step but previously multi step reduce was performed for the given key var mappedBuckets = actions.MapReduce.GetMappedBuckets(index.IndexId, reduceKey, token); // add scheduled items too to be sure we will delete reduce results of already deleted documents foreach (var mappedBucket in mappedBuckets.Union(scheduledItemsMappedBuckets)) { actions.MapReduce.RemoveReduceResults(index.IndexId, 1, reduceKey, mappedBucket); actions.MapReduce.RemoveReduceResults(index.IndexId, 2, reduceKey, mappedBucket / 1024); } } } parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_RemoveReduceResults, removeReduceResultsDuration.ElapsedMilliseconds)); parallelOperations.Enqueue(parallelStats); }); }); reduceLevelStats.Operations.Add(new ParallelPerformanceStats { NumberOfThreads = parallelOperations.Count, DurationMs = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds, BatchedOperations = parallelOperations.ToList() }); var getMappedResultsDuration = new Stopwatch(); var reductionPerformanceStats = new List <IndexingPerformanceStats>(); var keysLeftToReduce = new HashSet <string>(keysToReduce); while (keysLeftToReduce.Count > 0) { var keysReturned = new HashSet <string>(); // Try to diminish the allocations happening because of .Resize() var mappedResults = new List <MappedResultInfo>(keysLeftToReduce.Count); context.TransactionalStorage.Batch(actions => { var take = context.CurrentNumberOfItemsToReduceInSingleBatch; using (StopwatchScope.For(getMappedResultsDuration)) { mappedResults = actions.MapReduce.GetMappedResults(index.IndexId, keysLeftToReduce, true, take, keysReturned, token, mappedResults); } }); var count = mappedResults.Count; int size = 0; foreach (var item in mappedResults) { item.Bucket = 0; size += item.Size; } var results = mappedResults.GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data)).ToArray(); context.MetricsCounters.ReducedPerSecond.Mark(results.Length); token.ThrowIfCancellationRequested(); var performance = context.IndexStorage.Reduce(index.IndexId, viewGenerator, results, 2, context, null, keysReturned, count); reductionPerformanceStats.Add(performance); autoTuner.AutoThrottleBatchSize(count, size, batchTimeWatcher.Elapsed); } var needToMoveToSingleStep = new HashSet <string>(); HashSet <string> set; 
while (needToMoveToSingleStepQueue.TryDequeue(out set)) { needToMoveToSingleStep.UnionWith(set); } foreach (var reduceKey in needToMoveToSingleStep) { string localReduceKey = reduceKey; transactionalStorage.Batch(actions => actions.MapReduce.UpdatePerformedReduceType(index.IndexId, localReduceKey, ReduceType.SingleStep)); } reduceLevelStats.Completed = SystemTime.UtcNow; reduceLevelStats.Duration = reduceLevelStats.Completed - reduceLevelStats.Started; reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_GetMappedResults, getMappedResultsDuration.ElapsedMilliseconds)); reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.StorageCommit, 0)); // in single step we write directly to Lucene index foreach (var stats in reductionPerformanceStats) { reduceLevelStats.Add(stats); } reducePerformanceStats.LevelStats.Add(reduceLevelStats); } finally { long _; autoTuner.CurrentlyUsedBatchSizesInBytes.TryRemove(reducingBatchThrottlerId, out _); } return(reducePerformanceStats); }
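The single-step path above forces every mapped result into bucket 0 before calling IndexStorage.Reduce at level 2, so each reduce key is aggregated in one pass instead of through the bucket hierarchy. A minimal sketch of that collapse, assuming a simplified MappedResult stand-in rather than RavenDB's MappedResultInfo:

using System.Collections.Generic;
using System.Linq;

public static class SingleStepCollapseSketch
{
    // Hypothetical stand-in for a mapped result row; the real MappedResultInfo carries more fields.
    public sealed class MappedResult
    {
        public string ReduceKey;
        public int Bucket;
        public object Data;
    }

    // Mirrors the loop above: force every result into bucket 0, then group,
    // so the whole reduce key is aggregated in a single level-2 pass.
    public static IGrouping<int, object>[] CollapseToSingleBucket(IEnumerable<MappedResult> mapped)
    {
        var materialized = mapped.ToList();   // avoid re-enumerating a lazy source after mutation
        foreach (var item in materialized)
            item.Bucket = 0;                  // single-step reduce ignores bucket boundaries

        return materialized
            .GroupBy(x => x.Bucket, x => x.Data)
            .ToArray();
    }
}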
private ReducingPerformanceStats SingleStepReduce(IndexToWorkOn index, List<string> keysToReduce, AbstractViewGenerator viewGenerator, ConcurrentSet<object> itemsToDelete, CancellationToken token) { var needToMoveToSingleStepQueue = new ConcurrentQueue<HashSet<string>>(); if ( Log.IsDebugEnabled ) Log.Debug(() => string.Format("Executing single step reducing for {0} keys [{1}]", keysToReduce.Count, string.Join(", ", keysToReduce))); var batchTimeWatcher = Stopwatch.StartNew(); var reducingBatchThrottlerId = Guid.NewGuid(); var reducePerformanceStats = new ReducingPerformanceStats(ReduceType.SingleStep); var reduceLevelStats = new ReduceLevelPeformanceStats { Started = SystemTime.UtcNow, Level = 2 }; try { var parallelOperations = new ConcurrentQueue<ParallelBatchStats>(); var parallelProcessingStart = SystemTime.UtcNow; BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, keysToReduce, enumerator => { var parallelStats = new ParallelBatchStats { StartDelay = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds }; var localNeedToMoveToSingleStep = new HashSet<string>(); needToMoveToSingleStepQueue.Enqueue(localNeedToMoveToSingleStep); var localKeys = new HashSet<string>(); while (enumerator.MoveNext()) { token.ThrowIfCancellationRequested(); localKeys.Add(enumerator.Current); } transactionalStorage.Batch(actions => { var getItemsToReduceParams = new GetItemsToReduceParams(index: index.IndexId, reduceKeys: localKeys, level: 0, loadData: false, itemsToDelete: itemsToDelete) { Take = int.MaxValue // just get all, we do the rate limit when we load the number of keys to reduce, anyway }; var getItemsToReduceDuration = Stopwatch.StartNew(); int scheduledItemsSum = 0; int scheduledItemsCount = 0; List<int> scheduledItemsMappedBuckets = new List<int>(); using (StopwatchScope.For(getItemsToReduceDuration)) { foreach (var item in actions.MapReduce.GetItemsToReduce(getItemsToReduceParams, token)) { scheduledItemsMappedBuckets.Add(item.Bucket); scheduledItemsSum += item.Size; scheduledItemsCount++; } } parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_GetItemsToReduce, getItemsToReduceDuration.ElapsedMilliseconds)); autoTuner.CurrentlyUsedBatchSizesInBytes.GetOrAdd(reducingBatchThrottlerId, scheduledItemsSum); if (scheduledItemsCount == 0) { // Here we have an interesting issue. We have scheduled reductions, because GetReduceTypesPerKeys() returned them // and at the same time, we don't have any at level 0. That probably means that we have them at level 1 or 2. // They shouldn't be here, and indeed, we remove them just a little down from here in this function. // That said, they might have smuggled in between versions, or something happened to cause them to be here. // In order to avoid that, we forcibly delete those extra items from the scheduled reductions, and move on Log.Warn("Found single reduce items ({0}) that didn't have any items to reduce. Deleting level 1 & level 2 items for those keys. (If you can reproduce this, please contact [email protected])", string.Join(", ", keysToReduce)); var deletingScheduledReductionsDuration = Stopwatch.StartNew(); using (StopwatchScope.For(deletingScheduledReductionsDuration)) { foreach (var reduceKey in keysToReduce) { token.ThrowIfCancellationRequested(); actions.MapReduce.DeleteScheduledReduction(index.IndexId, 1, reduceKey); actions.MapReduce.DeleteScheduledReduction(index.IndexId, 2, reduceKey); } } parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_DeleteScheduledReductions, deletingScheduledReductionsDuration.ElapsedMilliseconds)); } var removeReduceResultsDuration = new Stopwatch(); foreach (var reduceKey in localKeys) { token.ThrowIfCancellationRequested(); var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexId, reduceKey); if (lastPerformedReduceType != ReduceType.SingleStep) localNeedToMoveToSingleStep.Add(reduceKey); if (lastPerformedReduceType != ReduceType.MultiStep) continue; if ( Log.IsDebugEnabled ) { Log.Debug("Key {0} was moved from multi step to single step reduce, removing existing reduce results records", reduceKey); } using (StopwatchScope.For(removeReduceResultsDuration)) { // now we are in single step but previously multi step reduce was performed for the given key var mappedBuckets = actions.MapReduce.GetMappedBuckets(index.IndexId, reduceKey, token); // add scheduled items too to be sure we will delete reduce results of already deleted documents foreach (var mappedBucket in mappedBuckets.Union(scheduledItemsMappedBuckets)) { actions.MapReduce.RemoveReduceResults(index.IndexId, 1, reduceKey, mappedBucket); actions.MapReduce.RemoveReduceResults(index.IndexId, 2, reduceKey, mappedBucket / 1024); } } } parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_RemoveReduceResults, removeReduceResultsDuration.ElapsedMilliseconds)); parallelOperations.Enqueue(parallelStats); }); }); reduceLevelStats.Operations.Add(new ParallelPerformanceStats { NumberOfThreads = parallelOperations.Count, DurationMs = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds, BatchedOperations = parallelOperations.ToList() }); var getMappedResultsDuration = new Stopwatch(); var reductionPerformanceStats = new List<IndexingPerformanceStats>(); var keysLeftToReduce = new HashSet<string>(keysToReduce); while (keysLeftToReduce.Count > 0) { var keysReturned = new HashSet<string>(); // Try to diminish the allocations happening because of .Resize() var mappedResults = new List<MappedResultInfo>(keysLeftToReduce.Count); context.TransactionalStorage.Batch(actions => { var take = context.CurrentNumberOfItemsToReduceInSingleBatch; using (StopwatchScope.For(getMappedResultsDuration)) { mappedResults = actions.MapReduce.GetMappedResults(index.IndexId, keysLeftToReduce, true, take, keysReturned, token, mappedResults); } }); var count = mappedResults.Count; int size = 0; foreach ( var item in mappedResults ) { item.Bucket = 0; size += item.Size; } var results = mappedResults.GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data)).ToArray(); context.MetricsCounters.ReducedPerSecond.Mark(results.Length); token.ThrowIfCancellationRequested(); var performance = context.IndexStorage.Reduce(index.IndexId, viewGenerator, results, 2, context, null, keysReturned, count); reductionPerformanceStats.Add(performance); autoTuner.AutoThrottleBatchSize(count, size, batchTimeWatcher.Elapsed); } var needToMoveToSingleStep = new HashSet<string>(); HashSet<string> set; while
(needToMoveToSingleStepQueue.TryDequeue(out set)) { needToMoveToSingleStep.UnionWith(set); } foreach (var reduceKey in needToMoveToSingleStep) { string localReduceKey = reduceKey; transactionalStorage.Batch(actions => actions.MapReduce.UpdatePerformedReduceType(index.IndexId, localReduceKey, ReduceType.SingleStep)); } reduceLevelStats.Completed = SystemTime.UtcNow; reduceLevelStats.Duration = reduceLevelStats.Completed - reduceLevelStats.Started; reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_GetMappedResults, getMappedResultsDuration.ElapsedMilliseconds)); reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.StorageCommit, 0)); // in single step we write directly to Lucene index foreach (var stats in reductionPerformanceStats) { reduceLevelStats.Add(stats); } reducePerformanceStats.LevelStats.Add(reduceLevelStats); } finally { long _; autoTuner.CurrentlyUsedBatchSizesInBytes.TryRemove(reducingBatchThrottlerId, out _); } return reducePerformanceStats; }
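Both variants of the method above collect the keys that must be switched to single-step reduce through per-batch HashSets pushed onto a ConcurrentQueue, then union them once the parallel work has finished, avoiding any locking on a shared set. A compact sketch of that pattern, with Parallel.ForEach standing in for BackgroundTaskExecuter.ExecuteAllBuffered:

using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Threading.Tasks;

public static class LocalSetMergeSketch
{
    // Each parallel batch fills its own HashSet (no contention), the sets are queued,
    // and the caller unions them once all batches have completed.
    public static HashSet<string> CollectKeysNeedingSingleStep(
        IEnumerable<IEnumerable<string>> batches,
        Func<string, bool> needsSingleStep)
    {
        var queue = new ConcurrentQueue<HashSet<string>>();

        // Parallel.ForEach is an assumed stand-in executor for illustration only.
        Parallel.ForEach(batches, batch =>
        {
            var local = new HashSet<string>();
            queue.Enqueue(local);
            foreach (var key in batch)
            {
                if (needsSingleStep(key))
                    local.Add(key);
            }
        });

        var merged = new HashSet<string>();
        HashSet<string> set;
        while (queue.TryDequeue(out set))   // safe: all workers have already joined
            merged.UnionWith(set);

        return merged;
    }
}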
protected void HandleReduceForIndex(IndexToWorkOn indexToWorkOn) { var viewGenerator = context.IndexDefinitionStorage.GetViewGenerator(indexToWorkOn.IndexName); if (viewGenerator == null) { return; } TimeSpan reduceDuration = TimeSpan.Zero; int totalCount = 0; int totalSize = 0; bool operationCanceled = false; var itemsToDelete = new List <object>(); try { var sw = Stopwatch.StartNew(); for (int i = 0; i < 3; i++) { var level = i; transactionalStorage.Batch(actions => { context.CancellationToken.ThrowIfCancellationRequested(); var sp = Stopwatch.StartNew(); var persistedResults = actions.MapReduce.GetItemsToReduce ( take: context.CurrentNumberOfItemsToReduceInSingleBatch, level: level, index: indexToWorkOn.IndexName, itemsToDelete: itemsToDelete ) .ToList(); totalCount += persistedResults.Count; totalSize += persistedResults.Sum(x => x.Size); if (Log.IsDebugEnabled) { if (persistedResults.Count > 0) { Log.Debug(() => string.Format("Found {0} results for keys [{1}] for index {2} at level {3} in {4}", persistedResults.Count, string.Join(", ", persistedResults.Select(x => x.ReduceKey).Distinct()), indexToWorkOn.IndexName, level, sp.Elapsed)); } else { Log.Debug("No reduce keys found for {0}", indexToWorkOn.IndexName); } } context.CancellationToken.ThrowIfCancellationRequested(); var requiredReduceNextTime = persistedResults.Select(x => new ReduceKeyAndBucket(x.Bucket, x.ReduceKey)).Distinct().ToArray(); foreach (var mappedResultInfo in requiredReduceNextTime) { actions.MapReduce.RemoveReduceResults(indexToWorkOn.IndexName, level + 1, mappedResultInfo.ReduceKey, mappedResultInfo.Bucket); } if (level != 2) { var reduceKeysAndBukcets = requiredReduceNextTime .Select(x => new ReduceKeyAndBucket(x.Bucket / 1024, x.ReduceKey)) .Distinct() .ToArray(); actions.MapReduce.ScheduleReductions(indexToWorkOn.IndexName, level + 1, reduceKeysAndBukcets); } var results = persistedResults .Where(x => x.Data != null) .GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data)) .ToArray(); var reduceKeys = new HashSet <string>(persistedResults.Select(x => x.ReduceKey), StringComparer.InvariantCultureIgnoreCase); context.ReducedPerSecIncreaseBy(results.Length); context.CancellationToken.ThrowIfCancellationRequested(); sp = Stopwatch.StartNew(); context.IndexStorage.Reduce(indexToWorkOn.IndexName, viewGenerator, results, level, context, actions, reduceKeys); Log.Debug("Indexed {0} reduce keys in {1} with {2} results for index {3} in {4}", reduceKeys.Count, sp.Elapsed, results.Length, indexToWorkOn.IndexName, sp.Elapsed); }); } reduceDuration = sw.Elapsed; } catch (OperationCanceledException) { operationCanceled = true; } finally { if (operationCanceled == false) { // whatever we succeeded in indexing or not, we have to update this // because otherwise we keep trying to re-index failed mapped results transactionalStorage.Batch(actions => { var latest = actions.MapReduce.DeleteScheduledReduction(itemsToDelete); if (latest == null) { return; } actions.Indexing.UpdateLastReduced(indexToWorkOn.IndexName, latest.Etag, latest.Timestamp); }); autoTuner.AutoThrottleBatchSize(totalCount, totalSize, reduceDuration); } } }
protected void HandleReduceForIndex(IndexToWorkOn indexToWorkOn) { var viewGenerator = context.IndexDefinitionStorage.GetViewGenerator(indexToWorkOn.IndexName); if (viewGenerator == null) return; TimeSpan reduceDuration = TimeSpan.Zero; int totalCount = 0; int totalSize = 0; bool operationCanceled = false; var itemsToDelete = new List<object>(); IList<ReduceTypePerKey> mappedResultsInfo = null; transactionalStorage.Batch(actions => { mappedResultsInfo = actions.MapReduce.GetReduceTypesPerKeys(indexToWorkOn.IndexName, context.CurrentNumberOfItemsToReduceInSingleBatch, context.NumberOfItemsToExecuteReduceInSingleStep).ToList(); }); var singleStepReduceKeys = mappedResultsInfo.Where(x => x.OperationTypeToPerform == ReduceType.SingleStep).Select(x => x.ReduceKey).ToArray(); var multiStepsReduceKeys = mappedResultsInfo.Where(x => x.OperationTypeToPerform == ReduceType.MultiStep).Select(x => x.ReduceKey).ToArray(); var sw = Stopwatch.StartNew(); try { if (singleStepReduceKeys.Length > 0) { var reduceCounters = SingleStepReduce(indexToWorkOn, singleStepReduceKeys, viewGenerator, itemsToDelete); totalCount += reduceCounters.count; totalSize += reduceCounters.size; } if (multiStepsReduceKeys.Length > 0) { var reduceCounters = MultiStepReduce(indexToWorkOn, multiStepsReduceKeys, viewGenerator, itemsToDelete); totalCount += reduceCounters.count; totalSize += reduceCounters.size; } reduceDuration = sw.Elapsed; } catch (OperationCanceledException) { operationCanceled = true; } finally { if (operationCanceled == false) { // whatever we succeeded in indexing or not, we have to update this // because otherwise we keep trying to re-index failed mapped results transactionalStorage.Batch(actions => { var latest = actions.MapReduce.DeleteScheduledReduction(itemsToDelete); if(latest == null) return; actions.Indexing.UpdateLastReduced(indexToWorkOn.IndexName, latest.Etag, latest.Timestamp); }); autoTuner.AutoThrottleBatchSize(totalCount, totalSize, reduceDuration); } } }
private void MarkIndexes(IndexToWorkOn indexToWorkOn, ComparableByteArray lastIndexedEtag, IStorageActionsAccessor actions, Guid lastEtag, DateTime lastModified) { if (new ComparableByteArray(indexToWorkOn.LastIndexedEtag.ToByteArray()).CompareTo(lastIndexedEtag) > 0) return; actions.Indexing.UpdateLastIndexed(indexToWorkOn.IndexName, lastEtag, lastModified); }
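MarkIndexes only advances the last-indexed marker when the index has not already recorded a newer etag; a comparison greater than zero means the stored value is ahead, so the write is skipped. A small sketch of that guard using a plain byte-wise comparison (the updateLastIndexed callback is a hypothetical stand-in for the storage call):

using System;

public static class EtagGuardSketch
{
    // Unsigned, byte-wise lexicographic comparison - the ordering etag comparisons rely on.
    public static int Compare(byte[] left, byte[] right)
    {
        var length = Math.Min(left.Length, right.Length);
        for (var i = 0; i < length; i++)
        {
            var diff = left[i].CompareTo(right[i]);
            if (diff != 0)
                return diff;
        }
        return left.Length.CompareTo(right.Length);
    }

    // Mirrors the guard above: never move the last-indexed marker backwards.
    public static void MarkIfNotBehind(byte[] alreadyRecordedEtag, byte[] batchHighestEtag, Action updateLastIndexed)
    {
        if (Compare(alreadyRecordedEtag, batchHighestEtag) > 0)
            return; // the index already recorded a newer etag; skip the write

        updateLastIndexed();
    }
}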
private ReduceResultStats SingleStepReduce(IndexToWorkOn index, string[] keysToReduce, AbstractViewGenerator viewGenerator, List <object> itemsToDelete) { var result = new ReduceResultStats(); var needToMoveToSingleStep = new HashSet <string>(); Log.Debug(() => string.Format("Executing single step reducing for {0} keys [{1}]", keysToReduce.Length, string.Join(", ", keysToReduce))); transactionalStorage.Batch(actions => { var scheduledItems = actions.MapReduce.GetItemsToReduce ( level: 0, reduceKeys: keysToReduce, index: index.IndexName, itemsToDelete: itemsToDelete, loadData: false, take: int.MaxValue, // just get all, we do the rate limit when we load the number of keys to reduce, anyway itemsAlreadySeen: new HashSet <Tuple <string, int> >() ).ToList(); // Only look at the scheduled batch for this run, not the entire set of pending reductions. //var batchKeys = scheduledItems.Select(x => x.ReduceKey).ToArray(); foreach (var reduceKey in keysToReduce) { var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexName, reduceKey); if (lastPerformedReduceType != ReduceType.SingleStep) { needToMoveToSingleStep.Add(reduceKey); } if (lastPerformedReduceType != ReduceType.MultiStep) { continue; } Log.Debug("Key {0} was moved from multi step to single step reduce, removing existing reduce results records", reduceKey); // now we are in single step but previously multi step reduce was performed for the given key var mappedBuckets = actions.MapReduce.GetMappedBuckets(index.IndexName, reduceKey).ToList(); // add scheduled items too to be sure we will delete reduce results of already deleted documents mappedBuckets.AddRange(scheduledItems.Select(x => x.Bucket)); foreach (var mappedBucket in mappedBuckets.Distinct()) { actions.MapReduce.RemoveReduceResults(index.IndexName, 1, reduceKey, mappedBucket); actions.MapReduce.RemoveReduceResults(index.IndexName, 2, reduceKey, mappedBucket / 1024); } } var mappedResults = actions.MapReduce.GetMappedResults( index.IndexName, keysToReduce, loadData: true ).ToList(); result.count += mappedResults.Count; result.size += mappedResults.Sum(x => x.Size); var reduceKeys = new HashSet <string>(keysToReduce); mappedResults.ApplyIfNotNull(x => x.Bucket = 0); var results = mappedResults .Where(x => x.Data != null) .GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data)) .ToArray(); context.ReducedPerSecIncreaseBy(results.Length); context.IndexStorage.Reduce(index.IndexName, viewGenerator, results, 2, context, actions, reduceKeys); }); foreach (var reduceKey in needToMoveToSingleStep) { string localReduceKey = reduceKey; transactionalStorage.Batch(actions => actions.MapReduce.UpdatePerformedReduceType(index.IndexName, localReduceKey, ReduceType.SingleStep)); } return(result); }
private void SingleStepReduce(IndexToWorkOn index, string[] keysToReduce, AbstractViewGenerator viewGenerator, List<object> itemsToDelete) { var needToMoveToSingleStep = new HashSet<string>(); Log.Debug(() => string.Format("Executing single step reducing for {0} keys [{1}]", keysToReduce.Length, string.Join(", ", keysToReduce))); var batchTimeWatcher = Stopwatch.StartNew(); var count = 0; var size = 0; var state = new ConcurrentQueue <Tuple<HashSet<string>, List<MappedResultInfo>>>(); BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, keysToReduce, enumerator => { var localKeys = new HashSet<string>(); while (enumerator.MoveNext()) { localKeys.Add(enumerator.Current); } transactionalStorage.Batch(actions => { var getItemsToReduceParams = new GetItemsToReduceParams(index: index.IndexName, reduceKeys: localKeys, level: 0, loadData: false, itemsToDelete: itemsToDelete) { Take = int.MaxValue// just get all, we do the rate limit when we load the number of keys to reduce, anyway }; var scheduledItems = actions.MapReduce.GetItemsToReduce(getItemsToReduceParams).ToList(); foreach (var reduceKey in localKeys) { var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexName, reduceKey); if (lastPerformedReduceType != ReduceType.SingleStep) needToMoveToSingleStep.Add(reduceKey); if (lastPerformedReduceType != ReduceType.MultiStep) continue; Log.Debug("Key {0} was moved from multi step to single step reduce, removing existing reduce results records", reduceKey); // now we are in single step but previously multi step reduce was performed for the given key var mappedBuckets = actions.MapReduce.GetMappedBuckets(index.IndexName, reduceKey).ToList(); // add scheduled items too to be sure we will delete reduce results of already deleted documents mappedBuckets.AddRange(scheduledItems.Select(x => x.Bucket)); foreach (var mappedBucket in mappedBuckets.Distinct()) { actions.MapReduce.RemoveReduceResults(index.IndexName, 1, reduceKey, mappedBucket); actions.MapReduce.RemoveReduceResults(index.IndexName, 2, reduceKey, mappedBucket / 1024); } } var mappedResults = actions.MapReduce.GetMappedResults( index.IndexName, localKeys, loadData: true ).ToList(); Interlocked.Add(ref count, mappedResults.Count); Interlocked.Add(ref size, mappedResults.Sum(x => x.Size)); mappedResults.ApplyIfNotNull(x => x.Bucket = 0); state.Enqueue(Tuple.Create(localKeys, mappedResults)); }); }); var reduceKeys = new HashSet<string>(state.SelectMany(x=>x.Item1)); var results = state.SelectMany(x=>x.Item2) .Where(x => x.Data != null) .GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data)) .ToArray(); context.ReducedPerSecIncreaseBy(results.Length); context.TransactionalStorage.Batch(actions => context.IndexStorage.Reduce(index.IndexName, viewGenerator, results, 2, context, actions, reduceKeys) ); autoTuner.AutoThrottleBatchSize(count, size, batchTimeWatcher.Elapsed); foreach (var reduceKey in needToMoveToSingleStep) { string localReduceKey = reduceKey; transactionalStorage.Batch(actions => actions.MapReduce.UpdatePerformedReduceType(index.IndexName, localReduceKey, ReduceType.SingleStep)); } }
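Because several storage batches run concurrently inside ExecuteAllBuffered, this version accumulates the total item count and byte size with Interlocked.Add on captured locals rather than a lock. A minimal sketch of that accumulation, again with Parallel.ForEach as an assumed stand-in executor:

using System.Linq;
using System.Threading;
using System.Threading.Tasks;

public static class InterlockedTotalsSketch
{
    // Several batch callbacks may run concurrently, so the shared count/size
    // accumulators are updated with Interlocked.Add instead of a lock.
    public static void Accumulate(int[][] mappedResultSizes, out int count, out int size)
    {
        var totalCount = 0;
        var totalSize = 0;

        Parallel.ForEach(mappedResultSizes, batch =>
        {
            Interlocked.Add(ref totalCount, batch.Length);
            Interlocked.Add(ref totalSize, batch.Sum());
        });

        count = totalCount;
        size = totalSize;
    }
}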
private ReduceResultStats MultiStepReduce(IndexToWorkOn index, string[] keysToReduce, AbstractViewGenerator viewGenerator, List <object> itemsToDelete) { var result = new ReduceResultStats(); var needToMoveToMultiStep = new HashSet <string>(); transactionalStorage.Batch(actions => { foreach (var localReduceKey in keysToReduce) { var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexName, localReduceKey); if (lastPerformedReduceType != ReduceType.MultiStep) { needToMoveToMultiStep.Add(localReduceKey); } if (lastPerformedReduceType != ReduceType.SingleStep) { continue; } // we exceeded the limit of items to reduce in single step // now we need to schedule reductions at level 0 for all map results with given reduce key var mappedItems = actions.MapReduce.GetMappedBuckets(index.IndexName, localReduceKey).ToList(); actions.MapReduce.ScheduleReductions(index.IndexName, 0, mappedItems.Select(x => new ReduceKeyAndBucket(x, localReduceKey))); } }); for (int i = 0; i < 3; i++) { var level = i; bool retry = true; var itemsAlreadySeen = new HashSet <Tuple <string, int> >(); while (retry) { transactionalStorage.Batch(actions => { context.CancellationToken.ThrowIfCancellationRequested(); var sp = Stopwatch.StartNew(); var persistedResults = actions.MapReduce.GetItemsToReduce ( level: level, reduceKeys: keysToReduce, index: index.IndexName, itemsToDelete: itemsToDelete, loadData: true, take: context.CurrentNumberOfItemsToReduceInSingleBatch, itemsAlreadySeen: itemsAlreadySeen ).ToList(); if (persistedResults.Count == 0) { retry = false; return; } result.count += persistedResults.Count; result.size += persistedResults.Sum(x => x.Size); if (Log.IsDebugEnabled) { if (persistedResults.Count > 0) { Log.Debug(() => string.Format("Found {0} results for keys [{1}] for index {2} at level {3} in {4}", persistedResults.Count, string.Join(", ", persistedResults.Select(x => x.ReduceKey).Distinct()), index.IndexName, level, sp.Elapsed)); } else { Log.Debug("No reduce keys found for {0}", index.IndexName); } } context.CancellationToken.ThrowIfCancellationRequested(); var requiredReduceNextTime = persistedResults.Select(x => new ReduceKeyAndBucket(x.Bucket, x.ReduceKey)) .OrderBy(x => x.Bucket) .Distinct() .ToArray(); foreach (var mappedResultInfo in requiredReduceNextTime) { actions.MapReduce.RemoveReduceResults(index.IndexName, level + 1, mappedResultInfo.ReduceKey, mappedResultInfo.Bucket); } if (level != 2) { var reduceKeysAndBuckets = requiredReduceNextTime .Select(x => new ReduceKeyAndBucket(x.Bucket / 1024, x.ReduceKey)) .Distinct() .ToArray(); actions.MapReduce.ScheduleReductions(index.IndexName, level + 1, reduceKeysAndBuckets); } var results = persistedResults .Where(x => x.Data != null) .GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data)) .ToArray(); var reduceKeys = new HashSet <string>(persistedResults.Select(x => x.ReduceKey), StringComparer.InvariantCultureIgnoreCase); context.ReducedPerSecIncreaseBy(results.Length); context.CancellationToken.ThrowIfCancellationRequested(); sp = Stopwatch.StartNew(); context.IndexStorage.Reduce(index.IndexName, viewGenerator, results, level, context, actions, reduceKeys); Log.Debug("Indexed {0} reduce keys in {1} with {2} results for index {3} in {4}", reduceKeys.Count, sp.Elapsed, results.Length, index.IndexName, sp.Elapsed); }); } } foreach (var reduceKey in needToMoveToMultiStep) { string localReduceKey = reduceKey; transactionalStorage.Batch(actions => actions.MapReduce.UpdatePerformedReduceType(index.IndexName, localReduceKey, 
ReduceType.MultiStep)); } return(result); }
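The multi-step path walks levels 0, 1 and 2: after reducing a batch at one level it schedules the distinct parent buckets (bucket / 1024) at the next level, and level 2 yields the final result, so nothing is scheduled past it. A self-contained sketch of that roll-up, assuming the 1024-to-1 bucket fan-in used in the code above:

using System.Collections.Generic;
using System.Linq;

public static class BucketRollupSketch
{
    // Illustrative only: a bucket at one level rolls up into bucket / 1024 at the next level,
    // matching the ScheduleReductions(level + 1, x.Bucket / 1024, ...) calls above.
    public static int ParentBucket(int bucket)
    {
        return bucket / 1024;
    }

    // Level 2 is the final level, so nothing is scheduled beyond it.
    public static int[] BucketsToScheduleAtNextLevel(IEnumerable<int> reducedBuckets, int level)
    {
        if (level == 2)
            return new int[0];

        return reducedBuckets
            .Select(ParentBucket)
            .Distinct()
            .ToArray();
    }
}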
protected ReducingPerformanceStats[] HandleReduceForIndex(IndexToWorkOn indexToWorkOn, CancellationToken token) { var viewGenerator = context.IndexDefinitionStorage.GetViewGenerator(indexToWorkOn.IndexId); if (viewGenerator == null) return null; bool operationCanceled = false; var itemsToDelete = new ConcurrentSet<object>(); var singleStepReduceKeys = new List<string>(); var multiStepsReduceKeys = new List<string>(); transactionalStorage.Batch(actions => { var mappedResultsInfo = actions.MapReduce.GetReduceTypesPerKeys(indexToWorkOn.IndexId, context.CurrentNumberOfItemsToReduceInSingleBatch, context.NumberOfItemsToExecuteReduceInSingleStep, token); foreach (var key in mappedResultsInfo) { token.ThrowIfCancellationRequested(); switch (key.OperationTypeToPerform) { case ReduceType.SingleStep: singleStepReduceKeys.Add(key.ReduceKey); break; case ReduceType.MultiStep: multiStepsReduceKeys.Add(key.ReduceKey); break; } } }); currentlyProcessedIndexes.TryAdd(indexToWorkOn.IndexId, indexToWorkOn.Index); var performanceStats = new List<ReducingPerformanceStats>(); try { if (singleStepReduceKeys.Count > 0) { if ( Log.IsDebugEnabled ) Log.Debug("SingleStep reduce for keys: {0}", singleStepReduceKeys.Select(x => x + ",")); var singleStepStats = SingleStepReduce(indexToWorkOn, singleStepReduceKeys, viewGenerator, itemsToDelete, token); performanceStats.Add(singleStepStats); } if (multiStepsReduceKeys.Count > 0) { if ( Log.IsDebugEnabled ) Log.Debug("MultiStep reduce for keys: {0}", multiStepsReduceKeys.Select(x => x + ",")); var multiStepStats = MultiStepReduce(indexToWorkOn, multiStepsReduceKeys, viewGenerator, itemsToDelete, token); performanceStats.Add(multiStepStats); } } catch (OperationCanceledException) { operationCanceled = true; } catch (AggregateException e) { var anyOperationsCanceled = e .InnerExceptions .OfType<OperationCanceledException>() .Any(); if (anyOperationsCanceled == false) throw; operationCanceled = true; } finally { var postReducingOperations = new ReduceLevelPeformanceStats { Level = -1, Started = SystemTime.UtcNow }; if (operationCanceled == false) { var deletingScheduledReductionsDuration = new Stopwatch(); var storageCommitDuration = new Stopwatch(); // whatever we succeeded in indexing or not, we have to update this // because otherwise we keep trying to re-index failed mapped results transactionalStorage.Batch(actions => { actions.BeforeStorageCommit += storageCommitDuration.Start; actions.AfterStorageCommit += storageCommitDuration.Stop; ScheduledReductionInfo latest; using (StopwatchScope.For(deletingScheduledReductionsDuration)) { latest = actions.MapReduce.DeleteScheduledReduction(itemsToDelete); } if (latest == null) return; actions.Indexing.UpdateLastReduced(indexToWorkOn.Index.indexId, latest.Etag, latest.Timestamp); }); postReducingOperations.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_DeleteScheduledReductions, deletingScheduledReductionsDuration.ElapsedMilliseconds)); postReducingOperations.Operations.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds)); } postReducingOperations.Completed = SystemTime.UtcNow; postReducingOperations.Duration = postReducingOperations.Completed - postReducingOperations.Started; performanceStats.Add(new ReducingPerformanceStats(ReduceType.None) { LevelStats = new List<ReduceLevelPeformanceStats> { postReducingOperations } }); Index _; currentlyProcessedIndexes.TryRemove(indexToWorkOn.IndexId, out _); } return performanceStats.ToArray(); }
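GetReduceTypesPerKeys decides per reduce key whether the pending work should be reduced in a single step or through the multi-step levels; the executer above only partitions the returned keys into the two lists. The sketch below illustrates that partitioning under the simplifying assumption that the decision is a plain threshold on scheduled items per key; the real storage logic also considers the previously performed reduce type:

using System.Collections.Generic;

public static class ReduceTypePartitionSketch
{
    // Hypothetical simplification of the single-step / multi-step decision.
    public static void Partition(
        IDictionary<string, int> scheduledItemsPerReduceKey,
        int numberOfItemsToExecuteReduceInSingleStep,
        out List<string> singleStepKeys,
        out List<string> multiStepKeys)
    {
        singleStepKeys = new List<string>();
        multiStepKeys = new List<string>();

        foreach (var pair in scheduledItemsPerReduceKey)
        {
            if (pair.Value <= numberOfItemsToExecuteReduceInSingleStep)
                singleStepKeys.Add(pair.Key);   // small enough to re-reduce the key in one pass
            else
                multiStepKeys.Add(pair.Key);    // too large, go through levels 0 -> 1 -> 2
        }
    }
}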
protected void HandleReduceForIndex(IndexToWorkOn indexToWorkOn) { var viewGenerator = context.IndexDefinitionStorage.GetViewGenerator(indexToWorkOn.IndexName); if (viewGenerator == null) return; TimeSpan reduceDuration = TimeSpan.Zero; int totalCount = 0; int totalSize = 0; bool operationCanceled = false; var itemsToDelete = new List<object>(); try { var sw = Stopwatch.StartNew(); for (int i = 0; i < 3; i++) { var level = i; transactionalStorage.Batch(actions => { context.CancellationToken.ThrowIfCancellationRequested(); var sp = Stopwatch.StartNew(); var persistedResults = actions.MapReduce.GetItemsToReduce ( take: context.CurrentNumberOfItemsToReduceInSingleBatch, level: level, index: indexToWorkOn.IndexName, itemsToDelete: itemsToDelete ) .ToList(); totalCount += persistedResults.Count; totalSize += persistedResults.Sum(x => x.Size); if (Log.IsDebugEnabled) { if (persistedResults.Count > 0) Log.Debug(() => string.Format("Found {0} results for keys [{1}] for index {2} at level {3} in {4}", persistedResults.Count, string.Join(", ", persistedResults.Select(x => x.ReduceKey).Distinct()), indexToWorkOn.IndexName, level, sp.Elapsed)); else Log.Debug("No reduce keys found for {0}", indexToWorkOn.IndexName); } context.CancellationToken.ThrowIfCancellationRequested(); var requiredReduceNextTime = persistedResults.Select(x => new ReduceKeyAndBucket(x.Bucket, x.ReduceKey)) .OrderBy(x=>x.Bucket) .Distinct() .ToArray(); foreach (var mappedResultInfo in requiredReduceNextTime) { actions.MapReduce.RemoveReduceResults(indexToWorkOn.IndexName, level + 1, mappedResultInfo.ReduceKey, mappedResultInfo.Bucket); } if (level != 2) { var reduceKeysAndBukcets = requiredReduceNextTime .Select(x => new ReduceKeyAndBucket(x.Bucket / 1024, x.ReduceKey)) .Distinct() .ToArray(); actions.MapReduce.ScheduleReductions(indexToWorkOn.IndexName, level + 1, reduceKeysAndBukcets); } var results = persistedResults .Where(x => x.Data != null) .GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data)) .ToArray(); var reduceKeys = new HashSet<string>(persistedResults.Select(x => x.ReduceKey), StringComparer.InvariantCultureIgnoreCase); context.ReducedPerSecIncreaseBy(results.Length); context.CancellationToken.ThrowIfCancellationRequested(); sp = Stopwatch.StartNew(); context.IndexStorage.Reduce(indexToWorkOn.IndexName, viewGenerator, results, level, context, actions, reduceKeys); Log.Debug("Indexed {0} reduce keys in {1} with {2} results for index {3} in {4}", reduceKeys.Count, sp.Elapsed, results.Length, indexToWorkOn.IndexName, sp.Elapsed); }); } reduceDuration = sw.Elapsed; } catch (OperationCanceledException) { operationCanceled = true; } finally { if (operationCanceled == false) { // whatever we succeeded in indexing or not, we have to update this // because otherwise we keep trying to re-index failed mapped results transactionalStorage.Batch(actions => { var latest= actions.MapReduce.DeleteScheduledReduction(itemsToDelete); if(latest == null) return; actions.Indexing.UpdateLastReduced(indexToWorkOn.IndexName, latest.Etag, latest.Timestamp); }); autoTuner.AutoThrottleBatchSize(totalCount, totalSize, reduceDuration); } } }
public void IndexPrecomputedBatch(PrecomputedIndexingBatch precomputedBatch) { context.MetricsCounters.IndexedPerSecond.Mark(precomputedBatch.Documents.Count); var indexToWorkOn = new IndexToWorkOn() { Index = precomputedBatch.Index, IndexId = precomputedBatch.Index.indexId, LastIndexedEtag = Etag.Empty }; using (MapIndexingInProgress(new List <IndexToWorkOn> { indexToWorkOn })) { var indexingBatchForIndex = FilterIndexes(new List <IndexToWorkOn>() { indexToWorkOn }, precomputedBatch.Documents, precomputedBatch.LastIndexed).FirstOrDefault(); if (indexingBatchForIndex == null) { return; } IndexingBatchInfo batchInfo = null; try { context.ReportIndexingBatchStarted(precomputedBatch.Documents.Count, precomputedBatch.Documents.Sum(x => x.SerializedSizeOnDisk), new List <string>() { indexToWorkOn.Index.PublicName }, out batchInfo); batchInfo.BatchType = BatchType.Precomputed; if (Log.IsDebugEnabled) { Log.Debug("Going to index precomputed documents for a new index {0}. Count of precomputed docs {1}", precomputedBatch.Index.PublicName, precomputedBatch.Documents.Count); } HandleIndexingFor(indexingBatchForIndex, precomputedBatch.LastIndexed, precomputedBatch.LastModified); } finally { var performance = indexingBatchForIndex.Batch.GetIndexingPerformance(); if (batchInfo != null) { if (performance != null) { batchInfo.PerformanceStats.TryAdd(indexingBatchForIndex.Index.PublicName, performance); } batchInfo.BatchCompleted(); } } } }
public void IndexPrecomputedBatch(PrecomputedIndexingBatch precomputedBatch) { context.MetricsCounters.IndexedPerSecond.Mark(precomputedBatch.Documents.Count); var indexToWorkOn = new IndexToWorkOn() { Index = precomputedBatch.Index, IndexId = precomputedBatch.Index.indexId, LastIndexedEtag = Etag.Empty }; var indexingBatchForIndex = FilterIndexes(new List<IndexToWorkOn>() {indexToWorkOn}, precomputedBatch.Documents, precomputedBatch.LastIndexed).FirstOrDefault(); if (indexingBatchForIndex == null) return; if (Log.IsDebugEnabled) { Log.Debug("Going to index precomputed documents for a new index {0}. Count of precomputed docs {1}", precomputedBatch.Index.PublicName, precomputedBatch.Documents.Count); } HandleIndexingFor(indexingBatchForIndex, precomputedBatch.LastIndexed, precomputedBatch.LastModified); }
private void SingleStepReduce(IndexToWorkOn index, string[] keysToReduce, AbstractViewGenerator viewGenerator, ConcurrentSet<object> itemsToDelete) { var needToMoveToSingleStepQueue = new ConcurrentQueue<HashSet<string>>(); Log.Debug(() => string.Format("Executing single step reducing for {0} keys [{1}]", keysToReduce.Length, string.Join(", ", keysToReduce))); var batchTimeWatcher = Stopwatch.StartNew(); var count = 0; var size = 0; var state = new ConcurrentQueue<Tuple<HashSet<string>, List<MappedResultInfo>>>(); var reducingBatchThrottlerId = Guid.NewGuid(); try { BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, keysToReduce, enumerator => { var localNeedToMoveToSingleStep = new HashSet<string>(); needToMoveToSingleStepQueue.Enqueue(localNeedToMoveToSingleStep); var localKeys = new HashSet<string>(); while (enumerator.MoveNext()) { localKeys.Add(enumerator.Current); } transactionalStorage.Batch(actions => { var getItemsToReduceParams = new GetItemsToReduceParams(index: index.IndexName, reduceKeys: localKeys, level: 0, loadData: false, itemsToDelete: itemsToDelete) { Take = int.MaxValue// just get all, we do the rate limit when we load the number of keys to reduce, anyway }; var scheduledItems = actions.MapReduce.GetItemsToReduce(getItemsToReduceParams).ToList(); autoTuner.CurrentlyUsedBatchSizes.GetOrAdd(reducingBatchThrottlerId, scheduledItems.Sum(x => x.Size)); if (scheduledItems.Count == 0) { if (Log.IsWarnEnabled) { Log.Warn("Found single reduce items ({0}) that didn't have any items to reduce. Deleting level 1 & level 2 items for those keys. (If you can reproduce this, please contact [email protected])", string.Join(", ", keysToReduce)); } // Here we have an interesting issue. We have scheduled reductions, because GetReduceTypesPerKeys() returned them // and at the same time, we don't have any at level 0. That probably means that we have them at level 1 or 2. // They shouldn't be here, and indeed, we remove them just a little down from here in this function. // That said, they might bave smuggled in between versions, or something happened to cause them to be here. 
// In order to avoid that, we forcibly delete those extra items from the scheduled reductions, and move on foreach (var reduceKey in keysToReduce) { actions.MapReduce.DeleteScheduledReduction(index.IndexName, 1, reduceKey); actions.MapReduce.DeleteScheduledReduction(index.IndexName, 2, reduceKey); } } foreach (var reduceKey in localKeys) { var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexName, reduceKey); if (lastPerformedReduceType != ReduceType.SingleStep) localNeedToMoveToSingleStep.Add(reduceKey); if (lastPerformedReduceType != ReduceType.MultiStep) continue; Log.Debug("Key {0} was moved from multi step to single step reduce, removing existing reduce results records", reduceKey); // now we are in single step but previously multi step reduce was performed for the given key var mappedBuckets = actions.MapReduce.GetMappedBuckets(index.IndexName, reduceKey).ToList(); // add scheduled items too to be sure we will delete reduce results of already deleted documents mappedBuckets.AddRange(scheduledItems.Select(x => x.Bucket)); foreach (var mappedBucket in mappedBuckets.Distinct()) { actions.MapReduce.RemoveReduceResults(index.IndexName, 1, reduceKey, mappedBucket); actions.MapReduce.RemoveReduceResults(index.IndexName, 2, reduceKey, mappedBucket / 1024); } } var mappedResults = actions.MapReduce.GetMappedResults( index.IndexName, localKeys, loadData: true ).ToList(); Interlocked.Add(ref count, mappedResults.Count); Interlocked.Add(ref size, mappedResults.Sum(x => x.Size)); mappedResults.ApplyIfNotNull(x => x.Bucket = 0); state.Enqueue(Tuple.Create(localKeys, mappedResults)); }); }); var reduceKeys = new HashSet<string>(state.SelectMany(x => x.Item1)); var results = state.SelectMany(x => x.Item2) .Where(x => x.Data != null) .GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data)) .ToArray(); context.PerformanceCounters.ReducedPerSecond.IncrementBy(results.Length); context.TransactionalStorage.Batch(actions => context.IndexStorage.Reduce(index.IndexName, viewGenerator, results, 2, context, actions, reduceKeys, state.Sum(x=>x.Item2.Count)) ); autoTuner.AutoThrottleBatchSize(count, size, batchTimeWatcher.Elapsed); var needToMoveToSingleStep = new HashSet<string>(); HashSet<string> set; while (needToMoveToSingleStepQueue.TryDequeue(out set)) { needToMoveToSingleStep.UnionWith(set); } foreach (var reduceKey in needToMoveToSingleStep) { string localReduceKey = reduceKey; transactionalStorage.Batch(actions => actions.MapReduce.UpdatePerformedReduceType(index.IndexName, localReduceKey, ReduceType.SingleStep)); } } finally { long _; autoTuner.CurrentlyUsedBatchSizes.TryRemove(reducingBatchThrottlerId, out _); } }
public void IndexPrecomputedBatch(PrecomputedIndexingBatch precomputedBatch, CancellationToken token) { token.ThrowIfCancellationRequested(); context.MetricsCounters.IndexedPerSecond.Mark(precomputedBatch.Documents.Count); var indexToWorkOn = new IndexToWorkOn { Index = precomputedBatch.Index, IndexId = precomputedBatch.Index.indexId, LastIndexedEtag = Etag.Empty }; using (LogContext.WithDatabase(context.DatabaseName)) using (MapIndexingInProgress(new List<IndexToWorkOn> { indexToWorkOn })) { IndexingBatchForIndex indexingBatchForIndex; if (precomputedBatch.Documents.Count > 0) { indexingBatchForIndex = FilterIndexes( new List<IndexToWorkOn> {indexToWorkOn}, precomputedBatch.Documents, precomputedBatch.LastIndexed) .FirstOrDefault(); } else { indexingBatchForIndex = new IndexingBatchForIndex { Batch = new IndexingBatch(precomputedBatch.LastIndexed), Index = precomputedBatch.Index, IndexId = precomputedBatch.Index.indexId, LastIndexedEtag = precomputedBatch.LastIndexed }; } if (indexingBatchForIndex == null) return; IndexingBatchInfo batchInfo = null; IndexingPerformanceStats performance = null; try { batchInfo = context.ReportIndexingBatchStarted(precomputedBatch.Documents.Count, -1, new List<string> { indexToWorkOn.Index.PublicName }); batchInfo.BatchType = BatchType.Precomputed; if (Log.IsDebugEnabled) { Log.Debug("Going to index precomputed documents for a new index {0}. Count of precomputed docs {1}", precomputedBatch.Index.PublicName, precomputedBatch.Documents.Count); } performance = HandleIndexingFor(indexingBatchForIndex, precomputedBatch.LastIndexed, precomputedBatch.LastModified, token); } finally { if (batchInfo != null) { if (performance != null) batchInfo.PerformanceStats.TryAdd(indexingBatchForIndex.Index.PublicName, performance); context.ReportIndexingBatchCompleted(batchInfo); } } } indexReplacer.ReplaceIndexes(new []{ indexToWorkOn.IndexId }); }
public void IndexPrecomputedBatch(PrecomputedIndexingBatch precomputedBatch, CancellationToken token) { token.ThrowIfCancellationRequested(); context.MetricsCounters.IndexedPerSecond.Mark(precomputedBatch.Documents.Count); var indexToWorkOn = new IndexToWorkOn { Index = precomputedBatch.Index, IndexId = precomputedBatch.Index.indexId, LastIndexedEtag = Etag.Empty }; using (LogContext.WithDatabase(context.DatabaseName)) using (MapIndexingInProgress(new List <IndexToWorkOn> { indexToWorkOn })) { var indexingBatchForIndex = FilterIndexes(new List <IndexToWorkOn> { indexToWorkOn }, precomputedBatch.Documents, precomputedBatch.LastIndexed).FirstOrDefault(); if (indexingBatchForIndex == null) { return; } IndexingBatchInfo batchInfo = null; IndexingPerformanceStats performance = null; try { batchInfo = context.ReportIndexingBatchStarted(precomputedBatch.Documents.Count, -1, new List <string> { indexToWorkOn.Index.PublicName }); batchInfo.BatchType = BatchType.Precomputed; if (Log.IsDebugEnabled) { Log.Debug("Going to index precomputed documents for a new index {0}. Count of precomputed docs {1}", precomputedBatch.Index.PublicName, precomputedBatch.Documents.Count); } performance = HandleIndexingFor(indexingBatchForIndex, precomputedBatch.LastIndexed, precomputedBatch.LastModified, token); } finally { if (batchInfo != null) { if (performance != null) { batchInfo.PerformanceStats.TryAdd(indexingBatchForIndex.Index.PublicName, performance); } context.ReportIndexingBatchCompleted(batchInfo); } } } indexReplacer.ReplaceIndexes(new [] { indexToWorkOn.IndexId }); }
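MapIndexingInProgress brackets the precomputed batch: the index is registered as currently being indexed, and the using block guarantees it is unregistered even when FilterIndexes or HandleIndexingFor throws. A sketch of that disposable-bracket pattern over a ConcurrentDictionary (the member names here are illustrative, not the executer's):

using System;
using System.Collections.Concurrent;

public static class InProgressBracketSketch
{
    // Register the index as "currently being indexed" and guarantee removal when the
    // using block exits, even if filtering or indexing throws.
    public static IDisposable MarkInProgress(ConcurrentDictionary<int, string> inProgress, int indexId, string indexName)
    {
        inProgress.TryAdd(indexId, indexName);
        return new DisposableAction(() =>
        {
            string removed;
            inProgress.TryRemove(indexId, out removed);
        });
    }

    private sealed class DisposableAction : IDisposable
    {
        private readonly Action onDispose;
        public DisposableAction(Action onDispose) { this.onDispose = onDispose; }
        public void Dispose() { onDispose(); }
    }
}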
private void SingleStepReduce(IndexToWorkOn index, string[] keysToReduce, AbstractViewGenerator viewGenerator, ConcurrentSet <object> itemsToDelete) { var needToMoveToSingleStepQueue = new ConcurrentQueue <HashSet <string> >(); Log.Debug(() => string.Format("Executing single step reducing for {0} keys [{1}]", keysToReduce.Length, string.Join(", ", keysToReduce))); var batchTimeWatcher = Stopwatch.StartNew(); var count = 0; var size = 0; var state = new ConcurrentQueue <Tuple <HashSet <string>, List <MappedResultInfo> > >(); var reducingBatchThrottlerId = Guid.NewGuid(); try { BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, keysToReduce, enumerator => { var localNeedToMoveToSingleStep = new HashSet <string>(); needToMoveToSingleStepQueue.Enqueue(localNeedToMoveToSingleStep); var localKeys = new HashSet <string>(); while (enumerator.MoveNext()) { localKeys.Add(enumerator.Current); } transactionalStorage.Batch(actions => { var getItemsToReduceParams = new GetItemsToReduceParams(index: index.IndexName, reduceKeys: localKeys, level: 0, loadData: false, itemsToDelete: itemsToDelete) { Take = int.MaxValue // just get all, we do the rate limit when we load the number of keys to reduce, anyway }; var scheduledItems = actions.MapReduce.GetItemsToReduce(getItemsToReduceParams).ToList(); autoTuner.CurrentlyUsedBatchSizes.GetOrAdd(reducingBatchThrottlerId, scheduledItems.Sum(x => x.Size)); if (scheduledItems.Count == 0) { if (Log.IsWarnEnabled) { Log.Warn("Found single reduce items ({0}) that didn't have any items to reduce. Deleting level 1 & level 2 items for those keys. (If you can reproduce this, please contact [email protected])", string.Join(", ", keysToReduce)); } // Here we have an interesting issue. We have scheduled reductions, because GetReduceTypesPerKeys() returned them // and at the same time, we don't have any at level 0. That probably means that we have them at level 1 or 2. // They shouldn't be here, and indeed, we remove them just a little down from here in this function. // That said, they might bave smuggled in between versions, or something happened to cause them to be here. 
// In order to avoid that, we forcibly delete those extra items from the scheduled reductions, and move on foreach (var reduceKey in keysToReduce) { actions.MapReduce.DeleteScheduledReduction(index.IndexName, 1, reduceKey); actions.MapReduce.DeleteScheduledReduction(index.IndexName, 2, reduceKey); } } foreach (var reduceKey in localKeys) { var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexName, reduceKey); if (lastPerformedReduceType != ReduceType.SingleStep) { localNeedToMoveToSingleStep.Add(reduceKey); } if (lastPerformedReduceType != ReduceType.MultiStep) { continue; } Log.Debug("Key {0} was moved from multi step to single step reduce, removing existing reduce results records", reduceKey); // now we are in single step but previously multi step reduce was performed for the given key var mappedBuckets = actions.MapReduce.GetMappedBuckets(index.IndexName, reduceKey).ToList(); // add scheduled items too to be sure we will delete reduce results of already deleted documents mappedBuckets.AddRange(scheduledItems.Select(x => x.Bucket)); foreach (var mappedBucket in mappedBuckets.Distinct()) { actions.MapReduce.RemoveReduceResults(index.IndexName, 1, reduceKey, mappedBucket); actions.MapReduce.RemoveReduceResults(index.IndexName, 2, reduceKey, mappedBucket / 1024); } } var mappedResults = actions.MapReduce.GetMappedResults( index.IndexName, localKeys, loadData: true ).ToList(); Interlocked.Add(ref count, mappedResults.Count); Interlocked.Add(ref size, mappedResults.Sum(x => x.Size)); mappedResults.ApplyIfNotNull(x => x.Bucket = 0); state.Enqueue(Tuple.Create(localKeys, mappedResults)); }); }); var reduceKeys = new HashSet <string>(state.SelectMany(x => x.Item1)); var results = state.SelectMany(x => x.Item2) .Where(x => x.Data != null) .GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data)) .ToArray(); context.PerformanceCounters.ReducedPerSecond.IncrementBy(results.Length); context.TransactionalStorage.Batch(actions => context.IndexStorage.Reduce(index.IndexName, viewGenerator, results, 2, context, actions, reduceKeys, state.Sum(x => x.Item2.Count)) ); autoTuner.AutoThrottleBatchSize(count, size, batchTimeWatcher.Elapsed); var needToMoveToSingleStep = new HashSet <string>(); HashSet <string> set; while (needToMoveToSingleStepQueue.TryDequeue(out set)) { needToMoveToSingleStep.UnionWith(set); } foreach (var reduceKey in needToMoveToSingleStep) { string localReduceKey = reduceKey; transactionalStorage.Batch(actions => actions.MapReduce.UpdatePerformedReduceType(index.IndexName, localReduceKey, ReduceType.SingleStep)); } } finally { long _; autoTuner.CurrentlyUsedBatchSizes.TryRemove(reducingBatchThrottlerId, out _); } }
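Both SingleStepReduce variants register the batch's in-flight size with the auto tuner under a fresh Guid and remove the entry in a finally block, so a failed or cancelled reduce never leaves memory reserved against future batches. A sketch of that reserve-and-release discipline, with a plain ConcurrentDictionary standing in for autoTuner.CurrentlyUsedBatchSizes:

using System;
using System.Collections.Concurrent;

public static class BatchSizeReservationSketch
{
    // Reserve the batch's in-flight size under a fresh id before reducing and always
    // release it in finally, so a failed or cancelled batch never keeps memory reserved.
    public static void RunReduceBatch(
        ConcurrentDictionary<Guid, long> currentlyUsedBatchSizes,   // stand-in for the auto tuner's map
        Func<long> computeScheduledSizeInBytes,
        Action reduce)
    {
        var throttlerId = Guid.NewGuid();
        try
        {
            currentlyUsedBatchSizes.GetOrAdd(throttlerId, computeScheduledSizeInBytes());
            reduce();
        }
        finally
        {
            long ignored;
            currentlyUsedBatchSizes.TryRemove(throttlerId, out ignored);
        }
    }
}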