/// <summary>
/// Reduces the given keys directly from their mapped results into the level 2 (final) reduce output.
/// </summary>
/// <param name="index">The index being reduced; only <c>IndexName</c> is read here.</param>
/// <param name="keysToReduce">Reduce keys handled by this call.</param>
/// <param name="viewGenerator">View generator handed through to <c>IndexStorage.Reduce</c>.</param>
/// <param name="itemsToDelete">Collector handed through to <c>GetItemsToReduce</c>.</param>
private void SingleStepReduce(IndexToWorkOn index, string[] keysToReduce, AbstractViewGenerator viewGenerator, ConcurrentSet<object> itemsToDelete)
{
    // Every worker callback enqueues its own private set, so no set is shared between callbacks.
    var pendingTypeUpdates = new ConcurrentQueue<HashSet<string>>();

    Log.Debug(() => string.Format("Executing single step reducing for {0} keys [{1}]", keysToReduce.Length, string.Join(", ", keysToReduce)));

    var batchTimer = Stopwatch.StartNew();
    var totalCount = 0;
    var totalSize = 0;
    var collected = new ConcurrentQueue<Tuple<HashSet<string>, List<MappedResultInfo>>>();
    var throttlerId = Guid.NewGuid();

    try
    {
        // NOTE(review): the callback is presumably invoked concurrently (hence Interlocked and the concurrent queues) - confirm.
        BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, keysToReduce, enumerator =>
        {
            var keysNeedingTypeUpdate = new HashSet<string>();
            pendingTypeUpdates.Enqueue(keysNeedingTypeUpdate);

            var batchKeys = new HashSet<string>();
            while (enumerator.MoveNext())
            {
                batchKeys.Add(enumerator.Current);
            }

            transactionalStorage.Batch(actions =>
            {
                var request = new GetItemsToReduceParams(index: index.IndexName, reduceKeys: batchKeys, level: 0, loadData: false, itemsToDelete: itemsToDelete)
                {
                    // just get all, we do the rate limit when we load the number of keys to reduce, anyway
                    Take = int.MaxValue
                };

                var scheduledItems = actions.MapReduce.GetItemsToReduce(request).ToList();
                autoTuner.CurrentlyUsedBatchSizes.GetOrAdd(throttlerId, scheduledItems.Sum(x => x.Size));

                if (scheduledItems.Count == 0)
                {
                    if (Log.IsWarnEnabled)
                    {
                        Log.Warn("Found single reduce items ({0}) that didn't have any items to reduce. Deleting level 1 & level 2 items for those keys. (If you can reproduce this, please contact [email protected])", string.Join(", ", keysToReduce));
                    }

                    // Reductions were scheduled for these keys, yet nothing exists at level 0, so the
                    // scheduled entries probably sit at level 1 or 2. They should not be there (they may
                    // have slipped in between versions), so they are forcibly removed before moving on.
                    foreach (var staleKey in keysToReduce)
                    {
                        actions.MapReduce.DeleteScheduledReduction(index.IndexName, 1, staleKey);
                        actions.MapReduce.DeleteScheduledReduction(index.IndexName, 2, staleKey);
                    }
                }

                foreach (var reduceKey in batchKeys)
                {
                    var previousType = actions.MapReduce.GetLastPerformedReduceType(index.IndexName, reduceKey);
                    if (previousType != ReduceType.SingleStep)
                    {
                        keysNeedingTypeUpdate.Add(reduceKey);
                    }

                    if (previousType == ReduceType.MultiStep)
                    {
                        Log.Debug("Key {0} was moved from multi step to single step reduce, removing existing reduce results records", reduceKey);

                        // The key used to be reduced in multiple steps: drop its intermediate results.
                        var mappedBuckets = actions.MapReduce.GetMappedBuckets(index.IndexName, reduceKey).ToList();

                        // Scheduled buckets are included too, so results of already deleted documents go as well.
                        foreach (var bucket in mappedBuckets.Union(scheduledItems.Select(x => x.Bucket)))
                        {
                            actions.MapReduce.RemoveReduceResults(index.IndexName, 1, reduceKey, bucket);
                            actions.MapReduce.RemoveReduceResults(index.IndexName, 2, reduceKey, bucket / 1024);
                        }
                    }
                }

                var mappedResults = actions.MapReduce.GetMappedResults(index.IndexName, batchKeys, loadData: true).ToList();

                Interlocked.Add(ref totalCount, mappedResults.Count);
                Interlocked.Add(ref totalSize, mappedResults.Sum(x => x.Size));

                // Single step reduce treats everything as one bucket.
                mappedResults.ApplyIfNotNull(x => x.Bucket = 0);

                collected.Enqueue(Tuple.Create(batchKeys, mappedResults));
            });
        });

        var reduceKeys = new HashSet<string>(collected.SelectMany(x => x.Item1));

        var results = collected
            .SelectMany(x => x.Item2)
            .Where(x => x.Data != null)
            .GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data))
            .ToArray();

        context.PerformanceCounters.ReducedPerSecond.IncrementBy(results.Length);

        var mappedItemCount = collected.Sum(x => x.Item2.Count);
        context.TransactionalStorage.Batch(actions =>
        {
            context.IndexStorage.Reduce(index.IndexName, viewGenerator, results, 2, context, actions, reduceKeys, mappedItemCount);
        });

        autoTuner.AutoThrottleBatchSize(totalCount, totalSize, batchTimer.Elapsed);

        // Merge the per-callback sets and record the new reduce type for each key.
        var keysToMark = new HashSet<string>();
        foreach (var perBatchKeys in pendingTypeUpdates)
        {
            keysToMark.UnionWith(perBatchKeys);
        }

        foreach (var reduceKey in keysToMark)
        {
            string localReduceKey = reduceKey;
            transactionalStorage.Batch(actions =>
                actions.MapReduce.UpdatePerformedReduceType(index.IndexName, localReduceKey, ReduceType.SingleStep));
        }
    }
    finally
    {
        long ignored;
        autoTuner.CurrentlyUsedBatchSizes.TryRemove(throttlerId, out ignored);
    }
}
/// <summary>
/// Reduces the given keys level by level (0, 1, 2), scheduling the next level for every bucket that was touched.
/// </summary>
/// <param name="index">The index being reduced; only <c>IndexName</c> is read here.</param>
/// <param name="keysToReduce">Reduce keys handled by this call.</param>
/// <param name="viewGenerator">View generator handed through to <c>IndexStorage.Reduce</c>.</param>
/// <param name="itemsToDelete">Collector handed through to <c>GetItemsToReduce</c>.</param>
private void MultiStepReduce(IndexToWorkOn index, string[] keysToReduce, AbstractViewGenerator viewGenerator, ConcurrentSet<object> itemsToDelete)
{
    var keysToMark = new HashSet<string>();

    transactionalStorage.Batch(actions =>
    {
        foreach (var key in keysToReduce)
        {
            var previousType = actions.MapReduce.GetLastPerformedReduceType(index.IndexName, key);
            if (previousType != ReduceType.MultiStep)
            {
                keysToMark.Add(key);
            }

            if (previousType != ReduceType.SingleStep)
            {
                continue;
            }

            // The key outgrew single step reducing, so every bucket that holds map results
            // for it must be scheduled at level 0.
            var mappedBuckets = actions.MapReduce.GetMappedBuckets(index.IndexName, key).ToList();
            foreach (var bucket in mappedBuckets)
            {
                actions.MapReduce.ScheduleReductions(index.IndexName, 0, new ReduceKeyAndBucket(bucket, key));
            }
        }
    });

    for (int pass = 0; pass < 3; pass++)
    {
        // Separate local so the lambdas below capture a per-iteration value.
        var level = pass;

        var reduceParams = new GetItemsToReduceParams(
            index.IndexName,
            keysToReduce,
            level,
            true,
            itemsToDelete);

        bool keepGoing = true;
        while (keepGoing && reduceParams.ReduceKeys.Count > 0)
        {
            var throttlerId = Guid.NewGuid();
            try
            {
                transactionalStorage.Batch(actions =>
                {
                    context.CancellationToken.ThrowIfCancellationRequested();

                    var batchTimer = Stopwatch.StartNew();

                    reduceParams.Take = context.CurrentNumberOfItemsToReduceInSingleBatch;

                    var persistedResults = actions.MapReduce.GetItemsToReduce(reduceParams).ToList();
                    if (persistedResults.Count == 0)
                    {
                        // Nothing left at this level: leave the while loop.
                        keepGoing = false;
                        return;
                    }

                    var count = persistedResults.Count;
                    var size = persistedResults.Sum(x => x.Size);
                    autoTuner.CurrentlyUsedBatchSizes.GetOrAdd(throttlerId, size);

                    if (Log.IsDebugEnabled)
                    {
                        if (persistedResults.Count > 0)
                        {
                            Log.Debug(() => string.Format("Found {0} results for keys [{1}] for index {2} at level {3} in {4}",
                                persistedResults.Count,
                                string.Join(", ", persistedResults.Select(x => x.ReduceKey).Distinct()),
                                index.IndexName, level, batchTimer.Elapsed));
                        }
                        else
                        {
                            Log.Debug("No reduce keys found for {0}", index.IndexName);
                        }
                    }

                    context.CancellationToken.ThrowIfCancellationRequested();

                    var touchedBuckets = persistedResults
                        .Select(x => new ReduceKeyAndBucket(x.Bucket, x.ReduceKey))
                        .OrderBy(x => x.Bucket)
                        .Distinct()
                        .ToArray();

                    // Results one level up are stale for every touched bucket.
                    foreach (var touched in touchedBuckets)
                    {
                        actions.MapReduce.RemoveReduceResults(index.IndexName, level + 1, touched.ReduceKey, touched.Bucket);
                    }

                    if (level < 2)
                    {
                        // Schedule the parent bucket (bucket / 1024) at the next level.
                        var parentBuckets = touchedBuckets
                            .Select(x => new ReduceKeyAndBucket(x.Bucket / 1024, x.ReduceKey))
                            .Distinct()
                            .ToArray();

                        foreach (var parent in parentBuckets)
                        {
                            actions.MapReduce.ScheduleReductions(index.IndexName, level + 1, parent);
                        }
                    }

                    var results = persistedResults
                        .Where(x => x.Data != null)
                        .GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data))
                        .ToArray();

                    var reduceKeys = new HashSet<string>(StringComparer.InvariantCultureIgnoreCase);
                    foreach (var persisted in persistedResults)
                    {
                        reduceKeys.Add(persisted.ReduceKey);
                    }

                    context.PerformanceCounters.ReducedPerSecond.IncrementBy(results.Length);

                    context.CancellationToken.ThrowIfCancellationRequested();

                    var reduceTimer = Stopwatch.StartNew();
                    context.IndexStorage.Reduce(index.IndexName, viewGenerator, results, level, context, actions, reduceKeys, persistedResults.Count);

                    var batchDuration = batchTimer.Elapsed;
                    Log.Debug("Indexed {0} reduce keys in {1} with {2} results for index {3} in {4} on level {5}", reduceKeys.Count, batchDuration, results.Length, index.IndexName, reduceTimer.Elapsed, level);

                    autoTuner.AutoThrottleBatchSize(count, size, batchDuration);
                });
            }
            finally
            {
                long ignored;
                autoTuner.CurrentlyUsedBatchSizes.TryRemove(throttlerId, out ignored);
            }
        }
    }

    foreach (var reduceKey in keysToMark)
    {
        string localReduceKey = reduceKey;
        transactionalStorage.Batch(actions =>
            actions.MapReduce.UpdatePerformedReduceType(index.IndexName, localReduceKey, ReduceType.MultiStep));
    }
}
/// <summary>
/// Lazily yields the mapped results behind the scheduled reductions of the requested index, level and reduce keys.
/// </summary>
/// <param name="getItemsToReduceParams">
/// Request state. It is mutated while iterating: <c>Take</c> is decremented per yielded item,
/// fully scanned keys are removed from <c>ReduceKeys</c>, and <c>ItemsAlreadySeen</c> / <c>ItemsToDelete</c> are filled.
/// </param>
/// <returns>One item per mapped result returned by <c>GetResultsForBucket</c> for each newly seen key/bucket pair.</returns>
public IEnumerable<MappedResultInfo> GetItemsToReduce(GetItemsToReduceParams getItemsToReduceParams)
{
    var request = getItemsToReduceParams;

    Api.JetSetCurrentIndex(session, ScheduledReductions, "by_view_level_and_hashed_reduce_key_and_bucket");

    var seenLocally = new HashSet<Tuple<string, int>>();

    // Iterate over a snapshot because keys are removed from ReduceKeys inside the loop.
    foreach (var reduceKey in request.ReduceKeys.ToArray())
    {
        // Position the cursor on the first row for (index, level, hashed key, bucket >= 0).
        Api.MakeKey(session, ScheduledReductions, request.Index, Encoding.Unicode, MakeKeyGrbit.NewKey);
        Api.MakeKey(session, ScheduledReductions, request.Level, MakeKeyGrbit.None);
        Api.MakeKey(session, ScheduledReductions, HashReduceKey(reduceKey), MakeKeyGrbit.None);
        Api.MakeKey(session, ScheduledReductions, 0, MakeKeyGrbit.None);
        if (!Api.TrySeek(session, ScheduledReductions, SeekGrbit.SeekGE))
        {
            continue;
        }

        // Limit the range to the same (index, level, hashed key) up to the largest bucket.
        Api.MakeKey(session, ScheduledReductions, request.Index, Encoding.Unicode, MakeKeyGrbit.NewKey);
        Api.MakeKey(session, ScheduledReductions, request.Level, MakeKeyGrbit.None);
        Api.MakeKey(session, ScheduledReductions, HashReduceKey(reduceKey), MakeKeyGrbit.None);
        Api.MakeKey(session, ScheduledReductions, int.MaxValue, MakeKeyGrbit.None);
        if (!Api.TrySetIndexRange(session, ScheduledReductions, SetIndexRangeGrbit.RangeInclusive | SetIndexRangeGrbit.RangeUpperLimit))
        {
            continue;
        }

        // this isn't used for optimized reading, but to make it easier to delete records later on
        OptimizedDeleter deleter;
        if (request.ItemsToDelete.Count != 0)
        {
            deleter = (OptimizedDeleter)request.ItemsToDelete[0];
        }
        else
        {
            deleter = new OptimizedDeleter();
            request.ItemsToDelete.Add(deleter);
        }

        do
        {
            if (request.Take <= 0)
            {
                break;
            }

            var indexFromDb = Api.RetrieveColumnAsString(session, ScheduledReductions, tableColumnsCache.ScheduledReductionColumns["view"], Encoding.Unicode, RetrieveColumnGrbit.RetrieveFromIndex);
            var levelFromDb = Api.RetrieveColumnAsInt32(session, ScheduledReductions, tableColumnsCache.ScheduledReductionColumns["level"], RetrieveColumnGrbit.RetrieveFromIndex).Value;
            var reduceKeyFromDb = Api.RetrieveColumnAsString(session, ScheduledReductions, tableColumnsCache.ScheduledReductionColumns["reduce_key"]);

            // The index is keyed on a hash of the reduce key, so each row is re-checked against the real values.
            var rowMatches =
                string.Equals(request.Index, indexFromDb, StringComparison.OrdinalIgnoreCase)
                && levelFromDb == request.Level
                && string.Equals(reduceKeyFromDb, reduceKey, StringComparison.Ordinal);
            if (!rowMatches)
            {
                // In a do/while, continue jumps to the TryMoveNext condition below.
                continue;
            }

            var bucket = Api.RetrieveColumnAsInt32(session, ScheduledReductions, tableColumnsCache.ScheduledReductionColumns["bucket"]).Value;
            var rowKey = Tuple.Create(reduceKeyFromDb, bucket);

            // Both calls have side effects and must always run, in this order.
            var isNewScheduledRow = deleter.Add(session, ScheduledReductions);
            var isNewKeyAndBucket = request.ItemsAlreadySeen.Add(rowKey);

            if ((isNewScheduledRow || isNewKeyAndBucket) && seenLocally.Add(rowKey))
            {
                foreach (var mappedResultInfo in GetResultsForBucket(request.Index, request.Level, reduceKeyFromDb, bucket, request.LoadData))
                {
                    request.Take--;
                    yield return mappedResultInfo;
                }
            }

            // Budget exhausted mid-key: stop without removing the key from ReduceKeys.
            if (request.Take <= 0)
            {
                yield break;
            }
        }
        while (Api.TryMoveNext(session, ScheduledReductions));

        request.ReduceKeys.Remove(reduceKey);

        if (request.Take <= 0)
        {
            break;
        }
    }
}
/// <summary>
/// Reduces the given keys directly from their mapped results into the level 2 (final) reduce output
/// and returns timing statistics for the work done.
/// </summary>
/// <param name="index">The index being reduced; only <c>IndexId</c> is read here.</param>
/// <param name="keysToReduce">Reduce keys handled by this call.</param>
/// <param name="viewGenerator">View generator handed through to <c>IndexStorage.Reduce</c>.</param>
/// <param name="itemsToDelete">Collector handed through to <c>GetItemsToReduce</c>.</param>
/// <param name="token">Checked between units of work; cancellation surfaces as the exception thrown by <c>ThrowIfCancellationRequested</c>.</param>
/// <returns>Statistics containing a single level entry (level 2).</returns>
private ReducingPerformanceStats SingleStepReduce(IndexToWorkOn index, List<string> keysToReduce, AbstractViewGenerator viewGenerator, ConcurrentSet<object> itemsToDelete, CancellationToken token)
{
    // Every worker callback enqueues its own private set, so no set is shared between callbacks.
    var needToMoveToSingleStepQueue = new ConcurrentQueue<HashSet<string>>();

    if (Log.IsDebugEnabled)
        Log.Debug(() => string.Format("Executing single step reducing for {0} keys [{1}]", keysToReduce.Count, string.Join(", ", keysToReduce)));

    var batchTimeWatcher = Stopwatch.StartNew();

    var reducingBatchThrottlerId = Guid.NewGuid();
    var reducePerformanceStats = new ReducingPerformanceStats(ReduceType.SingleStep);
    var reduceLevelStats = new ReduceLevelPeformanceStats
    {
        Started = SystemTime.UtcNow,
        Level = 2
    };

    try
    {
        var parallelOperations = new ConcurrentQueue<ParallelBatchStats>();

        var parallelProcessingStart = SystemTime.UtcNow;

        BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, keysToReduce, enumerator =>
        {
            var parallelStats = new ParallelBatchStats
            {
                StartDelay = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds
            };

            var localNeedToMoveToSingleStep = new HashSet<string>();
            needToMoveToSingleStepQueue.Enqueue(localNeedToMoveToSingleStep);
            var localKeys = new HashSet<string>();
            while (enumerator.MoveNext())
            {
                token.ThrowIfCancellationRequested();
                localKeys.Add(enumerator.Current);
            }

            transactionalStorage.Batch(actions =>
            {
                var getItemsToReduceParams = new GetItemsToReduceParams(index: index.IndexId, reduceKeys: localKeys, level: 0, loadData: false, itemsToDelete: itemsToDelete)
                {
                    Take = int.MaxValue // just get all, we do the rate limit when we load the number of keys to reduce, anyway
                };

                var getItemsToReduceDuration = Stopwatch.StartNew();

                int scheduledItemsSum = 0;
                int scheduledItemsCount = 0;
                List<int> scheduledItemsMappedBuckets = new List<int>();
                using (StopwatchScope.For(getItemsToReduceDuration))
                {
                    foreach (var item in actions.MapReduce.GetItemsToReduce(getItemsToReduceParams, token))
                    {
                        scheduledItemsMappedBuckets.Add(item.Bucket);
                        scheduledItemsSum += item.Size;
                        scheduledItemsCount++;
                    }
                }

                parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_GetItemsToReduce, getItemsToReduceDuration.ElapsedMilliseconds));

                autoTuner.CurrentlyUsedBatchSizesInBytes.GetOrAdd(reducingBatchThrottlerId, scheduledItemsSum);

                if (scheduledItemsCount == 0)
                {
                    // Here we have an interesting issue. We have scheduled reductions, because GetReduceTypesPerKeys() returned them
                    // and at the same time, we don't have any at level 0. That probably means that we have them at level 1 or 2.
                    // They shouldn't be here, and indeed, we remove them just a little down from here in this function.
                    // That said, they might have smuggled in between versions, or something happened to cause them to be here.
                    // In order to avoid that, we forcibly delete those extra items from the scheduled reductions, and move on

                    // Guarded so the joined key list is only built when warnings are actually emitted.
                    if (Log.IsWarnEnabled)
                    {
                        Log.Warn("Found single reduce items ({0}) that didn't have any items to reduce. Deleting level 1 & level 2 items for those keys. (If you can reproduce this, please contact [email protected])", string.Join(", ", keysToReduce));
                    }

                    var deletingScheduledReductionsDuration = Stopwatch.StartNew();

                    using (StopwatchScope.For(deletingScheduledReductionsDuration))
                    {
                        foreach (var reduceKey in keysToReduce)
                        {
                            token.ThrowIfCancellationRequested();

                            actions.MapReduce.DeleteScheduledReduction(index.IndexId, 1, reduceKey);
                            actions.MapReduce.DeleteScheduledReduction(index.IndexId, 2, reduceKey);
                        }
                    }

                    parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_DeleteScheduledReductions, deletingScheduledReductionsDuration.ElapsedMilliseconds));
                }

                var removeReduceResultsDuration = new Stopwatch();

                foreach (var reduceKey in localKeys)
                {
                    token.ThrowIfCancellationRequested();

                    var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexId, reduceKey);

                    if (lastPerformedReduceType != ReduceType.SingleStep)
                        localNeedToMoveToSingleStep.Add(reduceKey);

                    if (lastPerformedReduceType != ReduceType.MultiStep)
                        continue;

                    if (Log.IsDebugEnabled)
                    {
                        Log.Debug("Key {0} was moved from multi step to single step reduce, removing existing reduce results records", reduceKey);
                    }

                    using (StopwatchScope.For(removeReduceResultsDuration))
                    {
                        // now we are in single step but previously multi step reduce was performed for the given key
                        var mappedBuckets = actions.MapReduce.GetMappedBuckets(index.IndexId, reduceKey, token);

                        // add scheduled items too to be sure we will delete reduce results of already deleted documents
                        foreach (var mappedBucket in mappedBuckets.Union(scheduledItemsMappedBuckets))
                        {
                            actions.MapReduce.RemoveReduceResults(index.IndexId, 1, reduceKey, mappedBucket);
                            actions.MapReduce.RemoveReduceResults(index.IndexId, 2, reduceKey, mappedBucket / 1024);
                        }
                    }
                }

                parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_RemoveReduceResults, removeReduceResultsDuration.ElapsedMilliseconds));

                parallelOperations.Enqueue(parallelStats);
            });
        });

        reduceLevelStats.Operations.Add(new ParallelPerformanceStats
        {
            NumberOfThreads = parallelOperations.Count,
            DurationMs = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds,
            BatchedOperations = parallelOperations.ToList()
        });

        var getMappedResultsDuration = new Stopwatch();

        var reductionPerformanceStats = new List<IndexingPerformanceStats>();

        var keysLeftToReduce = new HashSet<string>(keysToReduce);
        // NOTE(review): termination relies on GetMappedResults shrinking keysLeftToReduce; that is not visible here - confirm.
        while (keysLeftToReduce.Count > 0)
        {
            var keysReturned = new HashSet<string>();

            // Try to diminish the allocations happening because of .Resize()
            var mappedResults = new List<MappedResultInfo>(keysLeftToReduce.Count);

            context.TransactionalStorage.Batch(actions =>
            {
                var take = context.CurrentNumberOfItemsToReduceInSingleBatch;

                using (StopwatchScope.For(getMappedResultsDuration))
                {
                    mappedResults = actions.MapReduce.GetMappedResults(index.IndexId, keysLeftToReduce, true, take, keysReturned, token, mappedResults);
                }
            });

            var count = mappedResults.Count;

            int size = 0;
            foreach (var item in mappedResults)
            {
                // Single step reduce treats everything as one bucket.
                item.Bucket = 0;
                size += item.Size;
            }

            // Entries without data are skipped before conversion, as the other reduce paths do;
            // count still reflects every mapped result that was loaded.
            var results = mappedResults
                .Where(x => x.Data != null)
                .GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data))
                .ToArray();

            context.MetricsCounters.ReducedPerSecond.Mark(results.Length);

            token.ThrowIfCancellationRequested();

            var performance = context.IndexStorage.Reduce(index.IndexId, viewGenerator, results, 2, context, null, keysReturned, count);

            reductionPerformanceStats.Add(performance);

            // NOTE(review): the elapsed time is measured from the start of the method, not of this batch - confirm intended.
            autoTuner.AutoThrottleBatchSize(count, size, batchTimeWatcher.Elapsed);
        }

        var needToMoveToSingleStep = new HashSet<string>();

        HashSet<string> set;
        while (needToMoveToSingleStepQueue.TryDequeue(out set))
        {
            needToMoveToSingleStep.UnionWith(set);
        }

        foreach (var reduceKey in needToMoveToSingleStep)
        {
            string localReduceKey = reduceKey;
            transactionalStorage.Batch(actions =>
                actions.MapReduce.UpdatePerformedReduceType(index.IndexId, localReduceKey, ReduceType.SingleStep));
        }

        reduceLevelStats.Completed = SystemTime.UtcNow;
        reduceLevelStats.Duration = reduceLevelStats.Completed - reduceLevelStats.Started;
        reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_GetMappedResults, getMappedResultsDuration.ElapsedMilliseconds));
        reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.StorageCommit, 0)); // in single step we write directly to Lucene index

        foreach (var stats in reductionPerformanceStats)
        {
            reduceLevelStats.Add(stats);
        }

        reducePerformanceStats.LevelStats.Add(reduceLevelStats);
    }
    finally
    {
        long _;
        autoTuner.CurrentlyUsedBatchSizesInBytes.TryRemove(reducingBatchThrottlerId, out _);
    }

    return reducePerformanceStats;
}
/// <summary>
/// Reduces the given keys level by level (0, 1, 2), scheduling the next level for every bucket that was touched,
/// and returns per-level timing statistics.
/// </summary>
/// <param name="index">The index being reduced; only <c>IndexId</c> is read here.</param>
/// <param name="keysToReduce">Reduce keys handled by this call.</param>
/// <param name="viewGenerator">View generator handed through to <c>IndexStorage.Reduce</c>.</param>
/// <param name="itemsToDelete">Collector handed through to <c>GetItemsToReduce</c>.</param>
/// <param name="token">Checked between units of work; cancellation surfaces as the exception thrown by <c>ThrowIfCancellationRequested</c>.</param>
/// <returns>Statistics with one level entry per reduce level processed.</returns>
private ReducingPerformanceStats MultiStepReduce(IndexToWorkOn index, List<string> keysToReduce, AbstractViewGenerator viewGenerator, ConcurrentSet<object> itemsToDelete, CancellationToken token)
{
    var needToMoveToMultiStep = new HashSet<string>();
    transactionalStorage.Batch(actions =>
    {
        foreach (var localReduceKey in keysToReduce)
        {
            token.ThrowIfCancellationRequested();

            var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexId, localReduceKey);

            if (lastPerformedReduceType != ReduceType.MultiStep)
                needToMoveToMultiStep.Add(localReduceKey);

            if (lastPerformedReduceType != ReduceType.SingleStep)
                continue;

            // we exceeded the limit of items to reduce in single step
            // now we need to schedule reductions at level 0 for all map results with given reduce key
            var mappedItems = actions.MapReduce.GetMappedBuckets(index.IndexId, localReduceKey, token).ToList();
            foreach (var result in mappedItems.Select(x => new ReduceKeyAndBucket(x, localReduceKey)))
            {
                actions.MapReduce.ScheduleReductions(index.IndexId, 0, result);
            }
        }
    });

    var reducePerformance = new ReducingPerformanceStats(ReduceType.MultiStep);

    var keysToReduceSet = new HashSet<string>(keysToReduce);

    for (int i = 0; i < 3; i++)
    {
        // Separate local so the lambdas below capture a per-iteration value.
        var level = i;

        var reduceLevelStats = new ReduceLevelPeformanceStats()
        {
            Level = level,
            Started = SystemTime.UtcNow,
        };

        var reduceParams = new GetItemsToReduceParams(
            index.IndexId,
            keysToReduceSet,
            level,
            true,
            itemsToDelete);

        var gettingItemsToReduceDuration = new Stopwatch();
        var scheduleReductionsDuration = new Stopwatch();
        var removeReduceResultsDuration = new Stopwatch();
        var storageCommitDuration = new Stopwatch();

        bool retry = true;
        while (retry && reduceParams.ReduceKeys.Count > 0)
        {
            var reduceBatchAutoThrottlerId = Guid.NewGuid();
            try
            {
                transactionalStorage.Batch(actions =>
                {
                    token.ThrowIfCancellationRequested();

                    actions.BeforeStorageCommit += storageCommitDuration.Start;
                    actions.AfterStorageCommit += storageCommitDuration.Stop;

                    var batchTimeWatcher = Stopwatch.StartNew();

                    reduceParams.Take = context.CurrentNumberOfItemsToReduceInSingleBatch;

                    int size = 0;

                    IList<MappedResultInfo> persistedResults;
                    // NOTE(review): this set is culture-sensitive and case-insensitive while storage compares
                    // reduce keys ordinally elsewhere - confirm that is intended.
                    var reduceKeys = new HashSet<string>(StringComparer.InvariantCultureIgnoreCase);
                    using (StopwatchScope.For(gettingItemsToReduceDuration))
                    {
                        persistedResults = actions.MapReduce.GetItemsToReduce(reduceParams, token);

                        foreach (var item in persistedResults)
                        {
                            reduceKeys.Add(item.ReduceKey);
                            size += item.Size;
                        }
                    }

                    if (persistedResults.Count == 0)
                    {
                        // Nothing left at this level: leave the while loop.
                        retry = false;
                        return;
                    }

                    var count = persistedResults.Count;

                    autoTuner.CurrentlyUsedBatchSizesInBytes.GetOrAdd(reduceBatchAutoThrottlerId, size);

                    // persistedResults is known to be non-empty here, so only the "found" message applies.
                    if (Log.IsDebugEnabled)
                    {
                        Log.Debug(() => string.Format("Found {0} results for keys [{1}] for index {2} at level {3} in {4}",
                            persistedResults.Count,
                            string.Join(", ", persistedResults.Select(x => x.ReduceKey).Distinct()),
                            index.IndexId, level, batchTimeWatcher.Elapsed));
                    }

                    token.ThrowIfCancellationRequested();

                    var requiredReduceNextTimeSet = new HashSet<ReduceKeyAndBucket>(persistedResults.Select(x => new ReduceKeyAndBucket(x.Bucket, x.ReduceKey)), ReduceKeyAndBucketEqualityComparer.Instance);

                    using (StopwatchScope.For(removeReduceResultsDuration))
                    {
                        // Results one level up are stale for every touched bucket.
                        foreach (var mappedResultInfo in requiredReduceNextTimeSet)
                        {
                            token.ThrowIfCancellationRequested();

                            actions.MapReduce.RemoveReduceResults(index.IndexId, level + 1, mappedResultInfo.ReduceKey, mappedResultInfo.Bucket);
                        }
                    }

                    if (level != 2)
                    {
                        // Schedule the parent bucket (bucket / 1024) at the next level.
                        var reduceKeysAndBucketsSet = new HashSet<ReduceKeyAndBucket>(requiredReduceNextTimeSet.Select(x => new ReduceKeyAndBucket(x.Bucket / 1024, x.ReduceKey)), ReduceKeyAndBucketEqualityComparer.Instance);

                        using (StopwatchScope.For(scheduleReductionsDuration))
                        {
                            foreach (var reduceKeysAndBucket in reduceKeysAndBucketsSet)
                            {
                                token.ThrowIfCancellationRequested();

                                actions.MapReduce.ScheduleReductions(index.IndexId, level + 1, reduceKeysAndBucket);
                            }
                        }
                    }

                    token.ThrowIfCancellationRequested();

                    var reduceTimeWatcher = Stopwatch.StartNew();

                    // Materialized once: the grouping is consumed by Reduce and counted for the metric,
                    // and a deferred GroupBy would redo the grouping and conversion on every enumeration.
                    var results = persistedResults.Where(x => x.Data != null)
                        .GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data))
                        .ToArray();

                    var performance = context.IndexStorage.Reduce(index.IndexId, viewGenerator, results, level, context, actions, reduceKeys, persistedResults.Count);

                    context.MetricsCounters.ReducedPerSecond.Mark(results.Length);

                    reduceLevelStats.Add(performance);

                    var batchDuration = batchTimeWatcher.Elapsed;

                    if (Log.IsDebugEnabled)
                    {
                        Log.Debug("Indexed {0} reduce keys in {1} with {2} results for index {3} in {4} on level {5}", reduceKeys.Count, batchDuration, performance.ItemsCount, index.IndexId, reduceTimeWatcher.Elapsed, level);
                    }

                    autoTuner.AutoThrottleBatchSize(count, size, batchDuration);
                });
            }
            finally
            {
                long _;
                autoTuner.CurrentlyUsedBatchSizesInBytes.TryRemove(reduceBatchAutoThrottlerId, out _);
            }
        }

        reduceLevelStats.Completed = SystemTime.UtcNow;
        reduceLevelStats.Duration = reduceLevelStats.Completed - reduceLevelStats.Started;

        reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_GetItemsToReduce, gettingItemsToReduceDuration.ElapsedMilliseconds));
        reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_ScheduleReductions, scheduleReductionsDuration.ElapsedMilliseconds));
        reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_RemoveReduceResults, removeReduceResultsDuration.ElapsedMilliseconds));
        reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds));

        reducePerformance.LevelStats.Add(reduceLevelStats);
    }

    foreach (var reduceKey in needToMoveToMultiStep)
    {
        token.ThrowIfCancellationRequested();

        string localReduceKey = reduceKey;
        transactionalStorage.Batch(actions =>
            actions.MapReduce.UpdatePerformedReduceType(index.IndexId, localReduceKey, ReduceType.MultiStep));
    }

    return reducePerformance;
}
/// <summary>
/// Lazily yields the mapped results behind the scheduled reductions of the requested index, level and reduce keys.
/// </summary>
/// <param name="getItemsToReduceParams">
/// Request state. It is mutated while iterating: <c>Take</c> is decremented per yielded item,
/// fully scanned keys are removed from <c>ReduceKeys</c>, and <c>ItemsAlreadySeen</c> / <c>ItemsToDelete</c> are filled.
/// </param>
/// <returns>One item per mapped result returned by <c>GetResultsForBucket</c> for each newly seen key/bucket pair.</returns>
public IEnumerable<MappedResultInfo> GetItemsToReduce(GetItemsToReduceParams getItemsToReduceParams)
{
    var seenLocally = new HashSet<Tuple<string, int>>();

    // Iterate over a snapshot because keys are removed from ReduceKeys inside the loop.
    foreach (var reduceKey in getItemsToReduceParams.ReduceKeys.ToArray())
    {
        var keyCriteria = new RavenJObject
        {
            {"view", getItemsToReduceParams.Index},
            {"level", getItemsToReduceParams.Level},
            {"reduceKey", reduceKey}
        };

        foreach (var result in storage.ScheduleReductions["ByViewLevelReduceKeyAndBucket"].SkipTo(keyCriteria))
        {
            var indexFromDb = result.Value<string>("view");
            var levelFromDb = result.Value<int>("level");
            var reduceKeyFromDb = result.Value<string>("reduceKey");

            // Rows are visited from the seek position onwards, so the first row belonging to
            // another view, level or reduce key ends the scan for this key.
            if (string.Equals(indexFromDb, getItemsToReduceParams.Index, StringComparison.InvariantCultureIgnoreCase) == false ||
                levelFromDb != getItemsToReduceParams.Level)
                break;

            if (string.Equals(reduceKeyFromDb, reduceKey, StringComparison.Ordinal) == false)
            {
                break;
            }

            var bucket = result.Value<int>("bucket");
            var rowKey = Tuple.Create(reduceKeyFromDb, bucket);

            var thisIsNewScheduledReductionRow = getItemsToReduceParams.ItemsToDelete.Contains(result, RavenJTokenEqualityComparer.Default) == false;
            var neverSeenThisKeyAndBucket = getItemsToReduceParams.ItemsAlreadySeen.Add(rowKey);

            if (thisIsNewScheduledReductionRow || neverSeenThisKeyAndBucket)
            {
                if (seenLocally.Add(rowKey))
                {
                    foreach (var mappedResultInfo in GetResultsForBucket(getItemsToReduceParams.Index, getItemsToReduceParams.Level, reduceKeyFromDb, bucket, getItemsToReduceParams.LoadData))
                    {
                        getItemsToReduceParams.Take--;
                        yield return mappedResultInfo;
                    }
                }
            }

            if (thisIsNewScheduledReductionRow)
                getItemsToReduceParams.ItemsToDelete.Add(result);

            // The budget ran out part-way through this key. Stop here WITHOUT removing the key from
            // ReduceKeys, so a later call with a fresh Take can pick up the key's remaining rows.
            // A plain break would fall through to the Remove below and drop the key.
            if (getItemsToReduceParams.Take <= 0)
                yield break;
        }

        // Every scheduled row for this key has been visited.
        getItemsToReduceParams.ReduceKeys.Remove(reduceKey);

        if (getItemsToReduceParams.Take <= 0)
            break;
    }
}
/// <summary>
/// Reduces the given keys directly from their mapped results into the level 2 (final) reduce output.
/// </summary>
/// <param name="index">The index being reduced; only <c>IndexName</c> is read here.</param>
/// <param name="keysToReduce">Reduce keys handled by this call.</param>
/// <param name="viewGenerator">View generator handed through to <c>IndexStorage.Reduce</c>.</param>
/// <param name="itemsToDelete">Collector handed through to <c>GetItemsToReduce</c>.</param>
private void SingleStepReduce(IndexToWorkOn index, string[] keysToReduce, AbstractViewGenerator viewGenerator, List<object> itemsToDelete)
{
    // HashSet<T> is not safe for concurrent writers, and the callback below already treats its other
    // shared state as concurrently accessed (Interlocked, ConcurrentQueue). Each callback therefore
    // fills a private set that is merged once all callbacks have finished.
    var needToMoveToSingleStepQueue = new ConcurrentQueue<HashSet<string>>();

    Log.Debug(() => string.Format("Executing single step reducing for {0} keys [{1}]", keysToReduce.Length, string.Join(", ", keysToReduce)));
    var batchTimeWatcher = Stopwatch.StartNew();
    var count = 0;
    var size = 0;
    var state = new ConcurrentQueue<Tuple<HashSet<string>, List<MappedResultInfo>>>();

    // NOTE(review): itemsToDelete is a plain List<object> handed to every callback; if GetItemsToReduce
    // adds to it from several threads that is a separate race - confirm against the storage implementation.
    BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, keysToReduce, enumerator =>
    {
        var localNeedToMoveToSingleStep = new HashSet<string>();
        needToMoveToSingleStepQueue.Enqueue(localNeedToMoveToSingleStep);

        var localKeys = new HashSet<string>();
        while (enumerator.MoveNext())
        {
            localKeys.Add(enumerator.Current);
        }

        transactionalStorage.Batch(actions =>
        {
            var getItemsToReduceParams = new GetItemsToReduceParams(index: index.IndexName, reduceKeys: localKeys, level: 0, loadData: false, itemsToDelete: itemsToDelete)
            {
                // just get all, we do the rate limit when we load the number of keys to reduce, anyway
                Take = int.MaxValue
            };
            var scheduledItems = actions.MapReduce.GetItemsToReduce(getItemsToReduceParams).ToList();

            foreach (var reduceKey in localKeys)
            {
                var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexName, reduceKey);

                if (lastPerformedReduceType != ReduceType.SingleStep)
                    localNeedToMoveToSingleStep.Add(reduceKey);

                if (lastPerformedReduceType != ReduceType.MultiStep)
                    continue;

                Log.Debug("Key {0} was moved from multi step to single step reduce, removing existing reduce results records", reduceKey);

                // now we are in single step but previously multi step reduce was performed for the given key
                var mappedBuckets = actions.MapReduce.GetMappedBuckets(index.IndexName, reduceKey).ToList();

                // add scheduled items too to be sure we will delete reduce results of already deleted documents
                mappedBuckets.AddRange(scheduledItems.Select(x => x.Bucket));

                foreach (var mappedBucket in mappedBuckets.Distinct())
                {
                    actions.MapReduce.RemoveReduceResults(index.IndexName, 1, reduceKey, mappedBucket);
                    actions.MapReduce.RemoveReduceResults(index.IndexName, 2, reduceKey, mappedBucket / 1024);
                }
            }

            var mappedResults = actions.MapReduce.GetMappedResults(
                index.IndexName,
                localKeys,
                loadData: true
                ).ToList();

            Interlocked.Add(ref count, mappedResults.Count);
            Interlocked.Add(ref size, mappedResults.Sum(x => x.Size));

            // Single step reduce treats everything as one bucket.
            mappedResults.ApplyIfNotNull(x => x.Bucket = 0);

            state.Enqueue(Tuple.Create(localKeys, mappedResults));
        });
    });

    var reduceKeys = new HashSet<string>(state.SelectMany(x => x.Item1));

    var results = state.SelectMany(x => x.Item2)
        .Where(x => x.Data != null)
        .GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data))
        .ToArray();
    context.ReducedPerSecIncreaseBy(results.Length);

    context.TransactionalStorage.Batch(actions =>
        context.IndexStorage.Reduce(index.IndexName, viewGenerator, results, 2, context, actions, reduceKeys)
        );

    autoTuner.AutoThrottleBatchSize(count, size, batchTimeWatcher.Elapsed);

    // Merge the per-callback sets now that all callbacks have completed.
    var needToMoveToSingleStep = new HashSet<string>();
    HashSet<string> set;
    while (needToMoveToSingleStepQueue.TryDequeue(out set))
    {
        needToMoveToSingleStep.UnionWith(set);
    }

    foreach (var reduceKey in needToMoveToSingleStep)
    {
        string localReduceKey = reduceKey;
        transactionalStorage.Batch(actions =>
            actions.MapReduce.UpdatePerformedReduceType(index.IndexName, localReduceKey, ReduceType.SingleStep));
    }
}