/// <summary>
/// Streams the mapped results that are scheduled for reduction (Voron storage), for each
/// requested reduce key at the requested level, until the <c>Take</c> budget is exhausted.
/// Every scheduled-reduction row visited is also registered with a <see cref="ScheduledReductionDeleter"/>
/// (via <c>ItemsToDelete</c>) so it can be deleted after the reduction is persisted.
/// Fully-drained reduce keys are removed from <c>ReduceKeys</c> as they are finished.
/// </summary>
public IEnumerable<MappedResultInfo> GetItemsToReduce(GetItemsToReduceParams getItemsToReduceParams)
{
    var scheduledReductionsByViewAndLevelAndReduceKey = tableStorage.ScheduledReductions.GetIndex(Tables.ScheduledReductions.Indices.ByViewAndLevelAndReduceKey);
    // The deleter converts the items previously recorded in ItemsToDelete (etags) back into
    // storage keys; Delete() returns true when the row was not already marked for deletion,
    // i.e. this is the first time this batch has seen that scheduled-reduction row.
    var deleter = new ScheduledReductionDeleter(getItemsToReduceParams.ItemsToDelete, o =>
    {
        var etag = o as Etag;
        if (etag == null)
            return null;
        return (Slice)etag.ToString();
    });
    // Dedupes (reduceKey, bucket) pairs within this single call, independently of the
    // cross-call dedupe done by getItemsToReduceParams.ItemsAlreadySeen.
    var seenLocally = new HashSet<Tuple<string, int>>();
    // ToArray(): we mutate ReduceKeys (Remove below) while iterating, so iterate a snapshot.
    foreach (var reduceKey in getItemsToReduceParams.ReduceKeys.ToArray())
    {
        var reduceKeyHash = HashKey(reduceKey);
        var viewAndLevelAndReduceKey = (Slice) CreateKey(getItemsToReduceParams.Index, getItemsToReduceParams.Level, ReduceKeySizeLimited(reduceKey), reduceKeyHash);
        using (var iterator = scheduledReductionsByViewAndLevelAndReduceKey.MultiRead(Snapshot, viewAndLevelAndReduceKey))
        {
            if (!iterator.Seek(Slice.BeforeAllKeys))
                continue; // no scheduled reductions for this key/level — try the next key
            do
            {
                if (getItemsToReduceParams.Take <= 0)
                    break;
                ushort version;
                var value = LoadStruct(tableStorage.ScheduledReductions, iterator.CurrentKey, writeBatch.Value, out version);
                // NOTE(review): unlike the cancellation-token overload, `value` is not
                // null-checked here before ReadString — confirm LoadStruct cannot return null
                // on this path.
                var reduceKeyFromDb = value.ReadString(ScheduledReductionFields.ReduceKey);
                var bucket = value.ReadInt(ScheduledReductionFields.Bucket);
                var rowKey = Tuple.Create(reduceKeyFromDb, bucket);
                // Always register the row for deletion, even if its results are skipped below;
                // the return value tells us whether the row is new to this reduction batch.
                var thisIsNewScheduledReductionRow = deleter.Delete(iterator.CurrentKey, Etag.Parse(value.ReadBytes(ScheduledReductionFields.Etag)));
                var neverSeenThisKeyAndBucket = getItemsToReduceParams.ItemsAlreadySeen.Add(rowKey);
                if (thisIsNewScheduledReductionRow || neverSeenThisKeyAndBucket)
                {
                    if (seenLocally.Add(rowKey))
                    {
                        foreach (var mappedResultInfo in GetResultsForBucket(getItemsToReduceParams.Index, getItemsToReduceParams.Level, reduceKeyFromDb, bucket, getItemsToReduceParams.LoadData))
                        {
                            // Budget is charged per mapped result yielded, not per bucket.
                            getItemsToReduceParams.Take--;
                            yield return mappedResultInfo;
                        }
                    }
                }
                if (getItemsToReduceParams.Take <= 0)
                    yield break; // budget exhausted mid-key: key stays in ReduceKeys for the next batch
            } while (iterator.MoveNext());
        }
        // This key was fully drained — the caller should not ask for it again.
        getItemsToReduceParams.ReduceKeys.Remove(reduceKey);
        if (getItemsToReduceParams.Take <= 0)
            yield break;
    }
}
/// <summary>
/// Materializes the mapped results scheduled for reduction (Esent storage) into a list,
/// walking the "by_view_level_and_hashed_reduce_key_and_bucket" index per reduce key.
/// Supports resuming from <c>LastReduceKeyAndBucket</c>, registers every visited row with a
/// shared <see cref="OptimizedDeleter"/> (kept in <c>ItemsToDelete</c>) for later cleanup,
/// and — via the finally block — always removes fully-processed keys from <c>ReduceKeys</c>,
/// no matter how the method returns.
/// </summary>
public IList<MappedResultInfo> GetItemsToReduce(GetItemsToReduceParams getItemsToReduceParams, CancellationToken cancellationToken)
{
    Api.JetSetCurrentIndex(session, ScheduledReductions, "by_view_level_and_hashed_reduce_key_and_bucket");
    // Resolve column handles once, outside the per-key loop.
    var viewReductionColumn = tableColumnsCache.ScheduledReductionColumns["view"];
    var levelReductionColumn = tableColumnsCache.ScheduledReductionColumns["level"];
    var reduceReductionColumn = tableColumnsCache.ScheduledReductionColumns["reduce_key"];
    var bucketReductionColumn = tableColumnsCache.ScheduledReductionColumns["bucket"];
    var keysToRemove = new List<string>();
    var output = new List<MappedResultInfo>();
    // Dedupes (reduceKey, bucket) within this call only.
    var seenLocally = new HashSet<ReduceKeyAndBucket>(ReduceKeyAndBucketEqualityComparer.Instance);
    try
    {
        var first = true;
        foreach (var reduceKey in getItemsToReduceParams.ReduceKeys)
        {
            cancellationToken.ThrowIfCancellationRequested();
            int initialBucket = 0;
            bool needToMoveNext = false;
            // Resume support: only the first key of the batch may continue from where a
            // previous call stopped.
            if (first)
            {
                first = false;
                if (getItemsToReduceParams.LastReduceKeyAndBucket != null)
                {
                    // NOTE(review): this resumes from LastReduceKeyAndBucket.Bucket when the
                    // stored key DIFFERS from the current key (`!=`). Resuming a bucket
                    // position for a different reduce key looks inverted — verify against the
                    // caller's contract for LastReduceKeyAndBucket.
                    if (getItemsToReduceParams.LastReduceKeyAndBucket.ReduceKey != reduceKey)
                    {
                        needToMoveNext = true;
                        initialBucket = getItemsToReduceParams.LastReduceKeyAndBucket.Bucket;
                    }
                }
            }
            // Seek to the first row >= (index, level, hashedKey, initialBucket).
            Api.MakeKey(session, ScheduledReductions, getItemsToReduceParams.Index, MakeKeyGrbit.NewKey);
            Api.MakeKey(session, ScheduledReductions, getItemsToReduceParams.Level, MakeKeyGrbit.None);
            Api.MakeKey(session, ScheduledReductions, HashReduceKey(reduceKey), MakeKeyGrbit.None);
            Api.MakeKey(session, ScheduledReductions, initialBucket, MakeKeyGrbit.None);
            if (Api.TrySeek(session, ScheduledReductions, SeekGrbit.SeekGE) == false ||
                (needToMoveNext && Api.TryMoveNext(session, ScheduledReductions) == false))
            {
                // Nothing (left) for this key — mark it done and move on.
                keysToRemove.Add(reduceKey);
                continue;
            }
            // Upper-bound the cursor at (index, level, hashedKey, int.MaxValue) so the
            // do/while below cannot run past this reduce key's hashed range.
            Api.MakeKey(session, ScheduledReductions, getItemsToReduceParams.Index, MakeKeyGrbit.NewKey);
            Api.MakeKey(session, ScheduledReductions, getItemsToReduceParams.Level, MakeKeyGrbit.None);
            Api.MakeKey(session, ScheduledReductions, HashReduceKey(reduceKey), MakeKeyGrbit.None);
            Api.MakeKey(session, ScheduledReductions, int.MaxValue, MakeKeyGrbit.None);
            if (Api.TrySetIndexRange(session, ScheduledReductions, SetIndexRangeGrbit.RangeInclusive | SetIndexRangeGrbit.RangeUpperLimit) == false)
            {
                keysToRemove.Add(reduceKey);
                continue;
            }
            // this isn't used for optimized reading, but to make it easier to delete records later on
            OptimizedDeleter reader;
            if (getItemsToReduceParams.ItemsToDelete.Count == 0)
            {
                getItemsToReduceParams.ItemsToDelete.Add(reader = new OptimizedDeleter());
            }
            else
            {
                // Reuse the single deleter shared across calls for this reduction batch.
                reader = (OptimizedDeleter)getItemsToReduceParams.ItemsToDelete.First();
            }
            reader.IndexId = getItemsToReduceParams.Index;
            do
            {
                cancellationToken.ThrowIfCancellationRequested();
                if (getItemsToReduceParams.Take <= 0)
                    return output; // budget exhausted; finally still removes finished keys
                var indexFromDb = Api.RetrieveColumnAsInt32(session, ScheduledReductions, viewReductionColumn, RetrieveColumnGrbit.RetrieveFromIndex);
                var levelFromDb = Api.RetrieveColumnAsInt32(session, ScheduledReductions, levelReductionColumn, RetrieveColumnGrbit.RetrieveFromIndex).Value;
                var reduceKeyFromDb = Api.RetrieveColumnAsString(session, ScheduledReductions, reduceReductionColumn);
                // The index is keyed on the HASH of the reduce key, so re-check the real
                // values; leaving the requested (index, level, key) means we are done here.
                if (getItemsToReduceParams.Index != indexFromDb)
                    break;
                if (levelFromDb != getItemsToReduceParams.Level)
                    break;
                if (string.Equals(reduceKeyFromDb, reduceKey, StringComparison.Ordinal) == false)
                    break;
                var bucket = Api.RetrieveColumnAsInt32(session, ScheduledReductions, bucketReductionColumn).Value;
                var rowKey = new ReduceKeyAndBucket(bucket, reduceKeyFromDb);
                // Register the row for later deletion; true = first time this batch sees it.
                var thisIsNewScheduledReductionRow = reader.Add(session, ScheduledReductions, getItemsToReduceParams.Level);
                if (thisIsNewScheduledReductionRow)
                {
                    if (seenLocally.Add(rowKey))
                    {
                        // Remember the last position processed, so a following call can resume.
                        getItemsToReduceParams.LastReduceKeyAndBucket = rowKey;
                        foreach (var mappedResultInfo in GetResultsForBucket(getItemsToReduceParams.Index, getItemsToReduceParams.Level, reduceKeyFromDb, bucket, getItemsToReduceParams.LoadData, cancellationToken))
                        {
                            getItemsToReduceParams.Take--;
                            output.Add(mappedResultInfo);
                        }
                    }
                }
            } while (Api.TryMoveNext(session, ScheduledReductions));
            keysToRemove.Add(reduceKey);
            if (getItemsToReduceParams.Take <= 0)
                break;
        }
        return output;
    }
    finally
    {
        // In whatever condition we would have to return, we must signal the removal of the reduce keys.
        foreach (var keyToRemove in keysToRemove)
            getItemsToReduceParams.ReduceKeys.Remove(keyToRemove);
    }
}
/// <summary>
/// Performs single-step reduction for the given reduce keys and collects detailed
/// performance statistics. Phase 1 (parallel, per key partition): drain level-0 scheduled
/// reductions (marking them for deletion), clean up stray level-1/2 scheduled items, and
/// remove stale multi-step reduce results for keys switching to single step. Phase 2
/// (sequential): load all mapped results, collapse them into bucket 0, and reduce straight
/// to level 2 (written directly to the Lucene index). Finally, keys not yet marked
/// single-step are re-tagged. The method's signature and side effects mirror the older
/// void SingleStepReduce overload, with stats added.
/// </summary>
private ReducingPerformanceStats SingleStepReduce(IndexToWorkOn index, List<string> keysToReduce, AbstractViewGenerator viewGenerator, ConcurrentSet<object> itemsToDelete, CancellationToken token)
{
    // Each parallel partition contributes its own set; merged after the parallel phase.
    var needToMoveToSingleStepQueue = new ConcurrentQueue<HashSet<string>>();
    if (Log.IsDebugEnabled)
    {
        Log.Debug(() => string.Format("Executing single step reducing for {0} keys [{1}]", keysToReduce.Count, string.Join(", ", keysToReduce)));
    }
    var batchTimeWatcher = Stopwatch.StartNew();
    var reducingBatchThrottlerId = Guid.NewGuid();
    var reducePerformanceStats = new ReducingPerformanceStats(ReduceType.SingleStep);
    // Single-step reduce always reports as level 2.
    var reduceLevelStats = new ReduceLevelPeformanceStats
    {
        Started = SystemTime.UtcNow,
        Level = 2
    };
    try
    {
        var parallelOperations = new ConcurrentQueue<ParallelBatchStats>();
        var parallelProcessingStart = SystemTime.UtcNow;
        // Phase 1: process partitions of keysToReduce in parallel, each in its own storage batch.
        BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, keysToReduce, enumerator =>
        {
            var parallelStats = new ParallelBatchStats
            {
                StartDelay = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds
            };
            var localNeedToMoveToSingleStep = new HashSet<string>();
            needToMoveToSingleStepQueue.Enqueue(localNeedToMoveToSingleStep);
            var localKeys = new HashSet<string>();
            while (enumerator.MoveNext())
            {
                token.ThrowIfCancellationRequested();
                localKeys.Add(enumerator.Current);
            }
            transactionalStorage.Batch(actions =>
            {
                // Copy localKeys: GetItemsToReduce mutates the ReduceKeys set it is given.
                var getItemsToReduceParams = new GetItemsToReduceParams(index: index.IndexId, reduceKeys: new HashSet<string>(localKeys), level: 0, loadData: false, itemsToDelete: itemsToDelete)
                {
                    Take = int.MaxValue // just get all, we do the rate limit when we load the number of keys to reduce, anyway
                };
                var getItemsToReduceDuration = Stopwatch.StartNew();
                int scheduledItemsSum = 0;
                int scheduledItemsCount = 0;
                List<int> scheduledItemsMappedBuckets = new List<int>();
                using (StopwatchScope.For(getItemsToReduceDuration))
                {
                    // Enumerating also marks the scheduled-reduction rows for deletion
                    // via itemsToDelete; only aggregates are kept here (loadData: false).
                    foreach (var item in actions.MapReduce.GetItemsToReduce(getItemsToReduceParams, token))
                    {
                        scheduledItemsMappedBuckets.Add(item.Bucket);
                        scheduledItemsSum += item.Size;
                        scheduledItemsCount++;
                    }
                }
                parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_GetItemsToReduce, getItemsToReduceDuration.ElapsedMilliseconds));
                autoTuner.CurrentlyUsedBatchSizesInBytes.GetOrAdd(reducingBatchThrottlerId, scheduledItemsSum);
                if (scheduledItemsCount == 0)
                {
                    // Here we have an interesting issue. We have scheduled reductions, because GetReduceTypesPerKeys() returned them
                    // and at the same time, we don't have any at level 0. That probably means that we have them at level 1 or 2.
                    // They shouldn't be here, and indeed, we remove them just a little down from here in this function.
                    // That said, they might have smuggled in between versions, or something happened to cause them to be here.
                    // In order to avoid that, we forcibly delete those extra items from the scheduled reductions, and move on
                    Log.Warn("Found single reduce items ({0}) that didn't have any items to reduce. Deleting level 1 & level 2 items for those keys. (If you can reproduce this, please contact [email protected])", string.Join(", ", keysToReduce));
                    var deletingScheduledReductionsDuration = Stopwatch.StartNew();
                    using (StopwatchScope.For(deletingScheduledReductionsDuration))
                    {
                        foreach (var reduceKey in keysToReduce)
                        {
                            token.ThrowIfCancellationRequested();
                            actions.MapReduce.DeleteScheduledReduction(index.IndexId, 1, reduceKey);
                            actions.MapReduce.DeleteScheduledReduction(index.IndexId, 2, reduceKey);
                        }
                    }
                    parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_DeleteScheduledReductions, deletingScheduledReductionsDuration.ElapsedMilliseconds));
                }
                var removeReduceResultsDuration = new Stopwatch();
                foreach (var reduceKey in localKeys)
                {
                    token.ThrowIfCancellationRequested();
                    var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexId, reduceKey);
                    if (lastPerformedReduceType != ReduceType.SingleStep)
                    {
                        // Will be re-tagged as SingleStep at the end of this method.
                        localNeedToMoveToSingleStep.Add(reduceKey);
                    }
                    if (lastPerformedReduceType != ReduceType.MultiStep)
                    {
                        continue;
                    }
                    if (Log.IsDebugEnabled)
                    {
                        Log.Debug("Key {0} was moved from multi step to single step reduce, removing existing reduce results records", reduceKey);
                    }
                    using (StopwatchScope.For(removeReduceResultsDuration))
                    {
                        // now we are in single step but previously multi step reduce was performed for the given key
                        var mappedBuckets = actions.MapReduce.GetMappedBuckets(index.IndexId, reduceKey, token);
                        // add scheduled items too to be sure we will delete reduce results of already deleted documents
                        foreach (var mappedBucket in mappedBuckets.Union(scheduledItemsMappedBuckets))
                        {
                            actions.MapReduce.RemoveReduceResults(index.IndexId, 1, reduceKey, mappedBucket);
                            // Level-2 buckets group 1024 level-1 buckets (see /1024 elsewhere).
                            actions.MapReduce.RemoveReduceResults(index.IndexId, 2, reduceKey, mappedBucket / 1024);
                        }
                    }
                }
                parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_RemoveReduceResults, removeReduceResultsDuration.ElapsedMilliseconds));
                parallelOperations.Enqueue(parallelStats);
            });
        });
        reduceLevelStats.Operations.Add(new ParallelPerformanceStats
        {
            NumberOfThreads = parallelOperations.Count,
            DurationMs = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds,
            BatchedOperations = parallelOperations.ToList()
        });
        var getMappedResultsDuration = new Stopwatch();
        var reductionPerformanceStats = new List<IndexingPerformanceStats>();
        // Phase 2: page through the mapped results and reduce them directly at level 2.
        // NOTE(review): nothing visible here removes entries from keysLeftToReduce — this
        // loop terminates only if GetMappedResults mutates the set it is handed (it also
        // fills keysReturned); confirm that contract, otherwise this would loop forever.
        var keysLeftToReduce = new HashSet<string>(keysToReduce);
        while (keysLeftToReduce.Count > 0)
        {
            var keysReturned = new HashSet<string>();
            // Try to diminish the allocations happening because of .Resize()
            var mappedResults = new List<MappedResultInfo>(keysLeftToReduce.Count);
            context.TransactionalStorage.Batch(actions =>
            {
                var take = context.CurrentNumberOfItemsToReduceInSingleBatch;
                using (StopwatchScope.For(getMappedResultsDuration))
                {
                    mappedResults = actions.MapReduce.GetMappedResults(index.IndexId, keysLeftToReduce, true, take, keysReturned, token, mappedResults);
                }
            });
            var count = mappedResults.Count;
            int size = 0;
            foreach (var item in mappedResults)
            {
                // Single-step reduce collapses everything into a single bucket (0).
                item.Bucket = 0;
                size += item.Size;
            }
            var results = mappedResults.GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data)).ToArray();
            context.MetricsCounters.ReducedPerSecond.Mark(results.Length);
            token.ThrowIfCancellationRequested();
            // actions: null — single-step writes directly to the Lucene index, not storage.
            var performance = context.IndexStorage.Reduce(index.IndexId, viewGenerator, results, 2, context, null, keysReturned, count);
            reductionPerformanceStats.Add(performance);
            autoTuner.AutoThrottleBatchSize(count, size, batchTimeWatcher.Elapsed);
        }
        // Merge the per-partition sets and persist the reduce-type change.
        var needToMoveToSingleStep = new HashSet<string>();
        HashSet<string> set;
        while (needToMoveToSingleStepQueue.TryDequeue(out set))
        {
            needToMoveToSingleStep.UnionWith(set);
        }
        foreach (var reduceKey in needToMoveToSingleStep)
        {
            string localReduceKey = reduceKey;
            transactionalStorage.Batch(actions =>
                actions.MapReduce.UpdatePerformedReduceType(index.IndexId, localReduceKey, ReduceType.SingleStep));
        }
        reduceLevelStats.Completed = SystemTime.UtcNow;
        reduceLevelStats.Duration = reduceLevelStats.Completed - reduceLevelStats.Started;
        reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_GetMappedResults, getMappedResultsDuration.ElapsedMilliseconds));
        reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.StorageCommit, 0)); // in single step we write directly to Lucene index
        foreach (var stats in reductionPerformanceStats)
        {
            reduceLevelStats.Add(stats);
        }
        reducePerformanceStats.LevelStats.Add(reduceLevelStats);
    }
    finally
    {
        // Always release this batch's byte budget from the auto-tuner.
        long _;
        autoTuner.CurrentlyUsedBatchSizesInBytes.TryRemove(reducingBatchThrottlerId, out _);
    }
    return (reducePerformanceStats);
}
/// <summary>
/// Streams the mapped results scheduled for reduction (Esent storage) for each requested
/// reduce key at the requested level, until the <c>Take</c> budget is exhausted. Visited
/// scheduled-reduction rows are registered with a shared <see cref="OptimizedDeleter"/>
/// (kept in <c>ItemsToDelete</c>) for later cleanup; fully-drained keys are removed from
/// <c>ReduceKeys</c>. Iterator variant of the IList-returning overload.
/// </summary>
public IEnumerable<MappedResultInfo> GetItemsToReduce(GetItemsToReduceParams getItemsToReduceParams, CancellationToken cancellationToken)
{
    Api.JetSetCurrentIndex(session, ScheduledReductions, "by_view_level_and_hashed_reduce_key_and_bucket");
    // Dedupes (reduceKey, bucket) pairs within this single call.
    var seenLocally = new HashSet<Tuple<string, int>>();
    // ToArray(): ReduceKeys is mutated (Remove below) while iterating, so snapshot it.
    foreach (var reduceKey in getItemsToReduceParams.ReduceKeys.ToArray())
    {
        cancellationToken.ThrowIfCancellationRequested();
        // Seek to the first row >= (index, level, hashedKey, bucket 0).
        Api.MakeKey(session, ScheduledReductions, getItemsToReduceParams.Index, MakeKeyGrbit.NewKey);
        Api.MakeKey(session, ScheduledReductions, getItemsToReduceParams.Level, MakeKeyGrbit.None);
        Api.MakeKey(session, ScheduledReductions, HashReduceKey(reduceKey), MakeKeyGrbit.None);
        Api.MakeKey(session, ScheduledReductions, 0, MakeKeyGrbit.None);
        if (Api.TrySeek(session, ScheduledReductions, SeekGrbit.SeekGE) == false)
            continue;
        // Upper-bound the cursor at (index, level, hashedKey, int.MaxValue) so the do/while
        // cannot run past this reduce key's hashed range.
        Api.MakeKey(session, ScheduledReductions, getItemsToReduceParams.Index, MakeKeyGrbit.NewKey);
        Api.MakeKey(session, ScheduledReductions, getItemsToReduceParams.Level, MakeKeyGrbit.None);
        Api.MakeKey(session, ScheduledReductions, HashReduceKey(reduceKey), MakeKeyGrbit.None);
        Api.MakeKey(session, ScheduledReductions, int.MaxValue, MakeKeyGrbit.None);
        if (Api.TrySetIndexRange(session, ScheduledReductions, SetIndexRangeGrbit.RangeInclusive | SetIndexRangeGrbit.RangeUpperLimit) == false)
            continue;
        // this isn't used for optimized reading, but to make it easier to delete records later on
        OptimizedDeleter reader;
        if (getItemsToReduceParams.ItemsToDelete.Count == 0)
        {
            getItemsToReduceParams.ItemsToDelete.Add(reader = new OptimizedDeleter());
        }
        else
        {
            // Reuse the single deleter shared across calls for this reduction batch.
            reader = (OptimizedDeleter)getItemsToReduceParams.ItemsToDelete.First();
        }
        do
        {
            cancellationToken.ThrowIfCancellationRequested();
            if (getItemsToReduceParams.Take <= 0)
                break;
            var indexFromDb = Api.RetrieveColumnAsInt32(session, ScheduledReductions, tableColumnsCache.ScheduledReductionColumns["view"], RetrieveColumnGrbit.RetrieveFromIndex);
            var levelFromDb = Api.RetrieveColumnAsInt32(session, ScheduledReductions, tableColumnsCache.ScheduledReductionColumns["level"], RetrieveColumnGrbit.RetrieveFromIndex).Value;
            var reduceKeyFromDb = Api.RetrieveColumnAsString(session, ScheduledReductions, tableColumnsCache.ScheduledReductionColumns["reduce_key"]);
            // The index is keyed on the HASH of the reduce key, so re-check the real values.
            // `continue` in a do/while jumps to the condition, i.e. it advances the cursor
            // (TryMoveNext) and skips this row — unlike the `break` used in the IList overload.
            if (getItemsToReduceParams.Index != indexFromDb)
                continue;
            if (levelFromDb != getItemsToReduceParams.Level)
                continue;
            if (string.Equals(reduceKeyFromDb, reduceKey, StringComparison.Ordinal) == false)
                continue;
            var bucket = Api.RetrieveColumnAsInt32(session, ScheduledReductions, tableColumnsCache.ScheduledReductionColumns["bucket"]).Value;
            var rowKey = Tuple.Create(reduceKeyFromDb, bucket);
            // Register the row for later deletion; true = first time this batch sees it.
            var thisIsNewScheduledReductionRow = reader.Add(session, ScheduledReductions);
            var neverSeenThisKeyAndBucket = getItemsToReduceParams.ItemsAlreadySeen.Add(rowKey);
            if (thisIsNewScheduledReductionRow || neverSeenThisKeyAndBucket)
            {
                if (seenLocally.Add(rowKey))
                {
                    foreach (var mappedResultInfo in GetResultsForBucket(getItemsToReduceParams.Index, getItemsToReduceParams.Level, reduceKeyFromDb, bucket, getItemsToReduceParams.LoadData, cancellationToken))
                    {
                        // Budget is charged per mapped result yielded.
                        getItemsToReduceParams.Take--;
                        yield return mappedResultInfo;
                    }
                }
            }
            if (getItemsToReduceParams.Take <= 0)
                yield break; // budget exhausted mid-key: key stays in ReduceKeys for the next batch
        } while (Api.TryMoveNext(session, ScheduledReductions));
        // This key was fully drained — the caller should not ask for it again.
        getItemsToReduceParams.ReduceKeys.Remove(reduceKey);
        if (getItemsToReduceParams.Take <= 0)
            break;
    }
}
/// <summary>
/// Performs multi-step (3-level) reduction for the given reduce keys. First, keys that
/// previously ran as single-step get their level-0 reductions rescheduled from the mapped
/// buckets. Then levels 0..2 are processed in order: each level's scheduled items are
/// reduced in throttled batches, stale next-level results are removed, and (below level 2)
/// next-level reductions are scheduled for the affected 1024-bucket groups. Finally, keys
/// not yet tagged MultiStep are re-tagged. Older void variant keyed by IndexName.
/// </summary>
private void MultiStepReduce(IndexToWorkOn index, string[] keysToReduce, AbstractViewGenerator viewGenerator, ConcurrentSet<object> itemsToDelete)
{
    var needToMoveToMultiStep = new HashSet<string>();
    transactionalStorage.Batch(actions =>
    {
        foreach (var localReduceKey in keysToReduce)
        {
            var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexName, localReduceKey);
            if (lastPerformedReduceType != ReduceType.MultiStep)
            {
                // Re-tagged as MultiStep at the end of this method.
                needToMoveToMultiStep.Add(localReduceKey);
            }
            if (lastPerformedReduceType != ReduceType.SingleStep)
            {
                continue;
            }
            // we exceeded the limit of items to reduce in single step
            // now we need to schedule reductions at level 0 for all map results with given reduce key
            var mappedItems = actions.MapReduce.GetMappedBuckets(index.IndexName, localReduceKey).ToList();
            foreach (var result in mappedItems.Select(x => new ReduceKeyAndBucket(x, localReduceKey)))
            {
                actions.MapReduce.ScheduleReductions(index.IndexName, 0, result);
            }
        }
    });
    // Process the three reduction levels in order.
    for (int i = 0; i < 3; i++)
    {
        var level = i;
        var reduceParams = new GetItemsToReduceParams(
            index.IndexName,
            keysToReduce,
            level,
            true,
            itemsToDelete);
        bool retry = true;
        // GetItemsToReduce removes fully-drained keys from reduceParams.ReduceKeys, so this
        // loop batches until the level is exhausted (or a batch comes back empty).
        while (retry && reduceParams.ReduceKeys.Count > 0)
        {
            var reduceBatchAutoThrottlerId = Guid.NewGuid();
            try
            {
                transactionalStorage.Batch(actions =>
                {
                    context.CancellationToken.ThrowIfCancellationRequested();
                    var batchTimeWatcher = Stopwatch.StartNew();
                    reduceParams.Take = context.CurrentNumberOfItemsToReduceInSingleBatch;
                    var persistedResults = actions.MapReduce.GetItemsToReduce(reduceParams).ToList();
                    if (persistedResults.Count == 0)
                    {
                        retry = false;
                        return;
                    }
                    var count = persistedResults.Count;
                    var size = persistedResults.Sum(x => x.Size);
                    autoTuner.CurrentlyUsedBatchSizes.GetOrAdd(reduceBatchAutoThrottlerId, size);
                    if (Log.IsDebugEnabled)
                    {
                        // Note: the early return above guarantees Count > 0 here, so the
                        // else branch below is unreachable; kept as-is.
                        if (persistedResults.Count > 0)
                        {
                            Log.Debug(() => string.Format("Found {0} results for keys [{1}] for index {2} at level {3} in {4}", persistedResults.Count, string.Join(", ", persistedResults.Select(x => x.ReduceKey).Distinct()), index.IndexName, level, batchTimeWatcher.Elapsed));
                        }
                        else
                        {
                            Log.Debug("No reduce keys found for {0}", index.IndexName);
                        }
                    }
                    context.CancellationToken.ThrowIfCancellationRequested();
                    // Every (key, bucket) reduced now invalidates the next level's previous
                    // output for that bucket.
                    var requiredReduceNextTime = persistedResults.Select(x => new ReduceKeyAndBucket(x.Bucket, x.ReduceKey))
                        .OrderBy(x => x.Bucket)
                        .Distinct()
                        .ToArray();
                    foreach (var mappedResultInfo in requiredReduceNextTime)
                    {
                        actions.MapReduce.RemoveReduceResults(index.IndexName, level + 1, mappedResultInfo.ReduceKey, mappedResultInfo.Bucket);
                    }
                    if (level != 2)
                    {
                        // Next-level buckets aggregate 1024 buckets of the current level.
                        var reduceKeysAndBuckets = requiredReduceNextTime
                            .Select(x => new ReduceKeyAndBucket(x.Bucket / 1024, x.ReduceKey))
                            .Distinct()
                            .ToArray();
                        foreach (var reduceKeysAndBucket in reduceKeysAndBuckets)
                        {
                            actions.MapReduce.ScheduleReductions(index.IndexName, level + 1, reduceKeysAndBucket);
                        }
                    }
                    var results = persistedResults
                        .Where(x => x.Data != null)
                        .GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data))
                        .ToArray();
                    var reduceKeys = new HashSet<string>(persistedResults.Select(x => x.ReduceKey), StringComparer.InvariantCultureIgnoreCase);
                    context.PerformanceCounters.ReducedPerSecond.IncrementBy(results.Length);
                    context.CancellationToken.ThrowIfCancellationRequested();
                    var reduceTimeWatcher = Stopwatch.StartNew();
                    context.IndexStorage.Reduce(index.IndexName, viewGenerator, results, level, context, actions, reduceKeys, persistedResults.Count);
                    var batchDuration = batchTimeWatcher.Elapsed;
                    Log.Debug("Indexed {0} reduce keys in {1} with {2} results for index {3} in {4} on level {5}", reduceKeys.Count, batchDuration, results.Length, index.IndexName, reduceTimeWatcher.Elapsed, level);
                    autoTuner.AutoThrottleBatchSize(count, size, batchDuration);
                });
            }
            finally
            {
                // Always release this batch's size budget from the auto-tuner.
                long _;
                autoTuner.CurrentlyUsedBatchSizes.TryRemove(reduceBatchAutoThrottlerId, out _);
            }
        }
    }
    foreach (var reduceKey in needToMoveToMultiStep)
    {
        string localReduceKey = reduceKey;
        transactionalStorage.Batch(actions =>
            actions.MapReduce.UpdatePerformedReduceType(index.IndexName, localReduceKey, ReduceType.MultiStep));
    }
}
/// <summary>
/// Performs multi-step (3-level) reduction for the given reduce keys and collects per-level
/// performance statistics. Keys that previously ran as single-step get their level-0
/// reductions rescheduled from the mapped buckets; then levels 0..2 are reduced in
/// throttled, cancellable batches (removing stale next-level results and scheduling the
/// next level's 1024-bucket groups); finally, keys not yet tagged MultiStep are re-tagged.
/// Stats-collecting variant keyed by IndexId.
/// </summary>
private ReducingPerformanceStats MultiStepReduce(IndexToWorkOn index, List<string> keysToReduce, AbstractViewGenerator viewGenerator, ConcurrentSet<object> itemsToDelete, CancellationToken token)
{
    var needToMoveToMultiStep = new HashSet<string>();
    transactionalStorage.Batch(actions =>
    {
        foreach (var localReduceKey in keysToReduce)
        {
            token.ThrowIfCancellationRequested();
            var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexId, localReduceKey);
            if (lastPerformedReduceType != ReduceType.MultiStep)
            {
                // Re-tagged as MultiStep at the end of this method.
                needToMoveToMultiStep.Add(localReduceKey);
            }
            if (lastPerformedReduceType != ReduceType.SingleStep)
            {
                continue;
            }
            // we exceeded the limit of items to reduce in single step
            // now we need to schedule reductions at level 0 for all map results with given reduce key
            var mappedItems = actions.MapReduce.GetMappedBuckets(index.IndexId, localReduceKey, token).ToList();
            foreach (var result in mappedItems.Select(x => new ReduceKeyAndBucket(x, localReduceKey)))
            {
                actions.MapReduce.ScheduleReductions(index.IndexId, 0, result);
            }
        }
    });
    var reducePerformance = new ReducingPerformanceStats(ReduceType.MultiStep);
    // Process the three reduction levels in order, each with its own stats record.
    for (int i = 0; i < 3; i++)
    {
        var level = i;
        var reduceLevelStats = new ReduceLevelPeformanceStats()
        {
            Level = level,
            Started = SystemTime.UtcNow,
        };
        // Copy keysToReduce: GetItemsToReduce mutates the ReduceKeys set it is given.
        var reduceParams = new GetItemsToReduceParams(
            index.IndexId,
            new HashSet<string>(keysToReduce),
            level,
            true,
            itemsToDelete);
        var gettingItemsToReduceDuration = new Stopwatch();
        var scheduleReductionsDuration = new Stopwatch();
        var removeReduceResultsDuration = new Stopwatch();
        var storageCommitDuration = new Stopwatch();
        bool retry = true;
        // Batches until this level is exhausted (drained keys are removed from ReduceKeys)
        // or a batch comes back empty.
        while (retry && reduceParams.ReduceKeys.Count > 0)
        {
            var reduceBatchAutoThrottlerId = Guid.NewGuid();
            try
            {
                transactionalStorage.Batch(actions =>
                {
                    token.ThrowIfCancellationRequested();
                    // Time the storage commit via the batch's own hooks.
                    actions.BeforeStorageCommit += storageCommitDuration.Start;
                    actions.AfterStorageCommit += storageCommitDuration.Stop;
                    var batchTimeWatcher = Stopwatch.StartNew();
                    reduceParams.Take = context.CurrentNumberOfItemsToReduceInSingleBatch;
                    int size = 0;
                    IList<MappedResultInfo> persistedResults;
                    var reduceKeys = new HashSet<string>(StringComparer.InvariantCultureIgnoreCase);
                    using (StopwatchScope.For(gettingItemsToReduceDuration))
                    {
                        persistedResults = actions.MapReduce.GetItemsToReduce(reduceParams, token);
                        // Single pass gathers both the distinct keys and the total byte size.
                        foreach (var item in persistedResults)
                        {
                            reduceKeys.Add(item.ReduceKey);
                            size += item.Size;
                        }
                    }
                    if (persistedResults.Count == 0)
                    {
                        retry = false;
                        return;
                    }
                    var count = persistedResults.Count;
                    autoTuner.CurrentlyUsedBatchSizesInBytes.GetOrAdd(reduceBatchAutoThrottlerId, size);
                    if (Log.IsDebugEnabled)
                    {
                        // Note: the early return above guarantees Count > 0 here, so the
                        // else branch below is unreachable; kept as-is.
                        if (persistedResults.Count > 0)
                        {
                            Log.Debug(() => string.Format("Found {0} results for keys [{1}] for index {2} at level {3} in {4}", persistedResults.Count, string.Join(", ", persistedResults.Select(x => x.ReduceKey).Distinct()), index.Index.PublicName, level, batchTimeWatcher.Elapsed));
                        }
                        else
                        {
                            Log.Debug("No reduce keys found for {0}", index.Index.PublicName);
                        }
                    }
                    token.ThrowIfCancellationRequested();
                    // Every (key, bucket) reduced now invalidates the next level's previous
                    // output for that bucket.
                    var requiredReduceNextTimeSet = new HashSet<ReduceKeyAndBucket>(persistedResults.Select(x => new ReduceKeyAndBucket(x.Bucket, x.ReduceKey)), ReduceKeyAndBucketEqualityComparer.Instance);
                    using (StopwatchScope.For(removeReduceResultsDuration))
                    {
                        foreach (var mappedResultInfo in requiredReduceNextTimeSet)
                        {
                            token.ThrowIfCancellationRequested();
                            actions.MapReduce.RemoveReduceResults(index.IndexId, level + 1, mappedResultInfo.ReduceKey, mappedResultInfo.Bucket);
                        }
                    }
                    if (level != 2)
                    {
                        // Next-level buckets aggregate 1024 buckets of the current level.
                        var reduceKeysAndBucketsSet = new HashSet<ReduceKeyAndBucket>(requiredReduceNextTimeSet.Select(x => new ReduceKeyAndBucket(x.Bucket / 1024, x.ReduceKey)), ReduceKeyAndBucketEqualityComparer.Instance);
                        using (StopwatchScope.For(scheduleReductionsDuration))
                        {
                            foreach (var reduceKeysAndBucket in reduceKeysAndBucketsSet)
                            {
                                token.ThrowIfCancellationRequested();
                                actions.MapReduce.ScheduleReductions(index.IndexId, level + 1, reduceKeysAndBucket);
                            }
                        }
                    }
                    token.ThrowIfCancellationRequested();
                    var reduceTimeWatcher = Stopwatch.StartNew();
                    var results = persistedResults.Where(x => x.Data != null)
                        .GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data))
                        .ToList();
                    var performance = context.IndexStorage.Reduce(index.IndexId, viewGenerator, results, level, context, actions, reduceKeys, persistedResults.Count);
                    context.MetricsCounters.ReducedPerSecond.Mark(results.Count());
                    reduceLevelStats.Add(performance);
                    var batchDuration = batchTimeWatcher.Elapsed;
                    if (Log.IsDebugEnabled)
                    {
                        Log.Debug("Indexed {0} reduce keys in {1} with {2} results for index {3} in {4} on level {5}", reduceKeys.Count, batchDuration, performance.ItemsCount, index.Index.PublicName, reduceTimeWatcher.Elapsed, level);
                    }
                    autoTuner.AutoThrottleBatchSize(count, size, batchDuration);
                });
            }
            finally
            {
                // Always release this batch's byte budget from the auto-tuner.
                long _;
                autoTuner.CurrentlyUsedBatchSizesInBytes.TryRemove(reduceBatchAutoThrottlerId, out _);
            }
        }
        reduceLevelStats.Completed = SystemTime.UtcNow;
        reduceLevelStats.Duration = reduceLevelStats.Completed - reduceLevelStats.Started;
        reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_GetItemsToReduce, gettingItemsToReduceDuration.ElapsedMilliseconds));
        reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_ScheduleReductions, scheduleReductionsDuration.ElapsedMilliseconds));
        reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_RemoveReduceResults, removeReduceResultsDuration.ElapsedMilliseconds));
        reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds));
        reducePerformance.LevelStats.Add(reduceLevelStats);
    }
    foreach (var reduceKey in needToMoveToMultiStep)
    {
        token.ThrowIfCancellationRequested();
        string localReduceKey = reduceKey;
        transactionalStorage.Batch(actions =>
            actions.MapReduce.UpdatePerformedReduceType(index.IndexId, localReduceKey, ReduceType.MultiStep));
    }
    return (reducePerformance);
}
/// <summary>
/// Performs single-step reduction for the given reduce keys (older void variant keyed by
/// IndexName). Phase 1 (parallel, per key partition): drain level-0 scheduled reductions,
/// clean up stray level-1/2 scheduled items, remove stale multi-step reduce results for
/// keys switching to single step, and collect each partition's mapped results (forced into
/// bucket 0). Phase 2 (sequential): reduce everything collected straight to level 2.
/// Finally, keys not yet marked single-step are re-tagged.
/// </summary>
private void SingleStepReduce(IndexToWorkOn index, string[] keysToReduce, AbstractViewGenerator viewGenerator, ConcurrentSet<object> itemsToDelete)
{
    // Each parallel partition contributes its own set; merged after the parallel phase.
    var needToMoveToSingleStepQueue = new ConcurrentQueue<HashSet<string>>();
    Log.Debug(() => string.Format("Executing single step reducing for {0} keys [{1}]", keysToReduce.Length, string.Join(", ", keysToReduce)));
    var batchTimeWatcher = Stopwatch.StartNew();
    // Updated via Interlocked from parallel partitions.
    var count = 0;
    var size = 0;
    // Each partition enqueues (its keys, its mapped results) for the sequential phase.
    var state = new ConcurrentQueue<Tuple<HashSet<string>, List<MappedResultInfo>>>();
    var reducingBatchThrottlerId = Guid.NewGuid();
    try
    {
        BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, keysToReduce, enumerator =>
        {
            var localNeedToMoveToSingleStep = new HashSet<string>();
            needToMoveToSingleStepQueue.Enqueue(localNeedToMoveToSingleStep);
            var localKeys = new HashSet<string>();
            while (enumerator.MoveNext())
            {
                localKeys.Add(enumerator.Current);
            }
            transactionalStorage.Batch(actions =>
            {
                var getItemsToReduceParams = new GetItemsToReduceParams(index: index.IndexName, reduceKeys: localKeys, level: 0, loadData: false, itemsToDelete: itemsToDelete)
                {
                    Take = int.MaxValue // just get all, we do the rate limit when we load the number of keys to reduce, anyway
                };
                // Enumerating also marks the scheduled-reduction rows for deletion via
                // itemsToDelete; only aggregates/buckets are used here (loadData: false).
                var scheduledItems = actions.MapReduce.GetItemsToReduce(getItemsToReduceParams).ToList();
                autoTuner.CurrentlyUsedBatchSizes.GetOrAdd(reducingBatchThrottlerId, scheduledItems.Sum(x => x.Size));
                if (scheduledItems.Count == 0)
                {
                    if (Log.IsWarnEnabled)
                    {
                        Log.Warn("Found single reduce items ({0}) that didn't have any items to reduce. Deleting level 1 & level 2 items for those keys. (If you can reproduce this, please contact [email protected])", string.Join(", ", keysToReduce));
                    }
                    // Here we have an interesting issue. We have scheduled reductions, because GetReduceTypesPerKeys() returned them
                    // and at the same time, we don't have any at level 0. That probably means that we have them at level 1 or 2.
                    // They shouldn't be here, and indeed, we remove them just a little down from here in this function.
                    // That said, they might have smuggled in between versions, or something happened to cause them to be here.
                    // In order to avoid that, we forcibly delete those extra items from the scheduled reductions, and move on
                    foreach (var reduceKey in keysToReduce)
                    {
                        actions.MapReduce.DeleteScheduledReduction(index.IndexName, 1, reduceKey);
                        actions.MapReduce.DeleteScheduledReduction(index.IndexName, 2, reduceKey);
                    }
                }
                foreach (var reduceKey in localKeys)
                {
                    var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexName, reduceKey);
                    if (lastPerformedReduceType != ReduceType.SingleStep)
                    {
                        // Will be re-tagged as SingleStep at the end of this method.
                        localNeedToMoveToSingleStep.Add(reduceKey);
                    }
                    if (lastPerformedReduceType != ReduceType.MultiStep)
                    {
                        continue;
                    }
                    Log.Debug("Key {0} was moved from multi step to single step reduce, removing existing reduce results records", reduceKey);
                    // now we are in single step but previously multi step reduce was performed for the given key
                    var mappedBuckets = actions.MapReduce.GetMappedBuckets(index.IndexName, reduceKey).ToList();
                    // add scheduled items too to be sure we will delete reduce results of already deleted documents
                    mappedBuckets.AddRange(scheduledItems.Select(x => x.Bucket));
                    foreach (var mappedBucket in mappedBuckets.Distinct())
                    {
                        actions.MapReduce.RemoveReduceResults(index.IndexName, 1, reduceKey, mappedBucket);
                        // Level-2 buckets group 1024 level-1 buckets.
                        actions.MapReduce.RemoveReduceResults(index.IndexName, 2, reduceKey, mappedBucket / 1024);
                    }
                }
                var mappedResults = actions.MapReduce.GetMappedResults(
                    index.IndexName,
                    localKeys,
                    loadData: true
                ).ToList();
                Interlocked.Add(ref count, mappedResults.Count);
                Interlocked.Add(ref size, mappedResults.Sum(x => x.Size));
                // Single-step reduce collapses everything into a single bucket (0).
                mappedResults.ApplyIfNotNull(x => x.Bucket = 0);
                state.Enqueue(Tuple.Create(localKeys, mappedResults));
            });
        });
        // Phase 2: one level-2 reduction over everything the partitions collected.
        var reduceKeys = new HashSet<string>(state.SelectMany(x => x.Item1));
        var results = state.SelectMany(x => x.Item2)
            .Where(x => x.Data != null)
            .GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data))
            .ToArray();
        context.PerformanceCounters.ReducedPerSecond.IncrementBy(results.Length);
        context.TransactionalStorage.Batch(actions =>
            context.IndexStorage.Reduce(index.IndexName, viewGenerator, results, 2, context, actions, reduceKeys, state.Sum(x => x.Item2.Count))
        );
        autoTuner.AutoThrottleBatchSize(count, size, batchTimeWatcher.Elapsed);
        // Merge the per-partition sets and persist the reduce-type change.
        var needToMoveToSingleStep = new HashSet<string>();
        HashSet<string> set;
        while (needToMoveToSingleStepQueue.TryDequeue(out set))
        {
            needToMoveToSingleStep.UnionWith(set);
        }
        foreach (var reduceKey in needToMoveToSingleStep)
        {
            string localReduceKey = reduceKey;
            transactionalStorage.Batch(actions =>
                actions.MapReduce.UpdatePerformedReduceType(index.IndexName, localReduceKey, ReduceType.SingleStep));
        }
    }
    finally
    {
        // Always release this batch's size budget from the auto-tuner.
        long _;
        autoTuner.CurrentlyUsedBatchSizes.TryRemove(reducingBatchThrottlerId, out _);
    }
}
/// <summary>
/// Streams mapped results that are ready to be reduced for the index / level described by
/// <paramref name="getItemsToReduceParams"/>. Every scheduled-reduction row visited is handed
/// to the deleter (queuing it for deletion), and <c>Take</c> is decremented per yielded item
/// until exhausted. Reduce keys whose scheduled rows were fully consumed are removed from
/// <c>getItemsToReduceParams.ReduceKeys</c> so a follow-up call does not revisit them.
/// </summary>
public IEnumerable<MappedResultInfo> GetItemsToReduce(GetItemsToReduceParams getItemsToReduceParams, CancellationToken cancellationToken)
{
    var scheduledReductionsByViewAndLevelAndReduceKey = tableStorage.ScheduledReductions.GetIndex(Tables.ScheduledReductions.Indices.ByViewAndLevelAndReduceKey);

    // The deleter identifies rows by their etag rendered as a slice.
    var deleter = new ScheduledReductionDeleter(getItemsToReduceParams.ItemsToDelete, o =>
    {
        var etag = o as Etag;
        if (etag == null)
            return null;

        return (Slice)etag.ToString();
    });

    var seenLocally = new HashSet<ReduceKeyAndBucket>(ReduceKeyAndBucketEqualityComparer.Instance);
    var keysToRemove = new List<string>();

    // BUG FIX: the fully-consumed keys must be removed from ReduceKeys even when this
    // iterator terminates early via 'yield break' (Take exhausted) or enumerator disposal.
    // Without the try/finally, the trailing removal loop was unreachable on those paths and
    // the same keys would be handed back on the next call. An iterator's finally block runs
    // on 'yield break' and on Dispose, so the removal is now guaranteed.
    try
    {
        foreach (var reduceKey in getItemsToReduceParams.ReduceKeys)
        {
            cancellationToken.ThrowIfCancellationRequested();

            var reduceKeyHash = HashKey(reduceKey);
            var viewAndLevelAndReduceKey = (Slice)CreateKey(getItemsToReduceParams.Index, getItemsToReduceParams.Level, ReduceKeySizeLimited(reduceKey), reduceKeyHash);

            using (var iterator = scheduledReductionsByViewAndLevelAndReduceKey.MultiRead(Snapshot, viewAndLevelAndReduceKey))
            {
                // No scheduled rows for this key: deliberately skip the keysToRemove.Add below,
                // matching the original 'continue' semantics.
                if (!iterator.Seek(Slice.BeforeAllKeys))
                    continue;

                do
                {
                    cancellationToken.ThrowIfCancellationRequested();

                    if (getItemsToReduceParams.Take <= 0)
                        break;

                    ushort version;
                    var value = LoadStruct(tableStorage.ScheduledReductions, iterator.CurrentKey, writeBatch.Value, out version);
                    if (value == null) // row vanished under us; in a do-while, 'continue' still evaluates MoveNext()
                        continue;

                    var reduceKeyFromDb = value.ReadString(ScheduledReductionFields.ReduceKey);
                    var bucket = value.ReadInt(ScheduledReductionFields.Bucket);
                    var rowKey = new ReduceKeyAndBucket(bucket, reduceKeyFromDb);

                    // Delete() has a side effect (queues the row for deletion) and must run for
                    // every visited row; ItemsAlreadySeen.Add likewise records the pair even
                    // when nothing ends up being yielded.
                    var thisIsNewScheduledReductionRow = deleter.Delete(iterator.CurrentKey, Etag.Parse(value.ReadBytes(ScheduledReductionFields.Etag)));
                    var neverSeenThisKeyAndBucket = getItemsToReduceParams.ItemsAlreadySeen.Add(rowKey);

                    if (thisIsNewScheduledReductionRow || neverSeenThisKeyAndBucket)
                    {
                        // seenLocally prevents yielding the same bucket twice within this call.
                        if (seenLocally.Add(rowKey))
                        {
                            foreach (var mappedResultInfo in GetResultsForBucket(getItemsToReduceParams.Index, getItemsToReduceParams.Level, reduceKeyFromDb, bucket, getItemsToReduceParams.LoadData, cancellationToken))
                            {
                                getItemsToReduceParams.Take--;
                                yield return mappedResultInfo;
                            }
                        }
                    }

                    if (getItemsToReduceParams.Take <= 0)
                        yield break;
                }
                while (iterator.MoveNext());
            }

            // All scheduled rows for this key were consumed; drop it from the batch.
            keysToRemove.Add(reduceKey);

            if (getItemsToReduceParams.Take <= 0)
                yield break;
        }
    }
    finally
    {
        foreach (var keyToRemove in keysToRemove)
            getItemsToReduceParams.ReduceKeys.Remove(keyToRemove);
    }
}
/// <summary>
/// Materializes (rather than streams) mapped results ready for reduction, resumable across
/// calls: <c>LastReduceKeyAndBucket</c> records where the previous batch stopped so this call
/// can seek straight past it. Fully-consumed keys are removed from <c>ReduceKeys</c> in the
/// finally block, so the removal also happens on early return or cancellation.
/// </summary>
public IList<MappedResultInfo> GetItemsToReduce(GetItemsToReduceParams getItemsToReduceParams, CancellationToken cancellationToken)
{
    var scheduledReductionsByViewAndLevelAndReduceKey = tableStorage.ScheduledReductions.GetIndex(Tables.ScheduledReductions.Indices.ByViewAndLevelAndReduceKey);

    // The deleter identifies rows by their etag rendered as a slice.
    var deleter = new ScheduledReductionDeleter(getItemsToReduceParams.ItemsToDelete, o =>
    {
        var etag = o as Etag;
        if (etag == null)
            return null;
        return (Slice)etag.ToString();
    });

    var keysToRemove = new List<string>();

    try
    {
        var seenLocally = new HashSet<ReduceKeyAndBucket>(ReduceKeyAndBucketEqualityComparer.Instance);
        var mappedResults = new List<MappedResultInfo>();
        var first = true;

        foreach (var reduceKey in getItemsToReduceParams.ReduceKeys)
        {
            cancellationToken.ThrowIfCancellationRequested();

            Slice start = Slice.BeforeAllKeys;
            bool needToMoveNext = false;

            // Resume support: only the very first key may carry over state from a previous
            // call, and it must be the same key that call stopped on.
            if (first)
            {
                first = false;
                if (getItemsToReduceParams.LastReduceKeyAndBucket != null)
                {
                    if (getItemsToReduceParams.LastReduceKeyAndBucket.ReduceKey != reduceKey)
                    {
                        throw new InvalidOperationException("Mismatches last reduce key with the remaining reduce keys in the params");
                    }
                    // Seek to the bucket we stopped at, then step past it (MoveNext below).
                    needToMoveNext = true;
                    start = CreateBucketAndEtagKey(getItemsToReduceParams.LastReduceKeyAndBucket.Bucket, Etag.Empty);
                }
            }

            var viewAndLevelAndReduceKey = CreateScheduleReductionKey(getItemsToReduceParams.Index, getItemsToReduceParams.Level, reduceKey);
            using (var iterator = scheduledReductionsByViewAndLevelAndReduceKey.MultiRead(Snapshot, viewAndLevelAndReduceKey))
            {
                // Note: 'continue' here skips keysToRemove.Add for this key.
                if (!iterator.Seek(start))
                    continue;

                if (needToMoveNext && iterator.MoveNext() == false)
                    continue;

                do
                {
                    cancellationToken.ThrowIfCancellationRequested();

                    if (getItemsToReduceParams.Take <= 0)
                        break;

                    // Multi-read key layout appears to be bucket (big-endian int32) followed by
                    // a 16-byte etag, per the two reads below — TODO confirm against the writer.
                    var idValueReader = iterator.CurrentKey.CreateReader();
                    idValueReader.ReadBigEndianInt32(); // bucket (skipped; only the etag is needed as the row id)
                    int _;
                    var id = new Slice(Etag.Parse(idValueReader.ReadBytes(16, out _)));

                    ushort version;
                    var value = LoadStruct(tableStorage.ScheduledReductions, id, writeBatch.Value, out version);
                    if (value == null) // row vanished under us; 'continue' in a do-while still evaluates MoveNext() — TODO: Check if this is correct.
                        continue;

                    var reduceKeyFromDb = value.ReadString(ScheduledReductionFields.ReduceKey);
                    var bucket = value.ReadInt(ScheduledReductionFields.Bucket);
                    var rowKey = new ReduceKeyAndBucket(bucket, reduceKeyFromDb);

                    // Delete() has a side effect (queues the row for deletion) and runs for every row.
                    var thisIsNewScheduledReductionRow = deleter.Delete(iterator.CurrentKey, Etag.Parse(value.ReadBytes(ScheduledReductionFields.Etag)));
                    if (thisIsNewScheduledReductionRow)
                    {
                        // seenLocally prevents collecting the same bucket twice within this call.
                        if (seenLocally.Add(rowKey))
                        {
                            // Record progress so a follow-up call can resume after this bucket.
                            getItemsToReduceParams.LastReduceKeyAndBucket = rowKey;
                            foreach (var mappedResultInfo in GetResultsForBucket(getItemsToReduceParams.Index, getItemsToReduceParams.Level, reduceKeyFromDb, bucket, getItemsToReduceParams.LoadData, cancellationToken))
                            {
                                getItemsToReduceParams.Take--;
                                mappedResults.Add(mappedResultInfo);
                            }
                        }
                    }

                    if (getItemsToReduceParams.Take <= 0)
                        return mappedResults;
                }
                while (iterator.MoveNext());
            }

            // All scheduled rows for this key were consumed; drop it from the batch.
            keysToRemove.Add(reduceKey);

            if (getItemsToReduceParams.Take <= 0)
                break;
        }

        return mappedResults;
    }
    finally
    {
        // Runs on every exit path (normal return, early return, exception, cancellation).
        foreach (var keyToRemove in keysToRemove)
            getItemsToReduceParams.ReduceKeys.Remove(keyToRemove);
    }
}
/// <summary>
/// Streams mapped results that are ready to be reduced for the index / level described by
/// <paramref name="getItemsToReduceParams"/>. Every scheduled-reduction row visited is handed
/// to the deleter (queuing it for deletion), and <c>Take</c> is decremented per yielded item
/// until exhausted. Keys whose rows were fully consumed are removed from <c>ReduceKeys</c>.
/// </summary>
public IEnumerable<MappedResultInfo> GetItemsToReduce(GetItemsToReduceParams getItemsToReduceParams)
{
    var scheduledReductionsIndex = tableStorage.ScheduledReductions.GetIndex(Tables.ScheduledReductions.Indices.ByViewAndLevelAndReduceKey);

    // The deleter receives the raw JSON row and identifies it by its etag string.
    var deleter = new ScheduledReductionDeleter(getItemsToReduceParams.ItemsToDelete, o =>
    {
        var json = o as RavenJObject;
        return json == null
            ? null
            : Etag.Parse(json.Value<byte[]>("etag")).ToString();
    });

    // Prevents yielding the same (reduce key, bucket) pair twice within this call.
    var yieldedInThisCall = new HashSet<Tuple<string, int>>();

    // Snapshot the keys: ReduceKeys is mutated inside the loop.
    foreach (var reduceKey in getItemsToReduceParams.ReduceKeys.ToArray())
    {
        var hashedReduceKey = HashKey(reduceKey);
        var indexKey = CreateKey(getItemsToReduceParams.Index, getItemsToReduceParams.Level, reduceKey, hashedReduceKey);

        using (var iterator = scheduledReductionsIndex.MultiRead(Snapshot, indexKey))
        {
            // Nothing scheduled for this key: skip it, deliberately leaving it in ReduceKeys.
            if (!iterator.Seek(Slice.BeforeAllKeys))
                continue;

            do
            {
                if (getItemsToReduceParams.Take <= 0)
                    break;

                ushort version;
                var scheduledReduction = LoadJson(tableStorage.ScheduledReductions, iterator.CurrentKey, writeBatch.Value, out version);

                var reduceKeyFromDb = scheduledReduction.Value<string>("reduceKey");
                var bucket = scheduledReduction.Value<int>("bucket");
                var keyAndBucket = Tuple.Create(reduceKeyFromDb, bucket);

                // Both calls below have side effects, so they always run, in this order.
                var isNewScheduledReductionRow = deleter.Delete(iterator.CurrentKey, scheduledReduction);
                var notSeenByPreviousCalls = getItemsToReduceParams.ItemsAlreadySeen.Add(keyAndBucket);

                if ((isNewScheduledReductionRow || notSeenByPreviousCalls) && yieldedInThisCall.Add(keyAndBucket))
                {
                    foreach (var mappedResultInfo in GetResultsForBucket(getItemsToReduceParams.Index, getItemsToReduceParams.Level, reduceKeyFromDb, bucket, getItemsToReduceParams.LoadData))
                    {
                        getItemsToReduceParams.Take--;
                        yield return mappedResultInfo;
                    }
                }

                if (getItemsToReduceParams.Take <= 0)
                    yield break;
            }
            while (iterator.MoveNext());
        }

        // All scheduled rows for this key were consumed; drop it from the batch.
        getItemsToReduceParams.ReduceKeys.Remove(reduceKey);

        if (getItemsToReduceParams.Take <= 0)
            yield break;
    }
}
/// <summary>
/// Streams mapped results that are ready to be reduced, walking the
/// "ByViewLevelReduceKeyAndBucket" index per reduce key. Visited rows not yet queued for
/// deletion are added to <c>ItemsToDelete</c>, and <c>Take</c> is decremented per yielded
/// item until exhausted. Fully-walked keys are removed from <c>ReduceKeys</c>.
/// </summary>
public IEnumerable<MappedResultInfo> GetItemsToReduce(GetItemsToReduceParams getItemsToReduceParams)
{
    // Prevents yielding the same (reduce key, bucket) pair twice within this call.
    var yieldedInThisCall = new HashSet<Tuple<string, int>>();

    // Snapshot the keys: ReduceKeys is mutated inside the loop.
    foreach (var reduceKey in getItemsToReduceParams.ReduceKeys.ToArray())
    {
        var keyCriteria = new RavenJObject
        {
            { "view", getItemsToReduceParams.Index },
            { "level", getItemsToReduceParams.Level },
            { "reduceKey", reduceKey }
        };

        foreach (var result in storage.ScheduleReductions["ByViewLevelReduceKeyAndBucket"].SkipTo(keyCriteria))
        {
            var viewFromDb = result.Value<string>("view");
            var levelFromDb = result.Value<int>("level");
            var reduceKeyFromDb = result.Value<string>("reduceKey");

            // SkipTo positioned us at the first candidate; once any prefix field of the
            // index stops matching, we have walked past this key's rows.
            var viewOrLevelMismatch = !string.Equals(viewFromDb, getItemsToReduceParams.Index, StringComparison.InvariantCultureIgnoreCase)
                                      || levelFromDb != getItemsToReduceParams.Level;
            if (viewOrLevelMismatch || !string.Equals(reduceKeyFromDb, reduceKey, StringComparison.Ordinal))
                break;

            var bucket = result.Value<int>("bucket");
            var keyAndBucket = Tuple.Create(reduceKeyFromDb, bucket);

            var isNewScheduledReductionRow = !getItemsToReduceParams.ItemsToDelete.Contains(result, RavenJTokenEqualityComparer.Default);
            // Always record the pair, even when nothing ends up being yielded below.
            var notSeenByPreviousCalls = getItemsToReduceParams.ItemsAlreadySeen.Add(keyAndBucket);

            if ((isNewScheduledReductionRow || notSeenByPreviousCalls) && yieldedInThisCall.Add(keyAndBucket))
            {
                foreach (var mappedResultInfo in GetResultsForBucket(getItemsToReduceParams.Index, getItemsToReduceParams.Level, reduceKeyFromDb, bucket, getItemsToReduceParams.LoadData))
                {
                    getItemsToReduceParams.Take--;
                    yield return mappedResultInfo;
                }
            }

            // Queue the row for deletion exactly once.
            if (isNewScheduledReductionRow)
            {
                getItemsToReduceParams.ItemsToDelete.Add(result);
            }

            if (getItemsToReduceParams.Take <= 0)
            {
                break;
            }
        }

        // All rows for this key were walked; drop it from the batch.
        getItemsToReduceParams.ReduceKeys.Remove(reduceKey);

        if (getItemsToReduceParams.Take <= 0)
        {
            break;
        }
    }
}