private ReduceResultStats MultiStepReduce(IndexToWorkOn index, string[] keysToReduce, AbstractViewGenerator viewGenerator, List<object> itemsToDelete)
{
    var result = new ReduceResultStats();
    var needToMoveToMultiStep = new HashSet<string>();
    transactionalStorage.Batch(actions =>
    {
        foreach (var localReduceKey in keysToReduce)
        {
            var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexName, localReduceKey);

            if (lastPerformedReduceType != ReduceType.MultiStep)
            {
                needToMoveToMultiStep.Add(localReduceKey);
            }

            if (lastPerformedReduceType != ReduceType.SingleStep)
            {
                continue;
            }

            // we exceeded the limit of items to reduce in a single step,
            // so we need to schedule reductions at level 0 for all map results with the given reduce key
            var mappedItems = actions.MapReduce.GetMappedBuckets(index.IndexName, localReduceKey).ToList();
            actions.MapReduce.ScheduleReductions(index.IndexName, 0,
                mappedItems.Select(x => new ReduceKeyAndBucket(x, localReduceKey)));
        }
    });

    // walk the three reduce levels (0, 1, 2), draining each level in batches
    for (int i = 0; i < 3; i++)
    {
        var level = i;
        bool retry = true;
        var itemsAlreadySeen = new HashSet<Tuple<string, int>>();
        while (retry)
        {
            transactionalStorage.Batch(actions =>
            {
                context.CancellationToken.ThrowIfCancellationRequested();

                var sp = Stopwatch.StartNew();
                var persistedResults = actions.MapReduce.GetItemsToReduce
                    (
                        level: level,
                        reduceKeys: keysToReduce,
                        index: index.IndexName,
                        itemsToDelete: itemsToDelete,
                        loadData: true,
                        take: context.CurrentNumberOfItemsToReduceInSingleBatch,
                        itemsAlreadySeen: itemsAlreadySeen
                    ).ToList();

                if (persistedResults.Count == 0)
                {
                    retry = false;
                    return;
                }

                result.count += persistedResults.Count;
                result.size += persistedResults.Sum(x => x.Size);

                if (Log.IsDebugEnabled)
                {
                    if (persistedResults.Count > 0)
                    {
                        Log.Debug(() => string.Format("Found {0} results for keys [{1}] for index {2} at level {3} in {4}",
                            persistedResults.Count,
                            string.Join(", ", persistedResults.Select(x => x.ReduceKey).Distinct()),
                            index.IndexName, level, sp.Elapsed));
                    }
                    else
                    {
                        Log.Debug("No reduce keys found for {0}", index.IndexName);
                    }
                }

                context.CancellationToken.ThrowIfCancellationRequested();

                var requiredReduceNextTime = persistedResults.Select(x => new ReduceKeyAndBucket(x.Bucket, x.ReduceKey))
                    .OrderBy(x => x.Bucket)
                    .Distinct()
                    .ToArray();

                foreach (var mappedResultInfo in requiredReduceNextTime)
                {
                    actions.MapReduce.RemoveReduceResults(index.IndexName, level + 1, mappedResultInfo.ReduceKey, mappedResultInfo.Bucket);
                }

                if (level != 2)
                {
                    // schedule the next level; up to 1024 buckets collapse into one parent bucket
                    var reduceKeysAndBuckets = requiredReduceNextTime
                        .Select(x => new ReduceKeyAndBucket(x.Bucket / 1024, x.ReduceKey))
                        .Distinct()
                        .ToArray();
                    actions.MapReduce.ScheduleReductions(index.IndexName, level + 1, reduceKeysAndBuckets);
                }

                var results = persistedResults
                    .Where(x => x.Data != null)
                    .GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data))
                    .ToArray();
                var reduceKeys = new HashSet<string>(persistedResults.Select(x => x.ReduceKey), StringComparer.InvariantCultureIgnoreCase);
                context.ReducedPerSecIncreaseBy(results.Length);

                context.CancellationToken.ThrowIfCancellationRequested();
                sp = Stopwatch.StartNew();
                context.IndexStorage.Reduce(index.IndexName, viewGenerator, results, level, context, actions, reduceKeys);
                Log.Debug("Indexed {0} reduce keys in {1} with {2} results for index {3}",
                    reduceKeys.Count, sp.Elapsed, results.Length, index.IndexName);
            });
        }
    }

    foreach (var reduceKey in needToMoveToMultiStep)
    {
        string localReduceKey = reduceKey;
        transactionalStorage.Batch(actions =>
            actions.MapReduce.UpdatePerformedReduceType(index.IndexName, localReduceKey, ReduceType.MultiStep));
    }

    return result;
}
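// ---------------------------------------------------------------------------
// Illustrative sketch (standalone demo, not part of the class above): how the
// three-level bucket hierarchy walked by MultiStepReduce collapses. The only
// fact taken from the code above is the x.Bucket / 1024 parent projection; all
// names here are hypothetical. Each hop divides the bucket by 1024, so up to
// 1024 level-0 buckets funnel into one level-1 bucket, and up to 1024 of those
// into one level-2 bucket.
// ---------------------------------------------------------------------------
using System;

internal static class BucketHierarchyDemo
{
    // Hypothetical helper mirroring the x.Bucket / 1024 projection above.
    private static int ParentBucket(int bucket)
    {
        return bucket / 1024;
    }

    public static void Demo()
    {
        int level0 = 1500000;               // a raw map-output bucket
        int level1 = ParentBucket(level0);  // 1464
        int level2 = ParentBucket(level1);  // 1
        Console.WriteLine("{0} -> {1} -> {2}", level0, level1, level2);
    }
}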
private ReduceResultStats MultiStepReduce(IndexToWorkOn index, string[] keysToReduce, AbstractViewGenerator viewGenerator, List<object> itemsToDelete)
{
    var result = new ReduceResultStats();
    var needToMoveToMultiStep = new HashSet<string>();
    transactionalStorage.Batch(actions =>
    {
        foreach (var localReduceKey in keysToReduce)
        {
            var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexName, localReduceKey);

            if (lastPerformedReduceType != ReduceType.MultiStep)
                needToMoveToMultiStep.Add(localReduceKey);

            if (lastPerformedReduceType != ReduceType.SingleStep)
                continue;

            // we exceeded the limit of items to reduce in a single step,
            // so we need to schedule reductions at level 0 for all map results with the given reduce key
            var mappedItems = actions.MapReduce.GetMappedBuckets(index.IndexName, localReduceKey).ToList();
            actions.MapReduce.ScheduleReductions(index.IndexName, 0,
                mappedItems.Select(x => new ReduceKeyAndBucket(x, localReduceKey)));
        }
    });

    for (int i = 0; i < 3; i++)
    {
        var level = i;
        transactionalStorage.Batch(actions =>
        {
            context.CancellationToken.ThrowIfCancellationRequested();

            var sp = Stopwatch.StartNew(); // start timing before the fetch so the logged elapsed time covers it
            var persistedResults = actions.MapReduce.GetItemsToReduce
                (
                    level: level,
                    reduceKeys: keysToReduce,
                    index: index.IndexName,
                    itemsToDelete: itemsToDelete,
                    loadData: true
                ).ToList();

            result.count += persistedResults.Count;
            result.size += persistedResults.Sum(x => x.Size);

            if (Log.IsDebugEnabled)
            {
                if (persistedResults.Count > 0)
                    Log.Debug(() => string.Format("Found {0} results for keys [{1}] for index {2} at level {3} in {4}",
                        persistedResults.Count,
                        string.Join(", ", persistedResults.Select(x => x.ReduceKey).Distinct()),
                        index.IndexName, level, sp.Elapsed));
                else
                    Log.Debug("No reduce keys found for {0}", index.IndexName);
            }

            context.CancellationToken.ThrowIfCancellationRequested();

            var requiredReduceNextTime = persistedResults.Select(x => new ReduceKeyAndBucket(x.Bucket, x.ReduceKey))
                .OrderBy(x => x.Bucket)
                .Distinct()
                .ToArray();

            foreach (var mappedResultInfo in requiredReduceNextTime)
            {
                actions.MapReduce.RemoveReduceResults(index.IndexName, level + 1, mappedResultInfo.ReduceKey, mappedResultInfo.Bucket);
            }

            if (level != 2)
            {
                var reduceKeysAndBuckets = requiredReduceNextTime
                    .Select(x => new ReduceKeyAndBucket(x.Bucket / 1024, x.ReduceKey))
                    .Distinct()
                    .ToArray();
                actions.MapReduce.ScheduleReductions(index.IndexName, level + 1, reduceKeysAndBuckets);
            }

            var results = persistedResults
                .Where(x => x.Data != null)
                .GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data))
                .ToArray();
            var reduceKeys = new HashSet<string>(persistedResults.Select(x => x.ReduceKey), StringComparer.InvariantCultureIgnoreCase);
            context.ReducedPerSecIncreaseBy(results.Length);

            context.CancellationToken.ThrowIfCancellationRequested();
            sp = Stopwatch.StartNew();
            context.IndexStorage.Reduce(index.IndexName, viewGenerator, results, level, context, actions, reduceKeys);
            Log.Debug("Indexed {0} reduce keys in {1} with {2} results for index {3}",
                reduceKeys.Count, sp.Elapsed, results.Length, index.IndexName);
        });
    }

    foreach (var reduceKey in needToMoveToMultiStep)
    {
        string localReduceKey = reduceKey;
        transactionalStorage.Batch(actions =>
            actions.MapReduce.UpdatePerformedReduceType(index.IndexName, localReduceKey, ReduceType.MultiStep));
    }

    return result;
}
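// ---------------------------------------------------------------------------
// Illustrative sketch (hypothetical names, not RavenDB's API): the drain-in-
// batches pattern that distinguishes the first MultiStepReduce above from this
// older version. Storage is asked for at most batchSize items per transaction;
// the (reduceKey, bucket) pairs already handled are remembered so they are
// never returned twice, and the loop stops once a batch comes back empty.
// ---------------------------------------------------------------------------
using System;
using System.Collections.Generic;
using System.Linq;

internal static class BatchDrainDemo
{
    public static int DrainInBatches(
        Func<HashSet<Tuple<string, int>>, int, List<Tuple<string, int>>> getItemsToReduce,
        int batchSize)
    {
        var itemsAlreadySeen = new HashSet<Tuple<string, int>>();
        var processed = 0;
        while (true)
        {
            var batch = getItemsToReduce(itemsAlreadySeen, batchSize);
            if (batch.Count == 0)
                break; // nothing left at this level

            foreach (var item in batch)
                itemsAlreadySeen.Add(item); // never hand the same (key, bucket) back

            processed += batch.Count; // a real implementation would reduce the batch here
        }
        return processed;
    }

    public static void Demo()
    {
        // Fake storage: 2500 pending (key, bucket) pairs, drained 1000 at a time.
        var pending = Enumerable.Range(0, 2500).Select(i => Tuple.Create("key", i)).ToList();
        var total = DrainInBatches(
            (seen, take) => pending.Where(p => !seen.Contains(p)).Take(take).ToList(),
            batchSize: 1000);
        Console.WriteLine(total); // 2500, drained in three batches (1000 + 1000 + 500)
    }
}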
private ReduceResultStats SingleStepReduce(IndexToWorkOn index, string[] keysToReduce, AbstractViewGenerator viewGenerator, List<object> itemsToDelete)
{
    var result = new ReduceResultStats();
    var needToMoveToSingleStep = new HashSet<string>();
    Log.Debug(() => string.Format("Executing single step reducing for {0} keys [{1}]",
        keysToReduce.Length, string.Join(", ", keysToReduce)));
    transactionalStorage.Batch(actions =>
    {
        var scheduledItems = actions.MapReduce.GetItemsToReduce
            (
                level: 0,
                reduceKeys: keysToReduce,
                index: index.IndexName,
                itemsToDelete: itemsToDelete,
                loadData: false,
                take: int.MaxValue, // just get everything; we already rate limit when loading the keys to reduce
                itemsAlreadySeen: new HashSet<Tuple<string, int>>()
            ).ToList();

        // Only look at the scheduled batch for this run, not the entire set of pending reductions.
        //var batchKeys = scheduledItems.Select(x => x.ReduceKey).ToArray();

        foreach (var reduceKey in keysToReduce)
        {
            var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexName, reduceKey);

            if (lastPerformedReduceType != ReduceType.SingleStep)
            {
                needToMoveToSingleStep.Add(reduceKey);
            }

            if (lastPerformedReduceType != ReduceType.MultiStep)
            {
                continue;
            }

            Log.Debug("Key {0} was moved from multi step to single step reduce, removing existing reduce results records", reduceKey);

            // we are now in single step mode, but a multi step reduce was previously performed for the given key
            var mappedBuckets = actions.MapReduce.GetMappedBuckets(index.IndexName, reduceKey).ToList();

            // add the scheduled items too, to be sure we delete reduce results of already deleted documents
            mappedBuckets.AddRange(scheduledItems.Select(x => x.Bucket));

            foreach (var mappedBucket in mappedBuckets.Distinct())
            {
                actions.MapReduce.RemoveReduceResults(index.IndexName, 1, reduceKey, mappedBucket);
                actions.MapReduce.RemoveReduceResults(index.IndexName, 2, reduceKey, mappedBucket / 1024);
            }
        }

        var mappedResults = actions.MapReduce.GetMappedResults(
                index.IndexName,
                keysToReduce,
                loadData: true
            ).ToList();

        result.count += mappedResults.Count;
        result.size += mappedResults.Sum(x => x.Size);

        var reduceKeys = new HashSet<string>(keysToReduce);

        mappedResults.ApplyIfNotNull(x => x.Bucket = 0);

        var results = mappedResults
            .Where(x => x.Data != null)
            .GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data))
            .ToArray();

        context.ReducedPerSecIncreaseBy(results.Length);
        context.IndexStorage.Reduce(index.IndexName, viewGenerator, results, 2, context, actions, reduceKeys);
    });

    foreach (var reduceKey in needToMoveToSingleStep)
    {
        string localReduceKey = reduceKey;
        transactionalStorage.Batch(actions =>
            actions.MapReduce.UpdatePerformedReduceType(index.IndexName, localReduceKey, ReduceType.SingleStep));
    }

    return result;
}
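// ---------------------------------------------------------------------------
// Illustrative sketch (hypothetical names) of the cleanup rule SingleStepReduce
// applies above when a key flips from multi-step back to single-step: every
// mapped bucket may have left a level-1 reduce result behind, and its
// bucket / 1024 parent a level-2 result, so both are removed before the
// single-step pass writes fresh output.
// ---------------------------------------------------------------------------
using System;
using System.Collections.Generic;
using System.Linq;

internal static class ModeSwitchCleanupDemo
{
    public static void CleanupAfterSwitch(IEnumerable<int> mappedBuckets, Action<int, int> removeReduceResults)
    {
        foreach (var bucket in mappedBuckets.Distinct())
        {
            removeReduceResults(1, bucket);        // stale level-1 result for this bucket
            removeReduceResults(2, bucket / 1024); // stale level-2 result for its parent
        }
    }

    public static void Demo()
    {
        var removed = new List<string>();
        CleanupAfterSwitch(
            new[] { 5, 2100, 2100 },               // duplicate bucket is visited once
            (level, bucket) => removed.Add(level + ":" + bucket));
        Console.WriteLine(string.Join(", ", removed)); // 1:5, 2:0, 1:2100, 2:2
    }
}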
private ReduceResultStats SingleStepReduce(IndexToWorkOn index, string[] keysToReduce, AbstractViewGenerator viewGenerator, List<object> itemsToDelete)
{
    var result = new ReduceResultStats();
    var needToMoveToSingleStep = new HashSet<string>();
    Log.Debug(() => string.Format("Executing single step reducing for {0} keys [{1}]",
        keysToReduce.Length, string.Join(", ", keysToReduce)));
    transactionalStorage.Batch(actions =>
    {
        var scheduledItems = actions.MapReduce.GetItemsToReduce
            (
                level: 0,
                reduceKeys: keysToReduce,
                index: index.IndexName,
                itemsToDelete: itemsToDelete,
                loadData: false
            ).ToList();

        // Only look at the scheduled batch for this run, not the entire set of pending reductions.
        //var batchKeys = scheduledItems.Select(x => x.ReduceKey).ToArray();

        foreach (var reduceKey in keysToReduce)
        {
            var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexName, reduceKey);

            if (lastPerformedReduceType != ReduceType.SingleStep)
                needToMoveToSingleStep.Add(reduceKey);

            if (lastPerformedReduceType != ReduceType.MultiStep)
                continue;

            Log.Debug("Key {0} was moved from multi step to single step reduce, removing existing reduce results records", reduceKey);

            // we are now in single step mode, but a multi step reduce was previously performed for the given key
            var mappedBuckets = actions.MapReduce.GetMappedBuckets(index.IndexName, reduceKey).ToList();

            // add the scheduled items too, to be sure we delete reduce results of already deleted documents
            mappedBuckets.AddRange(scheduledItems.Select(x => x.Bucket));

            foreach (var mappedBucket in mappedBuckets.Distinct())
            {
                actions.MapReduce.RemoveReduceResults(index.IndexName, 1, reduceKey, mappedBucket);
                actions.MapReduce.RemoveReduceResults(index.IndexName, 2, reduceKey, mappedBucket / 1024);
            }
        }

        var mappedResults = actions.MapReduce.GetMappedResults(
                index.IndexName,
                keysToReduce,
                loadData: true
            ).ToList();

        result.count += mappedResults.Count;
        result.size += mappedResults.Sum(x => x.Size);

        var reduceKeys = new HashSet<string>(keysToReduce);

        mappedResults.ApplyIfNotNull(x => x.Bucket = 0);

        var results = mappedResults
            .Where(x => x.Data != null)
            .GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data))
            .ToArray();

        context.ReducedPerSecIncreaseBy(results.Length);
        context.IndexStorage.Reduce(index.IndexName, viewGenerator, results, 2, context, actions, reduceKeys);
    });

    foreach (var reduceKey in needToMoveToSingleStep)
    {
        string localReduceKey = reduceKey;
        transactionalStorage.Batch(actions =>
            actions.MapReduce.UpdatePerformedReduceType(index.IndexName, localReduceKey, ReduceType.SingleStep));
    }

    return result;
}
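// ---------------------------------------------------------------------------
// Note on the mappedResults.ApplyIfNotNull(x => x.Bucket = 0) line above: by
// forcing every mapped result into bucket 0, the GroupBy(x => x.Bucket, ...)
// yields a single group, so the whole reduce key is reduced in one pass written
// directly at level 2, the level MultiStepReduce only reaches after two hops.
// A minimal sketch of that collapsing effect, with hypothetical demo types:
// ---------------------------------------------------------------------------
using System;
using System.Linq;

internal static class SingleBucketDemo
{
    private class MappedResult
    {
        public int Bucket;
    }

    public static void Demo()
    {
        var mapped = new[]
        {
            new MappedResult { Bucket = 17 },
            new MappedResult { Bucket = 4096 },
            new MappedResult { Bucket = 1048576 }
        };

        foreach (var m in mapped)
            m.Bucket = 0; // the single-step collapse

        var groupCount = mapped.GroupBy(x => x.Bucket).Count();
        Console.WriteLine(groupCount); // 1: everything reduced in one pass
    }
}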