Beispiel #1
0
        public void IndexPrecomputedBatch(PrecomputedIndexingBatch precomputedBatch)
        {
            context.MetricsCounters.IndexedPerSecond.Mark(precomputedBatch.Documents.Count);

            var indexToWorkOn = new IndexToWorkOn()
            {
                Index           = precomputedBatch.Index,
                IndexId         = precomputedBatch.Index.indexId,
                LastIndexedEtag = Etag.Empty
            };

            var indexingBatchForIndex =
                FilterIndexes(new List <IndexToWorkOn>()
            {
                indexToWorkOn
            }, precomputedBatch.Documents,
                              precomputedBatch.LastIndexed).FirstOrDefault();

            if (indexingBatchForIndex == null)
            {
                return;
            }

            if (Log.IsDebugEnabled)
            {
                Log.Debug("Going to index precomputed documents for a new index {0}. Count of precomputed docs {1}",
                          precomputedBatch.Index.PublicName, precomputedBatch.Documents.Count);
            }

            HandleIndexingFor(indexingBatchForIndex, precomputedBatch.LastIndexed, precomputedBatch.LastModified);
        }
Beispiel #2
0
        protected void HandleReduceForIndex(IndexToWorkOn indexToWorkOn)
        {
            var viewGenerator = context.IndexDefinitionStorage.GetViewGenerator(indexToWorkOn.IndexName);

            if (viewGenerator == null)
            {
                return;
            }

            bool operationCanceled = false;
            var  itemsToDelete     = new List <object>();

            IList <ReduceTypePerKey> mappedResultsInfo = null;

            transactionalStorage.Batch(actions =>
            {
                mappedResultsInfo = actions.MapReduce.GetReduceTypesPerKeys(indexToWorkOn.IndexName,
                                                                            context.CurrentNumberOfItemsToReduceInSingleBatch,
                                                                            context.NumberOfItemsToExecuteReduceInSingleStep).ToList();
            });

            var singleStepReduceKeys = mappedResultsInfo.Where(x => x.OperationTypeToPerform == ReduceType.SingleStep).Select(x => x.ReduceKey).ToArray();
            var multiStepsReduceKeys = mappedResultsInfo.Where(x => x.OperationTypeToPerform == ReduceType.MultiStep).Select(x => x.ReduceKey).ToArray();

            try
            {
                if (singleStepReduceKeys.Length > 0)
                {
                    SingleStepReduce(indexToWorkOn, singleStepReduceKeys, viewGenerator, itemsToDelete);
                }

                if (multiStepsReduceKeys.Length > 0)
                {
                    MultiStepReduce(indexToWorkOn, multiStepsReduceKeys, viewGenerator, itemsToDelete);
                }
            }
            catch (OperationCanceledException)
            {
                operationCanceled = true;
            }
            finally
            {
                if (operationCanceled == false)
                {
                    // whatever we succeeded in indexing or not, we have to update this
                    // because otherwise we keep trying to re-index failed mapped results
                    transactionalStorage.Batch(actions =>
                    {
                        var latest = actions.MapReduce.DeleteScheduledReduction(itemsToDelete);

                        if (latest == null)
                        {
                            return;
                        }
                        actions.Indexing.UpdateLastReduced(indexToWorkOn.IndexName, latest.Etag, latest.Timestamp);
                    });
                }
            }
        }
		protected void HandleReduceForIndex(IndexToWorkOn indexToWorkOn)
		{
			var viewGenerator = context.IndexDefinitionStorage.GetViewGenerator(indexToWorkOn.IndexId);
			if (viewGenerator == null)
				return;

			bool operationCanceled = false;
			var itemsToDelete = new ConcurrentSet<object>();

			IList<ReduceTypePerKey> mappedResultsInfo = null;
			transactionalStorage.Batch(actions =>
			{
				mappedResultsInfo = actions.MapReduce.GetReduceTypesPerKeys(indexToWorkOn.IndexId,
					context.CurrentNumberOfItemsToReduceInSingleBatch,
					context.NumberOfItemsToExecuteReduceInSingleStep).ToList();
			});

			var singleStepReduceKeys = mappedResultsInfo.Where(x => x.OperationTypeToPerform == ReduceType.SingleStep).Select(x => x.ReduceKey).ToArray();
			var multiStepsReduceKeys = mappedResultsInfo.Where(x => x.OperationTypeToPerform == ReduceType.MultiStep).Select(x => x.ReduceKey).ToArray();

			currentlyProcessedIndexes.TryAdd(indexToWorkOn.IndexId, indexToWorkOn.Index);
			try
			{
				if (singleStepReduceKeys.Length > 0)
				{
					Log.Debug("SingleStep reduce for keys: {0}",singleStepReduceKeys.Select(x => x + ","));
					SingleStepReduce(indexToWorkOn, singleStepReduceKeys, viewGenerator, itemsToDelete);
				}

				if (multiStepsReduceKeys.Length > 0)
				{
					Log.Debug("MultiStep reduce for keys: {0}", singleStepReduceKeys.Select(x => x + ","));
					MultiStepReduce(indexToWorkOn, multiStepsReduceKeys, viewGenerator, itemsToDelete);
				}
			}
			catch (OperationCanceledException)
			{
				operationCanceled = true;
			}
			finally
			{
				if (operationCanceled == false)
				{
					// whatever we succeeded in indexing or not, we have to update this
					// because otherwise we keep trying to re-index failed mapped results
					transactionalStorage.Batch(actions =>
					{
						var latest = actions.MapReduce.DeleteScheduledReduction(itemsToDelete);

						if (latest == null)
							return;
						actions.Indexing.UpdateLastReduced(indexToWorkOn.Index.indexId, latest.Etag, latest.Timestamp);
					});
				}

				Index _;
				currentlyProcessedIndexes.TryRemove(indexToWorkOn.IndexId, out _);
			}
		}
Beispiel #4
0
        protected void HandleReduceForIndex(IndexToWorkOn indexToWorkOn)
        {
            List <MappedResultInfo> reduceKeyAndEtags = null;

            try
            {
                transactionalStorage.Batch(actions =>
                {
                    reduceKeyAndEtags = actions.MappedResults.GetMappedResultsReduceKeysAfter
                                        (
                        indexToWorkOn.IndexName,
                        indexToWorkOn.LastIndexedEtag,
                        loadData: false,
                        // for reduce operations, we use the smaller value, rather than tuning stuff on the fly
                        // the reason for that is that we may have large number of map values to reduce anyway,
                        // so we don't want to try to load too much all at once.
                        take: context.Configuration.InitialNumberOfItemsToIndexInSingleBatch
                                        )
                                        .ToList();

                    if (log.IsDebugEnabled)
                    {
                        if (reduceKeyAndEtags.Count > 0)
                        {
                            log.Debug(() => string.Format("Found {0} mapped results for keys [{1}] for index {2}", reduceKeyAndEtags.Count, string.Join(", ", reduceKeyAndEtags.Select(x => x.ReduceKey).Distinct()), indexToWorkOn.IndexName));
                        }
                        else
                        {
                            log.Debug("No reduce keys found for {0}", indexToWorkOn.IndexName);
                        }
                    }

                    new ReduceTask
                    {
                        Index      = indexToWorkOn.IndexName,
                        ReduceKeys = reduceKeyAndEtags.Select(x => x.ReduceKey).Distinct().ToArray(),
                    }.Execute(context);
                });
            }
            finally
            {
                if (reduceKeyAndEtags != null && reduceKeyAndEtags.Count > 0)
                {
                    var lastByEtag = GetLastByEtag(reduceKeyAndEtags);
                    var lastEtag   = lastByEtag.Etag;

                    var lastIndexedEtag = new ComparableByteArray(lastEtag.ToByteArray());
                    // whatever we succeeded in indexing or not, we have to update this
                    // because otherwise we keep trying to re-index failed mapped results
                    transactionalStorage.Batch(actions =>
                    {
                        if (new ComparableByteArray(indexToWorkOn.LastIndexedEtag.ToByteArray()).CompareTo(lastIndexedEtag) <= 0)
                        {
                            actions.Indexing.UpdateLastReduced(indexToWorkOn.IndexName, lastByEtag.Etag, lastByEtag.Timestamp);
                        }
                    });
                }
            }
        }
 private void MarkIndexes(IndexToWorkOn indexToWorkOn, ComparableByteArray lastIndexedEtag, IStorageActionsAccessor actions, Guid lastEtag, DateTime lastModified)
 {
     if (new ComparableByteArray(indexToWorkOn.LastIndexedEtag.ToByteArray()).CompareTo(lastIndexedEtag) > 0)
     {
         return;
     }
     actions.Indexing.UpdateLastIndexed(indexToWorkOn.IndexName, lastEtag, lastModified);
 }
Beispiel #6
0
        protected void HandleReduceForIndex(IndexToWorkOn indexToWorkOn)
        {
            List <MappedResultInfo> reduceKeyAndEtags = null;

            try
            {
                transactionalStorage.Batch(actions =>
                {
                    reduceKeyAndEtags = actions.MappedResults.GetMappedResultsReduceKeysAfter
                                        (
                        indexToWorkOn.IndexName,
                        indexToWorkOn.LastIndexedEtag,
                        loadData: false,
                        take: autoTuner.NumberOfItemsToIndexInSingleBatch
                                        )
                                        .ToList();

                    if (log.IsDebugEnabled)
                    {
                        if (reduceKeyAndEtags.Count > 0)
                        {
                            log.Debug(() => string.Format("Found {0} mapped results for keys [{1}] for index {2}", reduceKeyAndEtags.Count, string.Join(", ", reduceKeyAndEtags.Select(x => x.ReduceKey).Distinct()), indexToWorkOn.IndexName));
                        }
                        else
                        {
                            log.Debug("No reduce keys found for {0}", indexToWorkOn.IndexName);
                        }
                    }

                    new ReduceTask
                    {
                        Index      = indexToWorkOn.IndexName,
                        ReduceKeys = reduceKeyAndEtags.Select(x => x.ReduceKey).Distinct().ToArray(),
                    }.Execute(context);
                });
            }
            finally
            {
                if (reduceKeyAndEtags != null && reduceKeyAndEtags.Count > 0)
                {
                    var lastByEtag = GetLastByEtag(reduceKeyAndEtags);
                    var lastEtag   = lastByEtag.Etag;

                    var lastIndexedEtag = new ComparableByteArray(lastEtag.ToByteArray());
                    // whatever we succeeded in indexing or not, we have to update this
                    // because otherwise we keep trying to re-index failed mapped results
                    transactionalStorage.Batch(actions =>
                    {
                        if (new ComparableByteArray(indexToWorkOn.LastIndexedEtag.ToByteArray()).CompareTo(lastIndexedEtag) <= 0)
                        {
                            actions.Indexing.UpdateLastReduced(indexToWorkOn.IndexName, lastByEtag.Etag, lastByEtag.Timestamp);
                        }
                    });

                    autoTuner.AutoThrottleBatchSize(reduceKeyAndEtags.Count, reduceKeyAndEtags.Sum(x => x.Size));
                }
            }
        }
Beispiel #7
0
		protected void HandleReduceForIndex(IndexToWorkOn indexToWorkOn)
		{
			TimeSpan reduceDuration= TimeSpan.Zero;
			List<MappedResultInfo> reduceKeyAndEtags = null;
			try
			{
				transactionalStorage.Batch(actions =>
				{
					reduceKeyAndEtags = actions.MappedResults.GetMappedResultsReduceKeysAfter
						(
							indexToWorkOn.IndexName,
							indexToWorkOn.LastIndexedEtag,
							loadData: false,
							take: autoTuner.NumberOfItemsToIndexInSingleBatch
						)
						.ToList();

					if(log.IsDebugEnabled)
					{
						if (reduceKeyAndEtags.Count > 0)
							log.Debug(() => string.Format("Found {0} mapped results for keys [{1}] for index {2}", reduceKeyAndEtags.Count, string.Join(", ", reduceKeyAndEtags.Select(x => x.ReduceKey).Distinct()), indexToWorkOn.IndexName));
						else
							log.Debug("No reduce keys found for {0}", indexToWorkOn.IndexName);
					}

					var sw = Stopwatch.StartNew();
					new ReduceTask
					{
						Index = indexToWorkOn.IndexName,
						ReduceKeys = reduceKeyAndEtags.Select(x => x.ReduceKey).Distinct().ToArray(),
					}.Execute(context);
					reduceDuration = sw.Elapsed;
				});
			}
			finally
			{
				if (reduceKeyAndEtags != null && reduceKeyAndEtags.Count > 0)
				{
					var lastByEtag = GetLastByEtag(reduceKeyAndEtags);
					var lastEtag = lastByEtag.Etag;

					var lastIndexedEtag = new ComparableByteArray(lastEtag.ToByteArray());
					// whatever we succeeded in indexing or not, we have to update this
					// because otherwise we keep trying to re-index failed mapped results
					transactionalStorage.Batch(actions =>
					{
						if (new ComparableByteArray(indexToWorkOn.LastIndexedEtag.ToByteArray()).CompareTo(lastIndexedEtag) <= 0)
						{
							actions.Indexing.UpdateLastReduced(indexToWorkOn.IndexName, lastByEtag.Etag, lastByEtag.Timestamp);
						}
					});

					autoTuner.AutoThrottleBatchSize(reduceKeyAndEtags.Count, reduceKeyAndEtags.Sum(x => x.Size), reduceDuration);
				}
			}
		}
Beispiel #8
0
		protected void HandleReduceForIndex(IndexToWorkOn indexToWorkOn)
		{
			List<MappedResultInfo> reduceKeyAndEtags = null;
			try
			{
				transactionalStorage.Batch(actions =>
				{
					reduceKeyAndEtags = actions.MappedResults.GetMappedResultsReduceKeysAfter
						(
							indexToWorkOn.IndexName,
							indexToWorkOn.LastIndexedEtag,
							loadData: false,
							// for reduce operations, we use the smaller value, rather than tuning stuff on the fly
							// the reason for that is that we may have large number of map values to reduce anyway, 
							// so we don't want to try to load too much all at once.
							take: context.Configuration.InitialNumberOfItemsToIndexInSingleBatch
						)
						.ToList();

					if(log.IsDebugEnabled)
					{
						if (reduceKeyAndEtags.Count > 0)
							log.Debug(() => string.Format("Found {0} mapped results for keys [{1}] for index {2}", reduceKeyAndEtags.Count, string.Join(", ", reduceKeyAndEtags.Select(x => x.ReduceKey).Distinct()), indexToWorkOn.IndexName));
						else
							log.Debug("No reduce keys found for {0}", indexToWorkOn.IndexName);
					}

					new ReduceTask
					{
						Index = indexToWorkOn.IndexName,
						ReduceKeys = reduceKeyAndEtags.Select(x => x.ReduceKey).Distinct().ToArray(),
					}.Execute(context);
				});
			}
			finally
			{
				if (reduceKeyAndEtags != null && reduceKeyAndEtags.Count > 0)
				{
					var lastByEtag = GetLastByEtag(reduceKeyAndEtags);
					var lastEtag = lastByEtag.Etag;

					var lastIndexedEtag = new ComparableByteArray(lastEtag.ToByteArray());
					// whatever we succeeded in indexing or not, we have to update this
					// because otherwise we keep trying to re-index failed mapped results
					transactionalStorage.Batch(actions =>
					{
						if (new ComparableByteArray(indexToWorkOn.LastIndexedEtag.ToByteArray()).CompareTo(lastIndexedEtag) <= 0)
						{
							actions.Indexing.UpdateLastReduced(indexToWorkOn.IndexName, lastByEtag.Etag, lastByEtag.Timestamp);
						}
					});
				}
			}
		}
Beispiel #9
0
        protected void HandleReduceForIndex(IndexToWorkOn indexToWorkOn)
        {
            List<MappedResultInfo> reduceKeyAndEtags = null;
            try
            {
                transactionalStorage.Batch(actions =>
                {
                    reduceKeyAndEtags = actions.MappedResults.GetMappedResultsReduceKeysAfter
						(
							indexToWorkOn.IndexName,
							indexToWorkOn.LastIndexedEtag
						).ToList();

                    if(log.IsDebugEnabled)
                    {
						if (reduceKeyAndEtags.Count > 0)
							log.Debug(() => string.Format("Found {0} mapped results for keys [{1}] for index {2}", reduceKeyAndEtags.Count, string.Join(", ", reduceKeyAndEtags.Select(x => x.ReduceKey).Distinct()), indexToWorkOn.IndexName));
						else
							log.Debug("No reduce keys found for {0}", indexToWorkOn.IndexName);
                    }

                    new ReduceTask
                    {
                        Index = indexToWorkOn.IndexName,
                        ReduceKeys = reduceKeyAndEtags.Select(x => x.ReduceKey).Distinct().ToArray(),
                    }.Execute(context);
                });
            }
            finally
            {
                if (reduceKeyAndEtags != null && reduceKeyAndEtags.Count > 0)
                {
                    var lastByEtag = GetLastByEtag(reduceKeyAndEtags);
                    var lastEtag = lastByEtag.Etag;

                    var lastIndexedEtag = new ComparableByteArray(lastEtag.ToByteArray());
                    // whatever we succeeded in indexing or not, we have to update this
                    // because otherwise we keep trying to re-index failed mapped results
                    transactionalStorage.Batch(actions =>
                    {
                        if (new ComparableByteArray(indexToWorkOn.LastIndexedEtag.ToByteArray()).CompareTo(lastIndexedEtag) <= 0)
                        {
                            actions.Indexing.UpdateLastReduced(indexToWorkOn.IndexName, lastByEtag.Etag, lastByEtag.Timestamp);
                        }
                    });
                }
            }
        }
Beispiel #10
0
        protected ReducingPerformanceStats[] HandleReduceForIndex(IndexToWorkOn indexToWorkOn, CancellationToken token)
        {
            var viewGenerator = context.IndexDefinitionStorage.GetViewGenerator(indexToWorkOn.IndexId);

            if (viewGenerator == null)
            {
                return(null);
            }

            bool operationCanceled = false;
            var  itemsToDelete     = new ConcurrentSet <object>();

            var singleStepReduceKeys = new List <string>();
            var multiStepsReduceKeys = new List <string>();

            transactionalStorage.Batch(actions =>
            {
                var mappedResultsInfo = actions.MapReduce.GetReduceTypesPerKeys(indexToWorkOn.IndexId,
                                                                                context.CurrentNumberOfItemsToReduceInSingleBatch,
                                                                                context.NumberOfItemsToExecuteReduceInSingleStep, token);

                foreach (var key in mappedResultsInfo)
                {
                    token.ThrowIfCancellationRequested();

                    switch (key.OperationTypeToPerform)
                    {
                    case ReduceType.SingleStep:
                        singleStepReduceKeys.Add(key.ReduceKey);
                        break;

                    case ReduceType.MultiStep:
                        multiStepsReduceKeys.Add(key.ReduceKey);
                        break;
                    }
                }
            });

            currentlyProcessedIndexes.TryAdd(indexToWorkOn.IndexId, indexToWorkOn.Index);

            var performanceStats = new List <ReducingPerformanceStats>();

            try
            {
                if (singleStepReduceKeys.Count > 0)
                {
                    if (Log.IsDebugEnabled)
                    {
                        Log.Debug("SingleStep reduce for keys: {0}", string.Join(",", singleStepReduceKeys));
                    }

                    var singleStepStats = SingleStepReduce(indexToWorkOn, singleStepReduceKeys, viewGenerator, itemsToDelete, token);

                    performanceStats.Add(singleStepStats);
                }

                if (multiStepsReduceKeys.Count > 0)
                {
                    if (Log.IsDebugEnabled)
                    {
                        Log.Debug("MultiStep reduce for keys: {0}", string.Join(",", multiStepsReduceKeys));
                    }

                    var multiStepStats = MultiStepReduce(indexToWorkOn, multiStepsReduceKeys, viewGenerator, itemsToDelete, token);

                    performanceStats.Add(multiStepStats);
                }
            }
            catch (OperationCanceledException)
            {
                operationCanceled = true;
            }
            catch (AggregateException e)
            {
                var anyOperationsCanceled = e
                                            .InnerExceptions
                                            .OfType <OperationCanceledException>()
                                            .Any();

                if (anyOperationsCanceled == false)
                {
                    throw;
                }

                operationCanceled = true;
            }
            finally
            {
                var postReducingOperations = new ReduceLevelPeformanceStats
                {
                    Level   = -1,
                    Started = SystemTime.UtcNow
                };

                if (operationCanceled == false)
                {
                    var deletingScheduledReductionsDuration = new Stopwatch();
                    var storageCommitDuration = new Stopwatch();

                    // whatever we succeeded in indexing or not, we have to update this
                    // because otherwise we keep trying to re-index failed mapped results
                    transactionalStorage.Batch(actions =>
                    {
                        actions.BeforeStorageCommit += storageCommitDuration.Start;
                        actions.AfterStorageCommit  += storageCommitDuration.Stop;

                        ScheduledReductionInfo latest;

                        using (StopwatchScope.For(deletingScheduledReductionsDuration))
                        {
                            latest = actions.MapReduce.DeleteScheduledReduction(itemsToDelete);
                        }

                        if (latest == null)
                        {
                            return;
                        }
                        actions.Indexing.UpdateLastReduced(indexToWorkOn.IndexId, latest.Etag, latest.Timestamp);
                    });

                    postReducingOperations.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_DeleteScheduledReductions, deletingScheduledReductionsDuration.ElapsedMilliseconds));
                    postReducingOperations.Operations.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds));
                }

                postReducingOperations.Completed = SystemTime.UtcNow;
                postReducingOperations.Duration  = postReducingOperations.Completed - postReducingOperations.Started;

                performanceStats.Add(new ReducingPerformanceStats(ReduceType.None)
                {
                    LevelStats = new List <ReduceLevelPeformanceStats> {
                        postReducingOperations
                    }
                });

                Index _;
                currentlyProcessedIndexes.TryRemove(indexToWorkOn.IndexId, out _);
            }

            return(performanceStats.ToArray());
        }
Beispiel #11
0
        private ReducingPerformanceStats MultiStepReduce(IndexToWorkOn index, List <string> keysToReduce, AbstractViewGenerator viewGenerator, ConcurrentSet <object> itemsToDelete, CancellationToken token)
        {
            var needToMoveToMultiStep = new HashSet <string>();

            transactionalStorage.Batch(actions =>
            {
                foreach (var localReduceKey in keysToReduce)
                {
                    token.ThrowIfCancellationRequested();

                    var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexId, localReduceKey);

                    if (lastPerformedReduceType != ReduceType.MultiStep)
                    {
                        needToMoveToMultiStep.Add(localReduceKey);
                    }

                    if (lastPerformedReduceType != ReduceType.SingleStep)
                    {
                        continue;
                    }

                    // we exceeded the limit of items to reduce in single step
                    // now we need to schedule reductions at level 0 for all map results with given reduce key
                    var mappedItems = actions.MapReduce.GetMappedBuckets(index.IndexId, localReduceKey, token).ToList();
                    foreach (var result in mappedItems.Select(x => new ReduceKeyAndBucket(x, localReduceKey)))
                    {
                        actions.MapReduce.ScheduleReductions(index.IndexId, 0, result);
                    }
                }
            });

            var reducePerformance = new ReducingPerformanceStats(ReduceType.MultiStep);


            for (int i = 0; i < 3; i++)
            {
                var level = i;

                var reduceLevelStats = new ReduceLevelPeformanceStats()
                {
                    Level   = level,
                    Started = SystemTime.UtcNow,
                };

                var reduceParams = new GetItemsToReduceParams(
                    index.IndexId,
                    new HashSet <string>(keysToReduce),
                    level,
                    true,
                    itemsToDelete);

                var gettingItemsToReduceDuration = new Stopwatch();
                var scheduleReductionsDuration   = new Stopwatch();
                var removeReduceResultsDuration  = new Stopwatch();
                var storageCommitDuration        = new Stopwatch();

                bool retry = true;
                while (retry && reduceParams.ReduceKeys.Count > 0)
                {
                    var reduceBatchAutoThrottlerId = Guid.NewGuid();
                    try
                    {
                        transactionalStorage.Batch(actions =>
                        {
                            token.ThrowIfCancellationRequested();

                            actions.BeforeStorageCommit += storageCommitDuration.Start;
                            actions.AfterStorageCommit  += storageCommitDuration.Stop;

                            var batchTimeWatcher = Stopwatch.StartNew();

                            reduceParams.Take = context.CurrentNumberOfItemsToReduceInSingleBatch;

                            int size = 0;

                            IList <MappedResultInfo> persistedResults;
                            var reduceKeys = new HashSet <string>(StringComparer.InvariantCultureIgnoreCase);
                            using (StopwatchScope.For(gettingItemsToReduceDuration))
                            {
                                persistedResults = actions.MapReduce.GetItemsToReduce(reduceParams, token);

                                foreach (var item in persistedResults)
                                {
                                    reduceKeys.Add(item.ReduceKey);
                                    size += item.Size;
                                }
                            }

                            if (persistedResults.Count == 0)
                            {
                                retry = false;
                                return;
                            }

                            var count = persistedResults.Count;

                            autoTuner.CurrentlyUsedBatchSizesInBytes.GetOrAdd(reduceBatchAutoThrottlerId, size);

                            if (Log.IsDebugEnabled)
                            {
                                if (persistedResults.Count > 0)
                                {
                                    Log.Debug(() => string.Format("Found {0} results for keys [{1}] for index {2} at level {3} in {4}",
                                                                  persistedResults.Count,
                                                                  string.Join(", ", persistedResults.Select(x => x.ReduceKey).Distinct()),
                                                                  index.Index.PublicName, level, batchTimeWatcher.Elapsed));
                                }
                                else
                                {
                                    Log.Debug("No reduce keys found for {0}", index.Index.PublicName);
                                }
                            }

                            token.ThrowIfCancellationRequested();


                            var requiredReduceNextTimeSet = new HashSet <ReduceKeyAndBucket>(persistedResults.Select(x => new ReduceKeyAndBucket(x.Bucket, x.ReduceKey)), ReduceKeyAndBucketEqualityComparer.Instance);

                            using (StopwatchScope.For(removeReduceResultsDuration))
                            {
                                foreach (var mappedResultInfo in requiredReduceNextTimeSet)
                                {
                                    token.ThrowIfCancellationRequested();

                                    actions.MapReduce.RemoveReduceResults(index.IndexId, level + 1, mappedResultInfo.ReduceKey, mappedResultInfo.Bucket);
                                }
                            }

                            if (level != 2)
                            {
                                var reduceKeysAndBucketsSet = new HashSet <ReduceKeyAndBucket>(requiredReduceNextTimeSet.Select(x => new ReduceKeyAndBucket(x.Bucket / 1024, x.ReduceKey)), ReduceKeyAndBucketEqualityComparer.Instance);

                                using (StopwatchScope.For(scheduleReductionsDuration))
                                {
                                    foreach (var reduceKeysAndBucket in reduceKeysAndBucketsSet)
                                    {
                                        token.ThrowIfCancellationRequested();

                                        actions.MapReduce.ScheduleReductions(index.IndexId, level + 1, reduceKeysAndBucket);
                                    }
                                }
                            }

                            token.ThrowIfCancellationRequested();

                            var reduceTimeWatcher = Stopwatch.StartNew();

                            var results = persistedResults.Where(x => x.Data != null)
                                          .GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data))
                                          .ToList();

                            var performance = context.IndexStorage.Reduce(index.IndexId, viewGenerator, results, level, context, actions, reduceKeys, persistedResults.Count);

                            context.MetricsCounters.ReducedPerSecond.Mark(results.Count());

                            reduceLevelStats.Add(performance);

                            var batchDuration = batchTimeWatcher.Elapsed;

                            if (Log.IsDebugEnabled)
                            {
                                Log.Debug("Indexed {0} reduce keys in {1} with {2} results for index {3} in {4} on level {5}", reduceKeys.Count, batchDuration, performance.ItemsCount, index.Index.PublicName, reduceTimeWatcher.Elapsed, level);
                            }

                            autoTuner.AutoThrottleBatchSize(count, size, batchDuration);
                        });
                    }
                    finally
                    {
                        long _;
                        autoTuner.CurrentlyUsedBatchSizesInBytes.TryRemove(reduceBatchAutoThrottlerId, out _);
                    }
                }

                reduceLevelStats.Completed = SystemTime.UtcNow;
                reduceLevelStats.Duration  = reduceLevelStats.Completed - reduceLevelStats.Started;

                reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_GetItemsToReduce, gettingItemsToReduceDuration.ElapsedMilliseconds));
                reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_ScheduleReductions, scheduleReductionsDuration.ElapsedMilliseconds));
                reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_RemoveReduceResults, removeReduceResultsDuration.ElapsedMilliseconds));
                reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds));

                reducePerformance.LevelStats.Add(reduceLevelStats);
            }

            foreach (var reduceKey in needToMoveToMultiStep)
            {
                token.ThrowIfCancellationRequested();

                string localReduceKey = reduceKey;
                transactionalStorage.Batch(actions =>
                                           actions.MapReduce.UpdatePerformedReduceType(index.IndexId, localReduceKey,
                                                                                       ReduceType.MultiStep));
            }

            return(reducePerformance);
        }
Beispiel #12
0
        private void MultiStepReduce(IndexToWorkOn index, string[] keysToReduce, AbstractViewGenerator viewGenerator, ConcurrentSet <object> itemsToDelete)
        {
            var needToMoveToMultiStep = new HashSet <string>();

            transactionalStorage.Batch(actions =>
            {
                foreach (var localReduceKey in keysToReduce)
                {
                    var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexName, localReduceKey);

                    if (lastPerformedReduceType != ReduceType.MultiStep)
                    {
                        needToMoveToMultiStep.Add(localReduceKey);
                    }

                    if (lastPerformedReduceType != ReduceType.SingleStep)
                    {
                        continue;
                    }
                    // we exceeded the limit of items to reduce in single step
                    // now we need to schedule reductions at level 0 for all map results with given reduce key
                    var mappedItems = actions.MapReduce.GetMappedBuckets(index.IndexName, localReduceKey).ToList();
                    foreach (var result in mappedItems.Select(x => new ReduceKeyAndBucket(x, localReduceKey)))
                    {
                        actions.MapReduce.ScheduleReductions(index.IndexName, 0, result);
                    }
                }
            });

            for (int i = 0; i < 3; i++)
            {
                var level = i;

                var reduceParams = new GetItemsToReduceParams(
                    index.IndexName,
                    keysToReduce,
                    level,
                    true,
                    itemsToDelete);

                bool retry = true;
                while (retry && reduceParams.ReduceKeys.Count > 0)
                {
                    var reduceBatchAutoThrottlerId = Guid.NewGuid();
                    try
                    {
                        transactionalStorage.Batch(actions =>
                        {
                            context.CancellationToken.ThrowIfCancellationRequested();

                            var batchTimeWatcher = Stopwatch.StartNew();

                            reduceParams.Take    = context.CurrentNumberOfItemsToReduceInSingleBatch;
                            var persistedResults = actions.MapReduce.GetItemsToReduce(reduceParams).ToList();
                            if (persistedResults.Count == 0)
                            {
                                retry = false;
                                return;
                            }

                            var count = persistedResults.Count;
                            var size  = persistedResults.Sum(x => x.Size);
                            autoTuner.CurrentlyUsedBatchSizes.GetOrAdd(reduceBatchAutoThrottlerId, size);

                            if (Log.IsDebugEnabled)
                            {
                                if (persistedResults.Count > 0)
                                {
                                    Log.Debug(() => string.Format("Found {0} results for keys [{1}] for index {2} at level {3} in {4}",
                                                                  persistedResults.Count,
                                                                  string.Join(", ", persistedResults.Select(x => x.ReduceKey).Distinct()),
                                                                  index.IndexName, level, batchTimeWatcher.Elapsed));
                                }
                                else
                                {
                                    Log.Debug("No reduce keys found for {0}", index.IndexName);
                                }
                            }

                            context.CancellationToken.ThrowIfCancellationRequested();

                            var requiredReduceNextTime = persistedResults.Select(x => new ReduceKeyAndBucket(x.Bucket, x.ReduceKey))
                                                         .OrderBy(x => x.Bucket)
                                                         .Distinct()
                                                         .ToArray();
                            foreach (var mappedResultInfo in requiredReduceNextTime)
                            {
                                actions.MapReduce.RemoveReduceResults(index.IndexName, level + 1, mappedResultInfo.ReduceKey,
                                                                      mappedResultInfo.Bucket);
                            }

                            if (level != 2)
                            {
                                var reduceKeysAndBuckets = requiredReduceNextTime
                                                           .Select(x => new ReduceKeyAndBucket(x.Bucket / 1024, x.ReduceKey))
                                                           .Distinct()
                                                           .ToArray();
                                foreach (var reduceKeysAndBucket in reduceKeysAndBuckets)
                                {
                                    actions.MapReduce.ScheduleReductions(index.IndexName, level + 1, reduceKeysAndBucket);
                                }
                            }

                            var results = persistedResults
                                          .Where(x => x.Data != null)
                                          .GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data))
                                          .ToArray();
                            var reduceKeys = new HashSet <string>(persistedResults.Select(x => x.ReduceKey),
                                                                  StringComparer.InvariantCultureIgnoreCase);
                            context.PerformanceCounters.ReducedPerSecond.IncrementBy(results.Length);

                            context.CancellationToken.ThrowIfCancellationRequested();
                            var reduceTimeWatcher = Stopwatch.StartNew();

                            context.IndexStorage.Reduce(index.IndexName, viewGenerator, results, level, context, actions, reduceKeys, persistedResults.Count);

                            var batchDuration = batchTimeWatcher.Elapsed;
                            Log.Debug("Indexed {0} reduce keys in {1} with {2} results for index {3} in {4} on level {5}", reduceKeys.Count, batchDuration,
                                      results.Length, index.IndexName, reduceTimeWatcher.Elapsed, level);

                            autoTuner.AutoThrottleBatchSize(count, size, batchDuration);
                        });
                    }
                    finally
                    {
                        long _;
                        autoTuner.CurrentlyUsedBatchSizes.TryRemove(reduceBatchAutoThrottlerId, out _);
                    }
                }
            }

            foreach (var reduceKey in needToMoveToMultiStep)
            {
                string localReduceKey = reduceKey;
                transactionalStorage.Batch(actions =>
                                           actions.MapReduce.UpdatePerformedReduceType(index.IndexName, localReduceKey,
                                                                                       ReduceType.MultiStep));
            }
        }
        public void IndexPrecomputedBatch(PrecomputedIndexingBatch precomputedBatch)
        {
            context.MetricsCounters.IndexedPerSecond.Mark(precomputedBatch.Documents.Count);

            var indexToWorkOn = new IndexToWorkOn()
            {
                Index = precomputedBatch.Index,
                IndexId = precomputedBatch.Index.indexId,
                LastIndexedEtag = Etag.Empty
            };

	        using (MapIndexingInProgress(new List<IndexToWorkOn> {indexToWorkOn}))
	        {
				var indexingBatchForIndex =
					FilterIndexes(new List<IndexToWorkOn>() { indexToWorkOn }, precomputedBatch.Documents,
									precomputedBatch.LastIndexed).FirstOrDefault();

				if (indexingBatchForIndex == null)
					return;

				IndexingBatchInfo batchInfo = null;

		        try
		        {
			        context.ReportIndexingBatchStarted(precomputedBatch.Documents.Count, precomputedBatch.Documents.Sum(x => x.SerializedSizeOnDisk), new List<string>()
			        {
				        indexToWorkOn.Index.PublicName
			        }, out batchInfo);

			        batchInfo.BatchType = BatchType.Precomputed;

			        if (Log.IsDebugEnabled)
			        {
				        Log.Debug("Going to index precomputed documents for a new index {0}. Count of precomputed docs {1}",
					        precomputedBatch.Index.PublicName, precomputedBatch.Documents.Count);
			        }

			        HandleIndexingFor(indexingBatchForIndex, precomputedBatch.LastIndexed, precomputedBatch.LastModified);
		        }
		        finally
		        {
			        var performance = indexingBatchForIndex.Batch.GetIndexingPerformance();

			        if (batchInfo != null)
			        {
				        if (performance != null)
					        batchInfo.PerformanceStats.TryAdd(indexingBatchForIndex.Index.PublicName, performance);

				        batchInfo.BatchCompleted();
			        }
		        }
	        }
        }
Beispiel #14
0
		private void MultiStepReduce(IndexToWorkOn index, string[] keysToReduce, AbstractViewGenerator viewGenerator, ConcurrentSet<object> itemsToDelete)
		{
			var needToMoveToMultiStep = new HashSet<string>();
			transactionalStorage.Batch(actions =>
			{
				foreach (var localReduceKey in keysToReduce)
				{
					var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexName, localReduceKey);

					if (lastPerformedReduceType != ReduceType.MultiStep)
						needToMoveToMultiStep.Add(localReduceKey);

					if (lastPerformedReduceType != ReduceType.SingleStep)
						continue;
					// we exceeded the limit of items to reduce in single step
					// now we need to schedule reductions at level 0 for all map results with given reduce key
					var mappedItems = actions.MapReduce.GetMappedBuckets(index.IndexName, localReduceKey).ToList();
					foreach (var result in mappedItems.Select(x => new ReduceKeyAndBucket(x, localReduceKey)))
					{
						actions.MapReduce.ScheduleReductions(index.IndexName, 0, result);
					}
				}
			});

			for (int i = 0; i < 3; i++)
			{
				var level = i;

				var reduceParams = new GetItemsToReduceParams(
					index.IndexName,
					keysToReduce,
					level,
					true,
					itemsToDelete);

				bool retry = true;
				while (retry && reduceParams.ReduceKeys.Count > 0)
				{
					var reduceBatchAutoThrottlerId = Guid.NewGuid();
					try
					{
						transactionalStorage.Batch(actions =>
						{
							context.CancellationToken.ThrowIfCancellationRequested();

							var batchTimeWatcher = Stopwatch.StartNew();

							reduceParams.Take = context.CurrentNumberOfItemsToReduceInSingleBatch;
							var persistedResults = actions.MapReduce.GetItemsToReduce(reduceParams).ToList();
							if (persistedResults.Count == 0)
							{
								retry = false;
								return;
							}

							var count = persistedResults.Count;
							var size = persistedResults.Sum(x => x.Size);
							autoTuner.CurrentlyUsedBatchSizes.GetOrAdd(reduceBatchAutoThrottlerId, size);

							if (Log.IsDebugEnabled)
							{
								if (persistedResults.Count > 0)
									Log.Debug(() => string.Format("Found {0} results for keys [{1}] for index {2} at level {3} in {4}",
										persistedResults.Count,
										string.Join(", ", persistedResults.Select(x => x.ReduceKey).Distinct()),
										index.IndexName, level, batchTimeWatcher.Elapsed));
								else
									Log.Debug("No reduce keys found for {0}", index.IndexName);
							}

							context.CancellationToken.ThrowIfCancellationRequested();

							var requiredReduceNextTime = persistedResults.Select(x => new ReduceKeyAndBucket(x.Bucket, x.ReduceKey))
								.OrderBy(x => x.Bucket)
								.Distinct()
								.ToArray();
							foreach (var mappedResultInfo in requiredReduceNextTime)
							{
								actions.MapReduce.RemoveReduceResults(index.IndexName, level + 1, mappedResultInfo.ReduceKey,
									mappedResultInfo.Bucket);
							}

							if (level != 2)
							{
								var reduceKeysAndBuckets = requiredReduceNextTime
									.Select(x => new ReduceKeyAndBucket(x.Bucket / 1024, x.ReduceKey))
									.Distinct()
									.ToArray();
								foreach (var reduceKeysAndBucket in reduceKeysAndBuckets)
								{
									actions.MapReduce.ScheduleReductions(index.IndexName, level + 1, reduceKeysAndBucket);
								}
							}

							var results = persistedResults
								.Where(x => x.Data != null)
								.GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data))
								.ToArray();
							var reduceKeys = new HashSet<string>(persistedResults.Select(x => x.ReduceKey),
								StringComparer.InvariantCultureIgnoreCase);
							context.PerformanceCounters.ReducedPerSecond.IncrementBy(results.Length);

							context.CancellationToken.ThrowIfCancellationRequested();
							var reduceTimeWatcher = Stopwatch.StartNew();

							context.IndexStorage.Reduce(index.IndexName, viewGenerator, results, level, context, actions, reduceKeys, persistedResults.Count);

							var batchDuration = batchTimeWatcher.Elapsed;
							Log.Debug("Indexed {0} reduce keys in {1} with {2} results for index {3} in {4} on level {5}", reduceKeys.Count, batchDuration,
								results.Length, index.IndexName, reduceTimeWatcher.Elapsed, level);

							autoTuner.AutoThrottleBatchSize(count, size, batchDuration);
						});
					}
					finally
					{
						long _;
						autoTuner.CurrentlyUsedBatchSizes.TryRemove(reduceBatchAutoThrottlerId, out _);
					}
				}
			}

			foreach (var reduceKey in needToMoveToMultiStep)
			{
				string localReduceKey = reduceKey;
				transactionalStorage.Batch(actions =>
										   actions.MapReduce.UpdatePerformedReduceType(index.IndexName, localReduceKey,
																					   ReduceType.MultiStep));
			}
		}
Beispiel #15
0
		private ReduceResultStats MultiStepReduce(IndexToWorkOn index, string[] keysToReduce, AbstractViewGenerator viewGenerator, List<object> itemsToDelete)
		{
			var result = new ReduceResultStats();
			var needToMoveToMultiStep = new HashSet<string>();
			transactionalStorage.Batch(actions =>
			{
				foreach (var localReduceKey in keysToReduce)
				{
					var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexName, localReduceKey);

					if (lastPerformedReduceType != ReduceType.MultiStep)
						needToMoveToMultiStep.Add(localReduceKey);

					if (lastPerformedReduceType != ReduceType.SingleStep)
						continue;
					// we exceeded the limit of items to reduce in single step
					// now we need to schedule reductions at level 0 for all map results with given reduce key
					var mappedItems = actions.MapReduce.GetMappedBuckets(index.IndexName, localReduceKey).ToList();
					actions.MapReduce.ScheduleReductions(index.IndexName, 0,
					                                     mappedItems.Select(x => new ReduceKeyAndBucket(x, localReduceKey)));
				}
			});

			for (int i = 0; i < 3; i++)
			{
				var level = i;

				transactionalStorage.Batch(actions =>
				{
					context.CancellationToken.ThrowIfCancellationRequested();

					var persistedResults = actions.MapReduce.GetItemsToReduce
						(
							level: level,
							reduceKeys: keysToReduce,
							index: index.IndexName,
							itemsToDelete: itemsToDelete,
							loadData: true
						).ToList();

					var sp = Stopwatch.StartNew();

					result.count += persistedResults.Count;
					result.size += persistedResults.Sum(x => x.Size);

					if (Log.IsDebugEnabled)
					{
						if (persistedResults.Count > 0)
							Log.Debug(() => string.Format("Found {0} results for keys [{1}] for index {2} at level {3} in {4}",
							                              persistedResults.Count,
							                              string.Join(", ", persistedResults.Select(x => x.ReduceKey).Distinct()),
							                              index.IndexName, level, sp.Elapsed));
						else
							Log.Debug("No reduce keys found for {0}", index.IndexName);
					}

					context.CancellationToken.ThrowIfCancellationRequested();

					var requiredReduceNextTime = persistedResults.Select(x => new ReduceKeyAndBucket(x.Bucket, x.ReduceKey))
					                                             .OrderBy(x => x.Bucket)
					                                             .Distinct()
					                                             .ToArray();
					foreach (var mappedResultInfo in requiredReduceNextTime)
					{
						actions.MapReduce.RemoveReduceResults(index.IndexName, level + 1, mappedResultInfo.ReduceKey,
						                                      mappedResultInfo.Bucket);
					}

					if (level != 2)
					{
						var reduceKeysAndBuckets = requiredReduceNextTime
							.Select(x => new ReduceKeyAndBucket(x.Bucket/1024, x.ReduceKey))
							.Distinct()
							.ToArray();
						actions.MapReduce.ScheduleReductions(index.IndexName, level + 1, reduceKeysAndBuckets);
					}

					var results = persistedResults
						.Where(x => x.Data != null)
						.GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data))
						.ToArray();
					var reduceKeys = new HashSet<string>(persistedResults.Select(x => x.ReduceKey),
					                                     StringComparer.InvariantCultureIgnoreCase);
					context.ReducedPerSecIncreaseBy(results.Length);

					context.CancellationToken.ThrowIfCancellationRequested();
					sp = Stopwatch.StartNew();
					context.IndexStorage.Reduce(index.IndexName, viewGenerator, results, level, context, actions, reduceKeys);
					Log.Debug("Indexed {0} reduce keys in {1} with {2} results for index {3} in {4}", reduceKeys.Count, sp.Elapsed,
					          results.Length, index.IndexName, sp.Elapsed);
				});
			}

			foreach (var reduceKey in needToMoveToMultiStep)
			{
				string localReduceKey = reduceKey;
				transactionalStorage.Batch(actions =>
				                           actions.MapReduce.UpdatePerformedReduceType(index.IndexName, localReduceKey,
				                                                                       ReduceType.MultiStep));
			}

			return result;
		}
Beispiel #16
0
		private ReducingPerformanceStats MultiStepReduce(IndexToWorkOn index, List<string> keysToReduce, AbstractViewGenerator viewGenerator, ConcurrentSet<object> itemsToDelete, CancellationToken token)
		{
			var needToMoveToMultiStep = new HashSet<string>();
			transactionalStorage.Batch(actions =>
			{
				foreach (var localReduceKey in keysToReduce)
				{
					token.ThrowIfCancellationRequested();

					var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexId, localReduceKey);

					if (lastPerformedReduceType != ReduceType.MultiStep)
						needToMoveToMultiStep.Add(localReduceKey);

					if (lastPerformedReduceType != ReduceType.SingleStep)
						continue;

					// we exceeded the limit of items to reduce in single step
					// now we need to schedule reductions at level 0 for all map results with given reduce key
					var mappedItems = actions.MapReduce.GetMappedBuckets(index.IndexId, localReduceKey, token).ToList();
					foreach (var result in mappedItems.Select(x => new ReduceKeyAndBucket(x, localReduceKey)))
					{
						actions.MapReduce.ScheduleReductions(index.IndexId, 0, result);
					}
				}
			});

			var reducePerformance = new ReducingPerformanceStats(ReduceType.MultiStep);

            var keysToReduceSet = new HashSet<string>(keysToReduce);

			for (int i = 0; i < 3; i++)
			{
				var level = i;

				var reduceLevelStats = new ReduceLevelPeformanceStats()
				{
					Level = level,
					Started = SystemTime.UtcNow,
				};

				var reduceParams = new GetItemsToReduceParams(
					index.IndexId,
                    keysToReduceSet,
					level,
					true,
					itemsToDelete);

				var gettingItemsToReduceDuration = new Stopwatch();
				var scheduleReductionsDuration = new Stopwatch();
				var removeReduceResultsDuration = new Stopwatch();
				var storageCommitDuration = new Stopwatch();

				bool retry = true;
				while (retry && reduceParams.ReduceKeys.Count > 0)
				{
					var reduceBatchAutoThrottlerId = Guid.NewGuid();
					try
					{
						transactionalStorage.Batch(actions =>
						{
							token.ThrowIfCancellationRequested();

							actions.BeforeStorageCommit += storageCommitDuration.Start;
							actions.AfterStorageCommit += storageCommitDuration.Stop;

							var batchTimeWatcher = Stopwatch.StartNew();

							reduceParams.Take = context.CurrentNumberOfItemsToReduceInSingleBatch;

                            int size = 0;                  
          
                            IList<MappedResultInfo> persistedResults;
                            var reduceKeys = new HashSet<string>(StringComparer.InvariantCultureIgnoreCase);
							using (StopwatchScope.For(gettingItemsToReduceDuration))
							{
                                persistedResults = actions.MapReduce.GetItemsToReduce(reduceParams, token);                                

                                foreach (var item in persistedResults)
                                {
                                    reduceKeys.Add(item.ReduceKey);
                                    size += item.Size;
                                }
							}

                            if (persistedResults.Count == 0)
							{
								retry = false;
								return;
							}

                            var count = persistedResults.Count;

							autoTuner.CurrentlyUsedBatchSizesInBytes.GetOrAdd(reduceBatchAutoThrottlerId, size);

							if (Log.IsDebugEnabled)
							{
                                if (persistedResults.Count > 0)
                                {
                                    Log.Debug(() => string.Format("Found {0} results for keys [{1}] for index {2} at level {3} in {4}",
                                        persistedResults.Count,
                                        string.Join(", ", persistedResults.Select(x => x.ReduceKey).Distinct()),
                                        index.IndexId, level, batchTimeWatcher.Elapsed));
                                }
								else
                                {
                                    Log.Debug("No reduce keys found for {0}", index.IndexId);
                                }									
							}

							token.ThrowIfCancellationRequested();


                            var requiredReduceNextTimeSet = new HashSet<ReduceKeyAndBucket>(persistedResults.Select(x => new ReduceKeyAndBucket(x.Bucket, x.ReduceKey)), ReduceKeyAndBucketEqualityComparer.Instance);

							using (StopwatchScope.For(removeReduceResultsDuration))
							{
                                foreach (var mappedResultInfo in requiredReduceNextTimeSet)
								{
									token.ThrowIfCancellationRequested();

									actions.MapReduce.RemoveReduceResults(index.IndexId, level + 1, mappedResultInfo.ReduceKey, mappedResultInfo.Bucket);
								}
							}

							if (level != 2)
							{
                                var reduceKeysAndBucketsSet = new HashSet<ReduceKeyAndBucket>(requiredReduceNextTimeSet.Select(x => new ReduceKeyAndBucket(x.Bucket / 1024, x.ReduceKey)), ReduceKeyAndBucketEqualityComparer.Instance);

								using (StopwatchScope.For(scheduleReductionsDuration))
								{
                                    foreach (var reduceKeysAndBucket in reduceKeysAndBucketsSet)
									{
										token.ThrowIfCancellationRequested();

										actions.MapReduce.ScheduleReductions(index.IndexId, level + 1, reduceKeysAndBucket);
									}
								}
							}

							token.ThrowIfCancellationRequested();

							var reduceTimeWatcher = Stopwatch.StartNew();

                            var results = persistedResults.Where(x => x.Data != null)
                                                          .GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data));                            

                            var performance = context.IndexStorage.Reduce(index.IndexId, viewGenerator, results, level, context, actions, reduceKeys, persistedResults.Count);

                            context.MetricsCounters.ReducedPerSecond.Mark(results.Count());

							reduceLevelStats.Add(performance);

							var batchDuration = batchTimeWatcher.Elapsed;

                            if ( Log.IsDebugEnabled )
                            {
                                Log.Debug("Indexed {0} reduce keys in {1} with {2} results for index {3} in {4} on level {5}", reduceKeys.Count, batchDuration, performance.ItemsCount, index.IndexId, reduceTimeWatcher.Elapsed, level);
                            }

							autoTuner.AutoThrottleBatchSize(count, size, batchDuration);
						});
					}
					finally
					{
						long _;
						autoTuner.CurrentlyUsedBatchSizesInBytes.TryRemove(reduceBatchAutoThrottlerId, out _);
					}
				}

				reduceLevelStats.Completed = SystemTime.UtcNow;
				reduceLevelStats.Duration = reduceLevelStats.Completed - reduceLevelStats.Started;

				reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_GetItemsToReduce, gettingItemsToReduceDuration.ElapsedMilliseconds));
				reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_ScheduleReductions, scheduleReductionsDuration.ElapsedMilliseconds));
				reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_RemoveReduceResults, removeReduceResultsDuration.ElapsedMilliseconds));
				reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds));

				reducePerformance.LevelStats.Add(reduceLevelStats);
			}

			foreach (var reduceKey in needToMoveToMultiStep)
			{
				token.ThrowIfCancellationRequested();

				string localReduceKey = reduceKey;
				transactionalStorage.Batch(actions =>
										   actions.MapReduce.UpdatePerformedReduceType(index.IndexId, localReduceKey,
																					   ReduceType.MultiStep));
			}

			return reducePerformance;
		}
Beispiel #17
0
        protected void HandleReduceForIndex(IndexToWorkOn indexToWorkOn)
        {
            var viewGenerator = context.IndexDefinitionStorage.GetViewGenerator(indexToWorkOn.IndexName);

            if (viewGenerator == null)
            {
                return;
            }

            TimeSpan reduceDuration    = TimeSpan.Zero;
            int      totalCount        = 0;
            int      totalSize         = 0;
            bool     operationCanceled = false;
            var      itemsToDelete     = new List <object>();

            IList <ReduceTypePerKey> mappedResultsInfo = null;

            transactionalStorage.Batch(actions =>
            {
                mappedResultsInfo = actions.MapReduce.GetReduceTypesPerKeys(indexToWorkOn.IndexName, context.NumberOfItemsToExecuteReduceInSingleStep).ToList();
            });

            var singleStepReduceKeys = mappedResultsInfo.Where(x => x.OperationTypeToPerform == ReduceType.SingleStep).Select(x => x.ReduceKey).ToArray();
            var multiStepsReduceKeys = mappedResultsInfo.Where(x => x.OperationTypeToPerform == ReduceType.MultiStep).Select(x => x.ReduceKey).ToArray();

            var sw = Stopwatch.StartNew();

            try
            {
                if (singleStepReduceKeys.Length > 0)
                {
                    var reduceCounters = SingleStepReduce(indexToWorkOn, singleStepReduceKeys, viewGenerator, itemsToDelete);
                    totalCount += reduceCounters.count;
                    totalSize  += reduceCounters.size;
                }

                if (multiStepsReduceKeys.Length > 0)
                {
                    var reduceCounters = MultiStepReduce(indexToWorkOn, multiStepsReduceKeys, viewGenerator, itemsToDelete);
                    totalCount += reduceCounters.count;
                    totalSize  += reduceCounters.size;
                }

                reduceDuration = sw.Elapsed;
            }
            catch (OperationCanceledException)
            {
                operationCanceled = true;
            }
            finally
            {
                if (operationCanceled == false)
                {
                    // whatever we succeeded in indexing or not, we have to update this
                    // because otherwise we keep trying to re-index failed mapped results
                    transactionalStorage.Batch(actions =>
                    {
                        var latest = actions.MapReduce.DeleteScheduledReduction(itemsToDelete);

                        if (latest == null)
                        {
                            return;
                        }
                        actions.Indexing.UpdateLastReduced(indexToWorkOn.IndexName, latest.Etag, latest.Timestamp);
                    });
                    autoTuner.AutoThrottleBatchSize(totalCount, totalSize, reduceDuration);
                }
            }
        }
Beispiel #18
0
        protected ReducingPerformanceStats[] HandleReduceForIndex(
            IndexToWorkOn indexToWorkOn, bool skipIncreasingBatchSize, CancellationToken token)
        {
            var viewGenerator = context.IndexDefinitionStorage.GetViewGenerator(indexToWorkOn.IndexId);

            if (viewGenerator == null)
            {
                return(null);
            }

            bool operationCanceled = false;
            var  itemsToDelete     = new ConcurrentSet <object>();

            var singleStepReduceKeys = new List <string>();
            var multiStepsReduceKeys = new List <string>();

            transactionalStorage.Batch(actions =>
            {
                var mappedResultsInfo = actions.MapReduce.GetReduceTypesPerKeys(indexToWorkOn.IndexId,
                                                                                context.CurrentNumberOfItemsToReduceInSingleBatch,
                                                                                context.NumberOfItemsToExecuteReduceInSingleStep, token);

                foreach (var key in mappedResultsInfo)
                {
                    token.ThrowIfCancellationRequested();

                    switch (key.OperationTypeToPerform)
                    {
                    case ReduceType.SingleStep:
                        singleStepReduceKeys.Add(key.ReduceKey);
                        break;

                    case ReduceType.MultiStep:
                        multiStepsReduceKeys.Add(key.ReduceKey);
                        break;
                    }
                }
            });

            var performanceStats = new List <ReducingPerformanceStats>();

            try
            {
                if (singleStepReduceKeys.Count > 0)
                {
                    if (Log.IsDebugEnabled)
                    {
                        Log.Debug("SingleStep reduce for keys: {0}", string.Join(",", singleStepReduceKeys));
                    }

                    var singleStepStats = SingleStepReduce(indexToWorkOn, singleStepReduceKeys, viewGenerator, itemsToDelete, skipIncreasingBatchSize, token);

                    performanceStats.Add(singleStepStats);
                }

                if (multiStepsReduceKeys.Count > 0)
                {
                    if (Log.IsDebugEnabled)
                    {
                        Log.Debug("MultiStep reduce for keys: {0}", string.Join(",", multiStepsReduceKeys));
                    }

                    var multiStepStats = MultiStepReduce(indexToWorkOn, multiStepsReduceKeys, viewGenerator, itemsToDelete, skipIncreasingBatchSize, token);

                    performanceStats.Add(multiStepStats);
                }
            }
            catch (IndexDoesNotExistsException)
            {
                // race condition -> index was deleted
                // we can ignore this
                operationCanceled = true;
            }
            catch (ObjectDisposedException)
            {
                // index was disposed
                // we can ignore this
                operationCanceled = true;
            }
            catch (Exception e)
            {
                if (HandleIfOutOfMemory(e, new OutOfMemoryDetails
                {
                    Index = indexToWorkOn.Index,
                    FailedItemsToProcessCount = singleStepReduceKeys.Count + multiStepsReduceKeys.Count,
                    IsReducing = true
                }))
                {
                    //if we got a OOME we need to decrease the batch size
                    operationCanceled = true;
                    return(null);
                }

                if (IsOperationCanceledException(e))
                {
                    operationCanceled = true;
                    return(null);
                }

                var message = $"Failed to reduce index: {indexToWorkOn.Index.PublicName} (id: {indexToWorkOn.IndexId}) " +
                              $"{singleStepReduceKeys.Count} single step keys and {multiStepsReduceKeys.Count} multi step keys. " +
                              "Skipping this batch (it won't be reduced)";

                indexToWorkOn.Index.AddIndexingError(e, message);
            }
            finally
            {
                var postReducingOperations = new ReduceLevelPeformanceStats
                {
                    Level   = -1,
                    Started = SystemTime.UtcNow
                };

                if (operationCanceled == false)
                {
                    // need to flush the changes made to the map-reduce index
                    // before commiting the deletions of the scheduled reductions
                    context.IndexStorage.FlushIndex(indexToWorkOn.IndexId, onlyAddIndexError: true);

                    var deletingScheduledReductionsDuration = new Stopwatch();
                    var storageCommitDuration = new Stopwatch();

                    // whatever we succeeded in indexing or not, we have to update this
                    // because otherwise we keep trying to re-index failed mapped results
                    transactionalStorage.Batch(actions =>
                    {
                        actions.BeforeStorageCommit += storageCommitDuration.Start;
                        actions.AfterStorageCommit  += storageCommitDuration.Stop;

                        ScheduledReductionInfo latest;

                        using (StopwatchScope.For(deletingScheduledReductionsDuration))
                        {
                            latest = actions.MapReduce.DeleteScheduledReduction(itemsToDelete, token);
                        }

                        if (latest == null)
                        {
                            return;
                        }

                        actions.Indexing.UpdateLastReduced(indexToWorkOn.IndexId, latest.Etag, latest.Timestamp);
                    });

                    postReducingOperations.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_DeleteScheduledReductions, deletingScheduledReductionsDuration.ElapsedMilliseconds));
                    postReducingOperations.Operations.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds));
                }

                postReducingOperations.Completed = SystemTime.UtcNow;
                postReducingOperations.Duration  = postReducingOperations.Completed - postReducingOperations.Started;

                performanceStats.Add(new ReducingPerformanceStats(ReduceType.None)
                {
                    LevelStats = new List <ReduceLevelPeformanceStats> {
                        postReducingOperations
                    }
                });
            }

            return(performanceStats.ToArray());
        }
Beispiel #19
0
		private ReduceResultStats SingleStepReduce(IndexToWorkOn index, string[] keysToReduce, AbstractViewGenerator viewGenerator,
		                                        List<object> itemsToDelete)
		{
			var result = new ReduceResultStats();
			var needToMoveToSingleStep = new HashSet<string>();

			Log.Debug(() => string.Format("Executing single step reducing for {0} keys [{1}]", keysToReduce.Length, string.Join(", ", keysToReduce)));
			transactionalStorage.Batch(actions =>
			{
				var scheduledItems = actions.MapReduce.GetItemsToReduce
						(
							level: 0,
							reduceKeys: keysToReduce,
							index: index.IndexName,
							itemsToDelete: itemsToDelete,
							loadData: false
						).ToList();

				// Only look at the scheduled batch for this run, not the entire set of pending reductions.
				//var batchKeys = scheduledItems.Select(x => x.ReduceKey).ToArray();

				foreach (var reduceKey in keysToReduce)
				{
					var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexName, reduceKey);

					if (lastPerformedReduceType != ReduceType.SingleStep)
						needToMoveToSingleStep.Add(reduceKey);

					if (lastPerformedReduceType != ReduceType.MultiStep)
						continue;

					Log.Debug("Key {0} was moved from multi step to single step reduce, removing existing reduce results records",
						reduceKey);

					// now we are in single step but previously multi step reduce was performed for the given key
					var mappedBuckets = actions.MapReduce.GetMappedBuckets(index.IndexName, reduceKey).ToList();

					// add scheduled items too to be sure we will delete reduce results of already deleted documents
					mappedBuckets.AddRange(scheduledItems.Select(x => x.Bucket));

					foreach (var mappedBucket in mappedBuckets.Distinct())
					{
						actions.MapReduce.RemoveReduceResults(index.IndexName, 1, reduceKey, mappedBucket);
						actions.MapReduce.RemoveReduceResults(index.IndexName, 2, reduceKey, mappedBucket / 1024);
					}
				}

				var mappedResults = actions.MapReduce.GetMappedResults(
						index.IndexName,
						keysToReduce, 
						loadData: true
					).ToList();

				result.count += mappedResults.Count;
				result.size += mappedResults.Sum(x => x.Size);

				var reduceKeys = new HashSet<string>(keysToReduce);

				mappedResults.ApplyIfNotNull(x => x.Bucket = 0);

				var results = mappedResults
					.Where(x => x.Data != null)
					.GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data))
					.ToArray();

				context.ReducedPerSecIncreaseBy(results.Length);

				context.IndexStorage.Reduce(index.IndexName, viewGenerator, results, 2, context, actions, reduceKeys);
			});

			foreach (var reduceKey in needToMoveToSingleStep)
			{
				string localReduceKey = reduceKey;
				transactionalStorage.Batch(actions =>
					actions.MapReduce.UpdatePerformedReduceType(index.IndexName, localReduceKey, ReduceType.SingleStep));
			}

			return result;
		}
        protected ReducingPerformanceStats[] HandleReduceForIndex(IndexToWorkOn indexToWorkOn)
        {
            var viewGenerator = context.IndexDefinitionStorage.GetViewGenerator(indexToWorkOn.IndexId);

            if (viewGenerator == null)
            {
                return(null);
            }

            bool operationCanceled = false;
            var  itemsToDelete     = new ConcurrentSet <object>();

            IList <ReduceTypePerKey> mappedResultsInfo = null;

            transactionalStorage.Batch(actions =>
            {
                mappedResultsInfo = actions.MapReduce.GetReduceTypesPerKeys(indexToWorkOn.IndexId,
                                                                            context.CurrentNumberOfItemsToReduceInSingleBatch,
                                                                            context.NumberOfItemsToExecuteReduceInSingleStep).ToList();
            });

            var singleStepReduceKeys = mappedResultsInfo.Where(x => x.OperationTypeToPerform == ReduceType.SingleStep).Select(x => x.ReduceKey).ToArray();
            var multiStepsReduceKeys = mappedResultsInfo.Where(x => x.OperationTypeToPerform == ReduceType.MultiStep).Select(x => x.ReduceKey).ToArray();

            currentlyProcessedIndexes.TryAdd(indexToWorkOn.IndexId, indexToWorkOn.Index);

            var performanceStats = new List <ReducingPerformanceStats>();

            try
            {
                if (singleStepReduceKeys.Length > 0)
                {
                    Log.Debug("SingleStep reduce for keys: {0}", singleStepReduceKeys.Select(x => x + ","));
                    var singleStepStats = SingleStepReduce(indexToWorkOn, singleStepReduceKeys, viewGenerator, itemsToDelete);

                    performanceStats.Add(singleStepStats);
                }

                if (multiStepsReduceKeys.Length > 0)
                {
                    Log.Debug("MultiStep reduce for keys: {0}", multiStepsReduceKeys.Select(x => x + ","));
                    var multiStepStats = MultiStepReduce(indexToWorkOn, multiStepsReduceKeys, viewGenerator, itemsToDelete);

                    performanceStats.Add(multiStepStats);
                }
            }
            catch (OperationCanceledException)
            {
                operationCanceled = true;
            }
            finally
            {
                var postReducingOperations = new ReduceLevelPeformanceStats
                {
                    Level   = -1,
                    Started = SystemTime.UtcNow
                };

                if (operationCanceled == false)
                {
                    var deletingScheduledReductionsDuration = new Stopwatch();
                    var storageCommitDuration = new Stopwatch();

                    // whatever we succeeded in indexing or not, we have to update this
                    // because otherwise we keep trying to re-index failed mapped results
                    transactionalStorage.Batch(actions =>
                    {
                        actions.BeforeStorageCommit += storageCommitDuration.Start;
                        actions.AfterStorageCommit  += storageCommitDuration.Stop;

                        ScheduledReductionInfo latest;

                        using (StopwatchScope.For(deletingScheduledReductionsDuration))
                        {
                            latest = actions.MapReduce.DeleteScheduledReduction(itemsToDelete);
                        }

                        if (latest == null)
                        {
                            return;
                        }
                        actions.Indexing.UpdateLastReduced(indexToWorkOn.Index.indexId, latest.Etag, latest.Timestamp);
                    });

                    postReducingOperations.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_DeleteScheduledReductions, deletingScheduledReductionsDuration.ElapsedMilliseconds));
                    postReducingOperations.Operations.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds));
                }

                postReducingOperations.Completed = SystemTime.UtcNow;
                postReducingOperations.Duration  = postReducingOperations.Completed - postReducingOperations.Started;

                performanceStats.Add(new ReducingPerformanceStats(ReduceType.None)
                {
                    LevelStats = new List <ReduceLevelPeformanceStats> {
                        postReducingOperations
                    }
                });

                Index _;
                currentlyProcessedIndexes.TryRemove(indexToWorkOn.IndexId, out _);
            }

            return(performanceStats.ToArray());
        }
Beispiel #21
0
        private ReducingPerformanceStats SingleStepReduce(IndexToWorkOn index, List <string> keysToReduce, AbstractViewGenerator viewGenerator,
                                                          ConcurrentSet <object> itemsToDelete, CancellationToken token)
        {
            var needToMoveToSingleStepQueue = new ConcurrentQueue <HashSet <string> >();

            if (Log.IsDebugEnabled)
            {
                Log.Debug(() => string.Format("Executing single step reducing for {0} keys [{1}]", keysToReduce.Count, string.Join(", ", keysToReduce)));
            }

            var batchTimeWatcher = Stopwatch.StartNew();

            var reducingBatchThrottlerId = Guid.NewGuid();
            var reducePerformanceStats   = new ReducingPerformanceStats(ReduceType.SingleStep);
            var reduceLevelStats         = new ReduceLevelPeformanceStats
            {
                Started = SystemTime.UtcNow,
                Level   = 2
            };

            try
            {
                var parallelOperations = new ConcurrentQueue <ParallelBatchStats>();

                var parallelProcessingStart = SystemTime.UtcNow;

                BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, keysToReduce, enumerator =>
                {
                    var parallelStats = new ParallelBatchStats
                    {
                        StartDelay = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds
                    };

                    var localNeedToMoveToSingleStep = new HashSet <string>();
                    needToMoveToSingleStepQueue.Enqueue(localNeedToMoveToSingleStep);
                    var localKeys = new HashSet <string>();
                    while (enumerator.MoveNext())
                    {
                        token.ThrowIfCancellationRequested();

                        localKeys.Add(enumerator.Current);
                    }

                    transactionalStorage.Batch(actions =>
                    {
                        var getItemsToReduceParams = new GetItemsToReduceParams(index: index.IndexId, reduceKeys: new HashSet <string>(localKeys), level: 0, loadData: false, itemsToDelete: itemsToDelete)
                        {
                            Take = int.MaxValue // just get all, we do the rate limit when we load the number of keys to reduce, anyway
                        };

                        var getItemsToReduceDuration = Stopwatch.StartNew();

                        int scheduledItemsSum   = 0;
                        int scheduledItemsCount = 0;
                        List <int> scheduledItemsMappedBuckets = new List <int>();
                        using (StopwatchScope.For(getItemsToReduceDuration))
                        {
                            foreach (var item in actions.MapReduce.GetItemsToReduce(getItemsToReduceParams, token))
                            {
                                scheduledItemsMappedBuckets.Add(item.Bucket);
                                scheduledItemsSum += item.Size;
                                scheduledItemsCount++;
                            }
                        }

                        parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_GetItemsToReduce, getItemsToReduceDuration.ElapsedMilliseconds));

                        autoTuner.CurrentlyUsedBatchSizesInBytes.GetOrAdd(reducingBatchThrottlerId, scheduledItemsSum);

                        if (scheduledItemsCount == 0)
                        {
                            // Here we have an interesting issue. We have scheduled reductions, because GetReduceTypesPerKeys() returned them
                            // and at the same time, we don't have any at level 0. That probably means that we have them at level 1 or 2.
                            // They shouldn't be here, and indeed, we remove them just a little down from here in this function.
                            // That said, they might have smuggled in between versions, or something happened to cause them to be here.
                            // In order to avoid that, we forcibly delete those extra items from the scheduled reductions, and move on

                            Log.Warn("Found single reduce items ({0}) that didn't have any items to reduce. Deleting level 1 & level 2 items for those keys. (If you can reproduce this, please contact [email protected])", string.Join(", ", keysToReduce));

                            var deletingScheduledReductionsDuration = Stopwatch.StartNew();

                            using (StopwatchScope.For(deletingScheduledReductionsDuration))
                            {
                                foreach (var reduceKey in keysToReduce)
                                {
                                    token.ThrowIfCancellationRequested();

                                    actions.MapReduce.DeleteScheduledReduction(index.IndexId, 1, reduceKey);
                                    actions.MapReduce.DeleteScheduledReduction(index.IndexId, 2, reduceKey);
                                }
                            }

                            parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_DeleteScheduledReductions, deletingScheduledReductionsDuration.ElapsedMilliseconds));
                        }

                        var removeReduceResultsDuration = new Stopwatch();

                        foreach (var reduceKey in localKeys)
                        {
                            token.ThrowIfCancellationRequested();

                            var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexId, reduceKey);

                            if (lastPerformedReduceType != ReduceType.SingleStep)
                            {
                                localNeedToMoveToSingleStep.Add(reduceKey);
                            }

                            if (lastPerformedReduceType != ReduceType.MultiStep)
                            {
                                continue;
                            }

                            if (Log.IsDebugEnabled)
                            {
                                Log.Debug("Key {0} was moved from multi step to single step reduce, removing existing reduce results records", reduceKey);
                            }

                            using (StopwatchScope.For(removeReduceResultsDuration))
                            {
                                // now we are in single step but previously multi step reduce was performed for the given key
                                var mappedBuckets = actions.MapReduce.GetMappedBuckets(index.IndexId, reduceKey, token);

                                // add scheduled items too to be sure we will delete reduce results of already deleted documents
                                foreach (var mappedBucket in mappedBuckets.Union(scheduledItemsMappedBuckets))
                                {
                                    actions.MapReduce.RemoveReduceResults(index.IndexId, 1, reduceKey, mappedBucket);
                                    actions.MapReduce.RemoveReduceResults(index.IndexId, 2, reduceKey, mappedBucket / 1024);
                                }
                            }
                        }

                        parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_RemoveReduceResults, removeReduceResultsDuration.ElapsedMilliseconds));

                        parallelOperations.Enqueue(parallelStats);
                    });
                });

                reduceLevelStats.Operations.Add(new ParallelPerformanceStats
                {
                    NumberOfThreads   = parallelOperations.Count,
                    DurationMs        = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds,
                    BatchedOperations = parallelOperations.ToList()
                });

                var getMappedResultsDuration = new Stopwatch();

                var reductionPerformanceStats = new List <IndexingPerformanceStats>();

                var keysLeftToReduce = new HashSet <string>(keysToReduce);
                while (keysLeftToReduce.Count > 0)
                {
                    var keysReturned = new HashSet <string>();

                    // Try to diminish the allocations happening because of .Resize()
                    var mappedResults = new List <MappedResultInfo>(keysLeftToReduce.Count);

                    context.TransactionalStorage.Batch(actions =>
                    {
                        var take = context.CurrentNumberOfItemsToReduceInSingleBatch;

                        using (StopwatchScope.For(getMappedResultsDuration))
                        {
                            mappedResults = actions.MapReduce.GetMappedResults(index.IndexId, keysLeftToReduce, true, take, keysReturned, token, mappedResults);
                        }
                    });

                    var count = mappedResults.Count;

                    int size = 0;
                    foreach (var item in mappedResults)
                    {
                        item.Bucket = 0;
                        size       += item.Size;
                    }

                    var results = mappedResults.GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data)).ToArray();

                    context.MetricsCounters.ReducedPerSecond.Mark(results.Length);

                    token.ThrowIfCancellationRequested();

                    var performance = context.IndexStorage.Reduce(index.IndexId, viewGenerator, results, 2, context, null, keysReturned, count);

                    reductionPerformanceStats.Add(performance);

                    autoTuner.AutoThrottleBatchSize(count, size, batchTimeWatcher.Elapsed);
                }

                var needToMoveToSingleStep = new HashSet <string>();

                HashSet <string> set;
                while (needToMoveToSingleStepQueue.TryDequeue(out set))
                {
                    needToMoveToSingleStep.UnionWith(set);
                }

                foreach (var reduceKey in needToMoveToSingleStep)
                {
                    string localReduceKey = reduceKey;
                    transactionalStorage.Batch(actions =>
                                               actions.MapReduce.UpdatePerformedReduceType(index.IndexId, localReduceKey, ReduceType.SingleStep));
                }

                reduceLevelStats.Completed = SystemTime.UtcNow;
                reduceLevelStats.Duration  = reduceLevelStats.Completed - reduceLevelStats.Started;
                reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_GetMappedResults, getMappedResultsDuration.ElapsedMilliseconds));
                reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.StorageCommit, 0)); // in single step we write directly to Lucene index

                foreach (var stats in reductionPerformanceStats)
                {
                    reduceLevelStats.Add(stats);
                }

                reducePerformanceStats.LevelStats.Add(reduceLevelStats);
            }
            finally
            {
                long _;
                autoTuner.CurrentlyUsedBatchSizesInBytes.TryRemove(reducingBatchThrottlerId, out _);
            }

            return(reducePerformanceStats);
        }
Beispiel #22
0
		private ReducingPerformanceStats SingleStepReduce(IndexToWorkOn index, List<string> keysToReduce, AbstractViewGenerator viewGenerator,
												          ConcurrentSet<object> itemsToDelete, CancellationToken token)
		{
			var needToMoveToSingleStepQueue = new ConcurrentQueue<HashSet<string>>();

            if ( Log.IsDebugEnabled )
			    Log.Debug(() => string.Format("Executing single step reducing for {0} keys [{1}]", keysToReduce.Count, string.Join(", ", keysToReduce)));

			var batchTimeWatcher = Stopwatch.StartNew();

			var reducingBatchThrottlerId = Guid.NewGuid();
			var reducePerformanceStats = new ReducingPerformanceStats(ReduceType.SingleStep);
			var reduceLevelStats = new ReduceLevelPeformanceStats
			{
				Started = SystemTime.UtcNow,
				Level = 2
			};

			try
			{
				var parallelOperations = new ConcurrentQueue<ParallelBatchStats>();

				var parallelProcessingStart = SystemTime.UtcNow;

				BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, keysToReduce, enumerator =>
				{
					var parallelStats = new ParallelBatchStats
					{
						StartDelay = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds
					};

					var localNeedToMoveToSingleStep = new HashSet<string>();
					needToMoveToSingleStepQueue.Enqueue(localNeedToMoveToSingleStep);
					var localKeys = new HashSet<string>();
					while (enumerator.MoveNext())
					{
						token.ThrowIfCancellationRequested();

						localKeys.Add(enumerator.Current);
					}

					transactionalStorage.Batch(actions =>
					{
						var getItemsToReduceParams = new GetItemsToReduceParams(index: index.IndexId, reduceKeys: localKeys, level: 0, loadData: false, itemsToDelete: itemsToDelete)
						{
							Take = int.MaxValue // just get all, we do the rate limit when we load the number of keys to reduce, anyway
						};

						var getItemsToReduceDuration = Stopwatch.StartNew();

                        int scheduledItemsSum = 0;
                        int scheduledItemsCount = 0;
                        List<int> scheduledItemsMappedBuckets = new List<int>();
						using (StopwatchScope.For(getItemsToReduceDuration))
						{
                            foreach (var item in actions.MapReduce.GetItemsToReduce(getItemsToReduceParams, token))
                            {
                                scheduledItemsMappedBuckets.Add(item.Bucket);
                                scheduledItemsSum += item.Size;
                                scheduledItemsCount++;
                            }
						}

						parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_GetItemsToReduce, getItemsToReduceDuration.ElapsedMilliseconds));

						autoTuner.CurrentlyUsedBatchSizesInBytes.GetOrAdd(reducingBatchThrottlerId, scheduledItemsSum);

                        if (scheduledItemsCount == 0)
						{						    
							// Here we have an interesting issue. We have scheduled reductions, because GetReduceTypesPerKeys() returned them
							// and at the same time, we don't have any at level 0. That probably means that we have them at level 1 or 2.
							// They shouldn't be here, and indeed, we remove them just a little down from here in this function.
							// That said, they might have smuggled in between versions, or something happened to cause them to be here.
							// In order to avoid that, we forcibly delete those extra items from the scheduled reductions, and move on

                            Log.Warn("Found single reduce items ({0}) that didn't have any items to reduce. Deleting level 1 & level 2 items for those keys. (If you can reproduce this, please contact [email protected])", string.Join(", ", keysToReduce));

							var deletingScheduledReductionsDuration = Stopwatch.StartNew();

							using (StopwatchScope.For(deletingScheduledReductionsDuration))
							{
								foreach (var reduceKey in keysToReduce)
								{
									token.ThrowIfCancellationRequested();

									actions.MapReduce.DeleteScheduledReduction(index.IndexId, 1, reduceKey);
									actions.MapReduce.DeleteScheduledReduction(index.IndexId, 2, reduceKey);
								}
							}

							parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_DeleteScheduledReductions, deletingScheduledReductionsDuration.ElapsedMilliseconds));
						}

						var removeReduceResultsDuration = new Stopwatch();

						foreach (var reduceKey in localKeys)
						{
							token.ThrowIfCancellationRequested();

							var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexId, reduceKey);

							if (lastPerformedReduceType != ReduceType.SingleStep)
								localNeedToMoveToSingleStep.Add(reduceKey);

							if (lastPerformedReduceType != ReduceType.MultiStep)
								continue;

                            if ( Log.IsDebugEnabled )
                            {
                                Log.Debug("Key {0} was moved from multi step to single step reduce, removing existing reduce results records", reduceKey);
                            }

							using (StopwatchScope.For(removeReduceResultsDuration))
							{
                                // now we are in single step but previously multi step reduce was performed for the given key
                                var mappedBuckets = actions.MapReduce.GetMappedBuckets(index.IndexId, reduceKey, token);        

                                // add scheduled items too to be sure we will delete reduce results of already deleted documents
								foreach (var mappedBucket in mappedBuckets.Union(scheduledItemsMappedBuckets))
								{
									actions.MapReduce.RemoveReduceResults(index.IndexId, 1, reduceKey, mappedBucket);
									actions.MapReduce.RemoveReduceResults(index.IndexId, 2, reduceKey, mappedBucket / 1024);
								}
							}
						}

						parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_RemoveReduceResults, removeReduceResultsDuration.ElapsedMilliseconds));

						parallelOperations.Enqueue(parallelStats);
					});
				});

				reduceLevelStats.Operations.Add(new ParallelPerformanceStats
				{
					NumberOfThreads = parallelOperations.Count,
					DurationMs = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds,
					BatchedOperations = parallelOperations.ToList()
				});

				var getMappedResultsDuration = new Stopwatch();				

				var reductionPerformanceStats = new List<IndexingPerformanceStats>();
                
                var keysLeftToReduce = new HashSet<string>(keysToReduce);                                              
				while (keysLeftToReduce.Count > 0)
				{
                    var keysReturned = new HashSet<string>();       

                    // Try to diminish the allocations happening because of .Resize()
                    var mappedResults = new List<MappedResultInfo>(keysLeftToReduce.Count);                             
                    
                    context.TransactionalStorage.Batch(actions =>
					{
						var take = context.CurrentNumberOfItemsToReduceInSingleBatch;

						using (StopwatchScope.For(getMappedResultsDuration))
						{
                            mappedResults = actions.MapReduce.GetMappedResults(index.IndexId, keysLeftToReduce, true, take, keysReturned, token, mappedResults);
						}
					});

					var count = mappedResults.Count;

                    int size = 0;
                    foreach ( var item in mappedResults )
                    {
                        item.Bucket = 0;
                        size += item.Size;
                    }
                        
                    var results = mappedResults.GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data)).ToArray();

					context.MetricsCounters.ReducedPerSecond.Mark(results.Length);

					token.ThrowIfCancellationRequested();

                    var performance = context.IndexStorage.Reduce(index.IndexId, viewGenerator, results, 2, context, null, keysReturned, count);

					reductionPerformanceStats.Add(performance);

					autoTuner.AutoThrottleBatchSize(count, size, batchTimeWatcher.Elapsed);
				}

				var needToMoveToSingleStep = new HashSet<string>();

				HashSet<string> set;
				while (needToMoveToSingleStepQueue.TryDequeue(out set))
				{
					needToMoveToSingleStep.UnionWith(set);
				}

				foreach (var reduceKey in needToMoveToSingleStep)
				{
					string localReduceKey = reduceKey;
					transactionalStorage.Batch(actions =>
						actions.MapReduce.UpdatePerformedReduceType(index.IndexId, localReduceKey, ReduceType.SingleStep));
				}

				reduceLevelStats.Completed = SystemTime.UtcNow;
				reduceLevelStats.Duration = reduceLevelStats.Completed - reduceLevelStats.Started;
				reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_GetMappedResults, getMappedResultsDuration.ElapsedMilliseconds));
				reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.StorageCommit, 0)); // in single step we write directly to Lucene index

				foreach (var stats in reductionPerformanceStats)
				{
					reduceLevelStats.Add(stats);
				}

				reducePerformanceStats.LevelStats.Add(reduceLevelStats);
			}
			finally
			{
				long _;
				autoTuner.CurrentlyUsedBatchSizesInBytes.TryRemove(reducingBatchThrottlerId, out _);
			}

			return reducePerformanceStats;
		}
Beispiel #23
0
        protected void HandleReduceForIndex(IndexToWorkOn indexToWorkOn)
        {
            var viewGenerator = context.IndexDefinitionStorage.GetViewGenerator(indexToWorkOn.IndexName);

            if (viewGenerator == null)
            {
                return;
            }
            TimeSpan reduceDuration    = TimeSpan.Zero;
            int      totalCount        = 0;
            int      totalSize         = 0;
            bool     operationCanceled = false;
            var      itemsToDelete     = new List <object>();

            try
            {
                var sw = Stopwatch.StartNew();
                for (int i = 0; i < 3; i++)
                {
                    var level = i;
                    transactionalStorage.Batch(actions =>
                    {
                        context.CancellationToken.ThrowIfCancellationRequested();

                        var sp = Stopwatch.StartNew();
                        var persistedResults = actions.MapReduce.GetItemsToReduce
                                               (
                            take: context.CurrentNumberOfItemsToReduceInSingleBatch,
                            level: level,
                            index: indexToWorkOn.IndexName,
                            itemsToDelete: itemsToDelete
                                               )
                                               .ToList();

                        totalCount += persistedResults.Count;
                        totalSize  += persistedResults.Sum(x => x.Size);

                        if (Log.IsDebugEnabled)
                        {
                            if (persistedResults.Count > 0)
                            {
                                Log.Debug(() => string.Format("Found {0} results for keys [{1}] for index {2} at level {3} in {4}",
                                                              persistedResults.Count, string.Join(", ", persistedResults.Select(x => x.ReduceKey).Distinct()), indexToWorkOn.IndexName, level, sp.Elapsed));
                            }
                            else
                            {
                                Log.Debug("No reduce keys found for {0}", indexToWorkOn.IndexName);
                            }
                        }

                        context.CancellationToken.ThrowIfCancellationRequested();

                        var requiredReduceNextTime = persistedResults.Select(x => new ReduceKeyAndBucket(x.Bucket, x.ReduceKey)).Distinct().ToArray();
                        foreach (var mappedResultInfo in requiredReduceNextTime)
                        {
                            actions.MapReduce.RemoveReduceResults(indexToWorkOn.IndexName, level + 1, mappedResultInfo.ReduceKey, mappedResultInfo.Bucket);
                        }
                        if (level != 2)
                        {
                            var reduceKeysAndBukcets = requiredReduceNextTime
                                                       .Select(x => new ReduceKeyAndBucket(x.Bucket / 1024, x.ReduceKey))
                                                       .Distinct()
                                                       .ToArray();
                            actions.MapReduce.ScheduleReductions(indexToWorkOn.IndexName, level + 1, reduceKeysAndBukcets);
                        }

                        var results = persistedResults
                                      .Where(x => x.Data != null)
                                      .GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data))
                                      .ToArray();
                        var reduceKeys = new HashSet <string>(persistedResults.Select(x => x.ReduceKey),
                                                              StringComparer.InvariantCultureIgnoreCase);
                        context.ReducedPerSecIncreaseBy(results.Length);

                        context.CancellationToken.ThrowIfCancellationRequested();
                        sp = Stopwatch.StartNew();
                        context.IndexStorage.Reduce(indexToWorkOn.IndexName, viewGenerator, results, level, context, actions, reduceKeys);
                        Log.Debug("Indexed {0} reduce keys in {1} with {2} results for index {3} in {4}", reduceKeys.Count, sp.Elapsed,
                                  results.Length, indexToWorkOn.IndexName, sp.Elapsed);
                    });
                }
                reduceDuration = sw.Elapsed;
            }
            catch (OperationCanceledException)
            {
                operationCanceled = true;
            }
            finally
            {
                if (operationCanceled == false)
                {
                    // whatever we succeeded in indexing or not, we have to update this
                    // because otherwise we keep trying to re-index failed mapped results
                    transactionalStorage.Batch(actions =>
                    {
                        var latest = actions.MapReduce.DeleteScheduledReduction(itemsToDelete);

                        if (latest == null)
                        {
                            return;
                        }
                        actions.Indexing.UpdateLastReduced(indexToWorkOn.IndexName, latest.Etag, latest.Timestamp);
                    });
                    autoTuner.AutoThrottleBatchSize(totalCount, totalSize, reduceDuration);
                }
            }
        }
Beispiel #24
0
		protected void HandleReduceForIndex(IndexToWorkOn indexToWorkOn)
		{
			var viewGenerator = context.IndexDefinitionStorage.GetViewGenerator(indexToWorkOn.IndexName);
			if (viewGenerator == null)
				return;

			TimeSpan reduceDuration = TimeSpan.Zero;
			int totalCount = 0;
			int totalSize = 0;
			bool operationCanceled = false;
			var itemsToDelete = new List<object>();

			IList<ReduceTypePerKey> mappedResultsInfo = null;
			transactionalStorage.Batch(actions =>
			{
				mappedResultsInfo = actions.MapReduce.GetReduceTypesPerKeys(indexToWorkOn.IndexName, 
					context.CurrentNumberOfItemsToReduceInSingleBatch,
					context.NumberOfItemsToExecuteReduceInSingleStep).ToList();
			});

			var singleStepReduceKeys = mappedResultsInfo.Where(x => x.OperationTypeToPerform == ReduceType.SingleStep).Select(x => x.ReduceKey).ToArray();
			var multiStepsReduceKeys = mappedResultsInfo.Where(x => x.OperationTypeToPerform == ReduceType.MultiStep).Select(x => x.ReduceKey).ToArray();

			var sw = Stopwatch.StartNew();

			try
			{
				if (singleStepReduceKeys.Length > 0)
				{
					var reduceCounters = SingleStepReduce(indexToWorkOn, singleStepReduceKeys, viewGenerator, itemsToDelete);
					totalCount += reduceCounters.count;
					totalSize += reduceCounters.size;
				}

				if (multiStepsReduceKeys.Length > 0)
				{
					var reduceCounters = MultiStepReduce(indexToWorkOn, multiStepsReduceKeys, viewGenerator, itemsToDelete);
					totalCount += reduceCounters.count;
					totalSize += reduceCounters.size;
				}

				reduceDuration = sw.Elapsed;
			}
			catch (OperationCanceledException)
			{
				operationCanceled = true;
			}
			finally
			{
				if (operationCanceled == false)
				{
					// whatever we succeeded in indexing or not, we have to update this
					// because otherwise we keep trying to re-index failed mapped results
					transactionalStorage.Batch(actions =>
					{
						var latest = actions.MapReduce.DeleteScheduledReduction(itemsToDelete);

						if(latest == null)
							return;
						actions.Indexing.UpdateLastReduced(indexToWorkOn.IndexName, latest.Etag, latest.Timestamp);
					});
					autoTuner.AutoThrottleBatchSize(totalCount, totalSize, reduceDuration);
				}
			}
		}
Beispiel #25
0
		private void MarkIndexes(IndexToWorkOn indexToWorkOn, ComparableByteArray lastIndexedEtag, IStorageActionsAccessor actions, Guid lastEtag, DateTime lastModified)
		{
			if (new ComparableByteArray(indexToWorkOn.LastIndexedEtag.ToByteArray()).CompareTo(lastIndexedEtag) > 0)
				return;
			actions.Indexing.UpdateLastIndexed(indexToWorkOn.IndexName, lastEtag, lastModified);
		}
Beispiel #26
0
        private ReduceResultStats SingleStepReduce(IndexToWorkOn index, string[] keysToReduce, AbstractViewGenerator viewGenerator,
                                                   List <object> itemsToDelete)
        {
            var result = new ReduceResultStats();
            var needToMoveToSingleStep = new HashSet <string>();

            Log.Debug(() => string.Format("Executing single step reducing for {0} keys [{1}]", keysToReduce.Length, string.Join(", ", keysToReduce)));
            transactionalStorage.Batch(actions =>
            {
                var scheduledItems = actions.MapReduce.GetItemsToReduce
                                     (
                    level: 0,
                    reduceKeys: keysToReduce,
                    index: index.IndexName,
                    itemsToDelete: itemsToDelete,
                    loadData: false,
                    take: int.MaxValue,                                     // just get all, we do the rate limit when we load the number of keys to reduce, anyway
                    itemsAlreadySeen: new HashSet <Tuple <string, int> >()
                                     ).ToList();

                // Only look at the scheduled batch for this run, not the entire set of pending reductions.
                //var batchKeys = scheduledItems.Select(x => x.ReduceKey).ToArray();

                foreach (var reduceKey in keysToReduce)
                {
                    var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexName, reduceKey);

                    if (lastPerformedReduceType != ReduceType.SingleStep)
                    {
                        needToMoveToSingleStep.Add(reduceKey);
                    }

                    if (lastPerformedReduceType != ReduceType.MultiStep)
                    {
                        continue;
                    }

                    Log.Debug("Key {0} was moved from multi step to single step reduce, removing existing reduce results records",
                              reduceKey);

                    // now we are in single step but previously multi step reduce was performed for the given key
                    var mappedBuckets = actions.MapReduce.GetMappedBuckets(index.IndexName, reduceKey).ToList();

                    // add scheduled items too to be sure we will delete reduce results of already deleted documents
                    mappedBuckets.AddRange(scheduledItems.Select(x => x.Bucket));

                    foreach (var mappedBucket in mappedBuckets.Distinct())
                    {
                        actions.MapReduce.RemoveReduceResults(index.IndexName, 1, reduceKey, mappedBucket);
                        actions.MapReduce.RemoveReduceResults(index.IndexName, 2, reduceKey, mappedBucket / 1024);
                    }
                }

                var mappedResults = actions.MapReduce.GetMappedResults(
                    index.IndexName,
                    keysToReduce,
                    loadData: true
                    ).ToList();

                result.count += mappedResults.Count;
                result.size  += mappedResults.Sum(x => x.Size);

                var reduceKeys = new HashSet <string>(keysToReduce);

                mappedResults.ApplyIfNotNull(x => x.Bucket = 0);

                var results = mappedResults
                              .Where(x => x.Data != null)
                              .GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data))
                              .ToArray();

                context.ReducedPerSecIncreaseBy(results.Length);

                context.IndexStorage.Reduce(index.IndexName, viewGenerator, results, 2, context, actions, reduceKeys);
            });

            foreach (var reduceKey in needToMoveToSingleStep)
            {
                string localReduceKey = reduceKey;
                transactionalStorage.Batch(actions =>
                                           actions.MapReduce.UpdatePerformedReduceType(index.IndexName, localReduceKey, ReduceType.SingleStep));
            }

            return(result);
        }
Beispiel #27
0
		private void SingleStepReduce(IndexToWorkOn index, string[] keysToReduce, AbstractViewGenerator viewGenerator,
												List<object> itemsToDelete)
		{
			var needToMoveToSingleStep = new HashSet<string>();

			Log.Debug(() => string.Format("Executing single step reducing for {0} keys [{1}]", keysToReduce.Length, string.Join(", ", keysToReduce)));
			var batchTimeWatcher = Stopwatch.StartNew();
			var count = 0;
			var size = 0;
			var state = new ConcurrentQueue <Tuple<HashSet<string>, List<MappedResultInfo>>>();
			BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, keysToReduce, enumerator =>
			{
				var localKeys = new HashSet<string>();
				while (enumerator.MoveNext())
				{
					localKeys.Add(enumerator.Current);
				}
				transactionalStorage.Batch(actions =>
				{
					var getItemsToReduceParams = new GetItemsToReduceParams(index: index.IndexName, reduceKeys: localKeys, level: 0,
																			loadData: false,
																			itemsToDelete: itemsToDelete)
					{
						Take = int.MaxValue// just get all, we do the rate limit when we load the number of keys to reduce, anyway
					};
					var scheduledItems = actions.MapReduce.GetItemsToReduce(getItemsToReduceParams).ToList();

					foreach (var reduceKey in localKeys)
					{
						var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexName, reduceKey);

						if (lastPerformedReduceType != ReduceType.SingleStep)
							needToMoveToSingleStep.Add(reduceKey);

						if (lastPerformedReduceType != ReduceType.MultiStep)
							continue;

						Log.Debug("Key {0} was moved from multi step to single step reduce, removing existing reduce results records",
							reduceKey);

						// now we are in single step but previously multi step reduce was performed for the given key
						var mappedBuckets = actions.MapReduce.GetMappedBuckets(index.IndexName, reduceKey).ToList();

						// add scheduled items too to be sure we will delete reduce results of already deleted documents
						mappedBuckets.AddRange(scheduledItems.Select(x => x.Bucket));

						foreach (var mappedBucket in mappedBuckets.Distinct())
						{
							actions.MapReduce.RemoveReduceResults(index.IndexName, 1, reduceKey, mappedBucket);
							actions.MapReduce.RemoveReduceResults(index.IndexName, 2, reduceKey, mappedBucket / 1024);
						}
					}

					var mappedResults = actions.MapReduce.GetMappedResults(
							index.IndexName,
							localKeys,
							loadData: true
						).ToList();

					Interlocked.Add(ref count, mappedResults.Count);
					Interlocked.Add(ref size, mappedResults.Sum(x => x.Size));

					mappedResults.ApplyIfNotNull(x => x.Bucket = 0);

					state.Enqueue(Tuple.Create(localKeys, mappedResults));
				});
			});

			var reduceKeys = new HashSet<string>(state.SelectMany(x=>x.Item1));

			var results = state.SelectMany(x=>x.Item2)
						.Where(x => x.Data != null)
						.GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data))
						.ToArray();
			context.ReducedPerSecIncreaseBy(results.Length);

			context.TransactionalStorage.Batch(actions => 
				context.IndexStorage.Reduce(index.IndexName, viewGenerator, results, 2, context, actions, reduceKeys)
				);
			
			autoTuner.AutoThrottleBatchSize(count, size, batchTimeWatcher.Elapsed);

			foreach (var reduceKey in needToMoveToSingleStep)
			{
				string localReduceKey = reduceKey;
				transactionalStorage.Batch(actions =>
					actions.MapReduce.UpdatePerformedReduceType(index.IndexName, localReduceKey, ReduceType.SingleStep));
			}
		}
Beispiel #28
0
        private ReduceResultStats MultiStepReduce(IndexToWorkOn index, string[] keysToReduce, AbstractViewGenerator viewGenerator, List <object> itemsToDelete)
        {
            var result = new ReduceResultStats();
            var needToMoveToMultiStep = new HashSet <string>();

            transactionalStorage.Batch(actions =>
            {
                foreach (var localReduceKey in keysToReduce)
                {
                    var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexName, localReduceKey);

                    if (lastPerformedReduceType != ReduceType.MultiStep)
                    {
                        needToMoveToMultiStep.Add(localReduceKey);
                    }

                    if (lastPerformedReduceType != ReduceType.SingleStep)
                    {
                        continue;
                    }
                    // we exceeded the limit of items to reduce in single step
                    // now we need to schedule reductions at level 0 for all map results with given reduce key
                    var mappedItems = actions.MapReduce.GetMappedBuckets(index.IndexName, localReduceKey).ToList();
                    actions.MapReduce.ScheduleReductions(index.IndexName, 0,
                                                         mappedItems.Select(x => new ReduceKeyAndBucket(x, localReduceKey)));
                }
            });

            for (int i = 0; i < 3; i++)
            {
                var level = i;

                bool retry            = true;
                var  itemsAlreadySeen = new HashSet <Tuple <string, int> >();
                while (retry)
                {
                    transactionalStorage.Batch(actions =>
                    {
                        context.CancellationToken.ThrowIfCancellationRequested();

                        var sp = Stopwatch.StartNew();

                        var persistedResults = actions.MapReduce.GetItemsToReduce
                                               (
                            level: level,
                            reduceKeys: keysToReduce,
                            index: index.IndexName,
                            itemsToDelete: itemsToDelete,
                            loadData: true,
                            take: context.CurrentNumberOfItemsToReduceInSingleBatch,
                            itemsAlreadySeen: itemsAlreadySeen
                                               ).ToList();
                        if (persistedResults.Count == 0)
                        {
                            retry = false;
                            return;
                        }

                        result.count += persistedResults.Count;
                        result.size  += persistedResults.Sum(x => x.Size);

                        if (Log.IsDebugEnabled)
                        {
                            if (persistedResults.Count > 0)
                            {
                                Log.Debug(() => string.Format("Found {0} results for keys [{1}] for index {2} at level {3} in {4}",
                                                              persistedResults.Count,
                                                              string.Join(", ", persistedResults.Select(x => x.ReduceKey).Distinct()),
                                                              index.IndexName, level, sp.Elapsed));
                            }
                            else
                            {
                                Log.Debug("No reduce keys found for {0}", index.IndexName);
                            }
                        }

                        context.CancellationToken.ThrowIfCancellationRequested();

                        var requiredReduceNextTime = persistedResults.Select(x => new ReduceKeyAndBucket(x.Bucket, x.ReduceKey))
                                                     .OrderBy(x => x.Bucket)
                                                     .Distinct()
                                                     .ToArray();
                        foreach (var mappedResultInfo in requiredReduceNextTime)
                        {
                            actions.MapReduce.RemoveReduceResults(index.IndexName, level + 1, mappedResultInfo.ReduceKey,
                                                                  mappedResultInfo.Bucket);
                        }

                        if (level != 2)
                        {
                            var reduceKeysAndBuckets = requiredReduceNextTime
                                                       .Select(x => new ReduceKeyAndBucket(x.Bucket / 1024, x.ReduceKey))
                                                       .Distinct()
                                                       .ToArray();
                            actions.MapReduce.ScheduleReductions(index.IndexName, level + 1, reduceKeysAndBuckets);
                        }

                        var results = persistedResults
                                      .Where(x => x.Data != null)
                                      .GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data))
                                      .ToArray();
                        var reduceKeys = new HashSet <string>(persistedResults.Select(x => x.ReduceKey),
                                                              StringComparer.InvariantCultureIgnoreCase);
                        context.ReducedPerSecIncreaseBy(results.Length);

                        context.CancellationToken.ThrowIfCancellationRequested();
                        sp = Stopwatch.StartNew();
                        context.IndexStorage.Reduce(index.IndexName, viewGenerator, results, level, context, actions, reduceKeys);
                        Log.Debug("Indexed {0} reduce keys in {1} with {2} results for index {3} in {4}", reduceKeys.Count, sp.Elapsed,
                                  results.Length, index.IndexName, sp.Elapsed);
                    });
                }
            }

            foreach (var reduceKey in needToMoveToMultiStep)
            {
                string localReduceKey = reduceKey;
                transactionalStorage.Batch(actions =>
                                           actions.MapReduce.UpdatePerformedReduceType(index.IndexName, localReduceKey,
                                                                                       ReduceType.MultiStep));
            }

            return(result);
        }
Beispiel #29
0
		protected ReducingPerformanceStats[] HandleReduceForIndex(IndexToWorkOn indexToWorkOn, CancellationToken token)
		{
			var viewGenerator = context.IndexDefinitionStorage.GetViewGenerator(indexToWorkOn.IndexId);
			if (viewGenerator == null)
				return null;

			bool operationCanceled = false;
			var itemsToDelete = new ConcurrentSet<object>();

			var singleStepReduceKeys = new List<string>();
			var multiStepsReduceKeys = new List<string>();

			transactionalStorage.Batch(actions =>
			{
                var mappedResultsInfo = actions.MapReduce.GetReduceTypesPerKeys(indexToWorkOn.IndexId, 
                                                                    context.CurrentNumberOfItemsToReduceInSingleBatch, 
                                                                    context.NumberOfItemsToExecuteReduceInSingleStep, token);

			    foreach (var key in mappedResultsInfo)
			    {
				    token.ThrowIfCancellationRequested();

				    switch (key.OperationTypeToPerform)
				    {
					    case ReduceType.SingleStep:
						    singleStepReduceKeys.Add(key.ReduceKey);
						    break;
					    case ReduceType.MultiStep:
						    multiStepsReduceKeys.Add(key.ReduceKey);
						    break;
				    }
			    }
			});

			currentlyProcessedIndexes.TryAdd(indexToWorkOn.IndexId, indexToWorkOn.Index);

			var performanceStats = new List<ReducingPerformanceStats>();

			try
			{
				if (singleStepReduceKeys.Count > 0)
				{
                    if ( Log.IsDebugEnabled )
                        Log.Debug("SingleStep reduce for keys: {0}", singleStepReduceKeys.Select(x => x + ","));
                    
					var singleStepStats = SingleStepReduce(indexToWorkOn, singleStepReduceKeys, viewGenerator, itemsToDelete, token);

					performanceStats.Add(singleStepStats);
				}

				if (multiStepsReduceKeys.Count > 0)
				{
                    if ( Log.IsDebugEnabled )
                        Log.Debug("MultiStep reduce for keys: {0}", multiStepsReduceKeys.Select(x => x + ","));

					var multiStepStats = MultiStepReduce(indexToWorkOn, multiStepsReduceKeys, viewGenerator, itemsToDelete, token);

					performanceStats.Add(multiStepStats);
				}
			}
			catch (OperationCanceledException)
			{
				operationCanceled = true;
			}
			catch (AggregateException e)
			{
				var anyOperationsCanceled = e
					.InnerExceptions
					.OfType<OperationCanceledException>()
					.Any();

				if (anyOperationsCanceled == false) 
					throw;

				operationCanceled = true;
			}
			finally
			{
				var postReducingOperations = new ReduceLevelPeformanceStats
				{
					Level = -1,
					Started = SystemTime.UtcNow
				};

				if (operationCanceled == false)
				{
					var deletingScheduledReductionsDuration = new Stopwatch();
					var storageCommitDuration = new Stopwatch();

					// whatever we succeeded in indexing or not, we have to update this
					// because otherwise we keep trying to re-index failed mapped results
					transactionalStorage.Batch(actions =>
					{
						actions.BeforeStorageCommit += storageCommitDuration.Start;
						actions.AfterStorageCommit += storageCommitDuration.Stop;

						ScheduledReductionInfo latest;

						using (StopwatchScope.For(deletingScheduledReductionsDuration))
						{
							latest = actions.MapReduce.DeleteScheduledReduction(itemsToDelete);
						}

						if (latest == null)
							return;
						actions.Indexing.UpdateLastReduced(indexToWorkOn.Index.indexId, latest.Etag, latest.Timestamp);
					});

					postReducingOperations.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_DeleteScheduledReductions, deletingScheduledReductionsDuration.ElapsedMilliseconds));
					postReducingOperations.Operations.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds));
				}

				postReducingOperations.Completed = SystemTime.UtcNow;
				postReducingOperations.Duration = postReducingOperations.Completed - postReducingOperations.Started;

				performanceStats.Add(new ReducingPerformanceStats(ReduceType.None)
				{
					LevelStats = new List<ReduceLevelPeformanceStats> { postReducingOperations }
				});

				Index _;
				currentlyProcessedIndexes.TryRemove(indexToWorkOn.IndexId, out _);
			}

			return performanceStats.ToArray();
		}
Beispiel #30
0
		protected void HandleReduceForIndex(IndexToWorkOn indexToWorkOn)
		{
			var viewGenerator = context.IndexDefinitionStorage.GetViewGenerator(indexToWorkOn.IndexName);
			if (viewGenerator == null)
				return;
			TimeSpan reduceDuration = TimeSpan.Zero;
			int totalCount = 0;
			int totalSize = 0;
			bool operationCanceled = false;
			var itemsToDelete = new List<object>();
			try
			{
				var sw = Stopwatch.StartNew();
				for (int i = 0; i < 3; i++)
				{
					var level = i;
					transactionalStorage.Batch(actions =>
					{
						context.CancellationToken.ThrowIfCancellationRequested();

						var sp = Stopwatch.StartNew();
						var persistedResults = actions.MapReduce.GetItemsToReduce
							(
								take: context.CurrentNumberOfItemsToReduceInSingleBatch,
								level: level,
								index: indexToWorkOn.IndexName,
								itemsToDelete: itemsToDelete
							)
							.ToList();

						totalCount += persistedResults.Count;
						totalSize += persistedResults.Sum(x => x.Size);

						if (Log.IsDebugEnabled)
						{
							if (persistedResults.Count > 0)
								Log.Debug(() => string.Format("Found {0} results for keys [{1}] for index {2} at level {3} in {4}",
								persistedResults.Count, string.Join(", ", persistedResults.Select(x => x.ReduceKey).Distinct()), indexToWorkOn.IndexName, level, sp.Elapsed));
							else
								Log.Debug("No reduce keys found for {0}", indexToWorkOn.IndexName);
						}

						context.CancellationToken.ThrowIfCancellationRequested();

						var requiredReduceNextTime = persistedResults.Select(x => new ReduceKeyAndBucket(x.Bucket, x.ReduceKey))
							.OrderBy(x=>x.Bucket)
							.Distinct()
							.ToArray();
						foreach (var mappedResultInfo in requiredReduceNextTime)
						{
							actions.MapReduce.RemoveReduceResults(indexToWorkOn.IndexName, level + 1, mappedResultInfo.ReduceKey, mappedResultInfo.Bucket);
						}
						if (level != 2)
						{
							var reduceKeysAndBukcets = requiredReduceNextTime
								.Select(x => new ReduceKeyAndBucket(x.Bucket / 1024, x.ReduceKey))
								.Distinct()
								.ToArray();
							actions.MapReduce.ScheduleReductions(indexToWorkOn.IndexName, level + 1, reduceKeysAndBukcets);
						}

						var results = persistedResults
							.Where(x => x.Data != null)
							.GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data))
							.ToArray();
						var reduceKeys = new HashSet<string>(persistedResults.Select(x => x.ReduceKey),
															 StringComparer.InvariantCultureIgnoreCase);
						context.ReducedPerSecIncreaseBy(results.Length);

						context.CancellationToken.ThrowIfCancellationRequested();
						sp = Stopwatch.StartNew();
						context.IndexStorage.Reduce(indexToWorkOn.IndexName, viewGenerator, results, level, context, actions, reduceKeys);
						Log.Debug("Indexed {0} reduce keys in {1} with {2} results for index {3} in {4}", reduceKeys.Count, sp.Elapsed,
										results.Length, indexToWorkOn.IndexName, sp.Elapsed);
					});
				}
				reduceDuration = sw.Elapsed;
			}
			catch (OperationCanceledException)
			{
				operationCanceled = true;
			}
			finally
			{
				if (operationCanceled == false)
				{
					// whatever we succeeded in indexing or not, we have to update this
					// because otherwise we keep trying to re-index failed mapped results
					transactionalStorage.Batch(actions =>
					{
						var latest= actions.MapReduce.DeleteScheduledReduction(itemsToDelete);

						if(latest == null)
							return;
						actions.Indexing.UpdateLastReduced(indexToWorkOn.IndexName, latest.Etag, latest.Timestamp);
					});
					autoTuner.AutoThrottleBatchSize(totalCount, totalSize, reduceDuration);
				}
			}
		}
Beispiel #31
0
        public void IndexPrecomputedBatch(PrecomputedIndexingBatch precomputedBatch)
        {
            context.MetricsCounters.IndexedPerSecond.Mark(precomputedBatch.Documents.Count);

            var indexToWorkOn = new IndexToWorkOn()
            {
                Index           = precomputedBatch.Index,
                IndexId         = precomputedBatch.Index.indexId,
                LastIndexedEtag = Etag.Empty
            };

            using (MapIndexingInProgress(new List <IndexToWorkOn> {
                indexToWorkOn
            }))
            {
                var indexingBatchForIndex =
                    FilterIndexes(new List <IndexToWorkOn>()
                {
                    indexToWorkOn
                }, precomputedBatch.Documents,
                                  precomputedBatch.LastIndexed).FirstOrDefault();

                if (indexingBatchForIndex == null)
                {
                    return;
                }

                IndexingBatchInfo batchInfo = null;

                try
                {
                    context.ReportIndexingBatchStarted(precomputedBatch.Documents.Count, precomputedBatch.Documents.Sum(x => x.SerializedSizeOnDisk), new List <string>()
                    {
                        indexToWorkOn.Index.PublicName
                    }, out batchInfo);

                    batchInfo.BatchType = BatchType.Precomputed;

                    if (Log.IsDebugEnabled)
                    {
                        Log.Debug("Going to index precomputed documents for a new index {0}. Count of precomputed docs {1}",
                                  precomputedBatch.Index.PublicName, precomputedBatch.Documents.Count);
                    }

                    HandleIndexingFor(indexingBatchForIndex, precomputedBatch.LastIndexed, precomputedBatch.LastModified);
                }
                finally
                {
                    var performance = indexingBatchForIndex.Batch.GetIndexingPerformance();

                    if (batchInfo != null)
                    {
                        if (performance != null)
                        {
                            batchInfo.PerformanceStats.TryAdd(indexingBatchForIndex.Index.PublicName, performance);
                        }

                        batchInfo.BatchCompleted();
                    }
                }
            }
        }
Beispiel #32
0
        public void IndexPrecomputedBatch(PrecomputedIndexingBatch precomputedBatch)
        {
            context.MetricsCounters.IndexedPerSecond.Mark(precomputedBatch.Documents.Count);

            var indexToWorkOn = new IndexToWorkOn()
            {
                Index = precomputedBatch.Index,
                IndexId = precomputedBatch.Index.indexId,
                LastIndexedEtag = Etag.Empty
            };

            var indexingBatchForIndex =
                FilterIndexes(new List<IndexToWorkOn>() {indexToWorkOn}, precomputedBatch.Documents,
                              precomputedBatch.LastIndexed).FirstOrDefault();

            if (indexingBatchForIndex == null)
                return;

            if (Log.IsDebugEnabled)
            {
                Log.Debug("Going to index precomputed documents for a new index {0}. Count of precomputed docs {1}",
                          precomputedBatch.Index.PublicName, precomputedBatch.Documents.Count);
            }

            HandleIndexingFor(indexingBatchForIndex, precomputedBatch.LastIndexed, precomputedBatch.LastModified);
        }
Beispiel #33
0
		private void SingleStepReduce(IndexToWorkOn index, string[] keysToReduce, AbstractViewGenerator viewGenerator,
												ConcurrentSet<object> itemsToDelete)
		{
			var needToMoveToSingleStepQueue = new ConcurrentQueue<HashSet<string>>();

			Log.Debug(() => string.Format("Executing single step reducing for {0} keys [{1}]", keysToReduce.Length, string.Join(", ", keysToReduce)));
			var batchTimeWatcher = Stopwatch.StartNew();
			var count = 0;
			var size = 0;
			var state = new ConcurrentQueue<Tuple<HashSet<string>, List<MappedResultInfo>>>();
			var reducingBatchThrottlerId = Guid.NewGuid();
			try
			{
				BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, keysToReduce, enumerator =>
				{
					var localNeedToMoveToSingleStep = new HashSet<string>();
					needToMoveToSingleStepQueue.Enqueue(localNeedToMoveToSingleStep);
					var localKeys = new HashSet<string>();
					while (enumerator.MoveNext())
					{
						localKeys.Add(enumerator.Current);
					}

					transactionalStorage.Batch(actions =>
					{
						var getItemsToReduceParams = new GetItemsToReduceParams(index: index.IndexName, reduceKeys: localKeys, level: 0,
							loadData: false,
							itemsToDelete: itemsToDelete)
						{
							Take = int.MaxValue// just get all, we do the rate limit when we load the number of keys to reduce, anyway
						};

					
						var scheduledItems = actions.MapReduce.GetItemsToReduce(getItemsToReduceParams).ToList();
						autoTuner.CurrentlyUsedBatchSizes.GetOrAdd(reducingBatchThrottlerId, scheduledItems.Sum(x => x.Size));
						if (scheduledItems.Count == 0)
						{
							if (Log.IsWarnEnabled)
							{
								Log.Warn("Found single reduce items ({0}) that didn't have any items to reduce. Deleting level 1 & level 2 items for those keys. (If you can reproduce this, please contact [email protected])",
									string.Join(", ", keysToReduce));
							}
							// Here we have an interesting issue. We have scheduled reductions, because GetReduceTypesPerKeys() returned them
							// and at the same time, we don't have any at level 0. That probably means that we have them at level 1 or 2.
							// They shouldn't be here, and indeed, we remove them just a little down from here in this function.
							// That said, they might bave smuggled in between versions, or something happened to cause them to be here.
							// In order to avoid that, we forcibly delete those extra items from the scheduled reductions, and move on
							foreach (var reduceKey in keysToReduce)
							{
								actions.MapReduce.DeleteScheduledReduction(index.IndexName, 1, reduceKey);
								actions.MapReduce.DeleteScheduledReduction(index.IndexName, 2, reduceKey);
							}
						}

						foreach (var reduceKey in localKeys)
						{
							var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexName, reduceKey);

							if (lastPerformedReduceType != ReduceType.SingleStep)
								localNeedToMoveToSingleStep.Add(reduceKey);

							if (lastPerformedReduceType != ReduceType.MultiStep)
								continue;

							Log.Debug("Key {0} was moved from multi step to single step reduce, removing existing reduce results records",
								reduceKey);

							// now we are in single step but previously multi step reduce was performed for the given key
							var mappedBuckets = actions.MapReduce.GetMappedBuckets(index.IndexName, reduceKey).ToList();

							// add scheduled items too to be sure we will delete reduce results of already deleted documents
							mappedBuckets.AddRange(scheduledItems.Select(x => x.Bucket));

							foreach (var mappedBucket in mappedBuckets.Distinct())
							{
								actions.MapReduce.RemoveReduceResults(index.IndexName, 1, reduceKey, mappedBucket);
								actions.MapReduce.RemoveReduceResults(index.IndexName, 2, reduceKey, mappedBucket / 1024);
							}
						}

						var mappedResults = actions.MapReduce.GetMappedResults(
							index.IndexName,
							localKeys,
							loadData: true
							).ToList();

						Interlocked.Add(ref count, mappedResults.Count);
						Interlocked.Add(ref size, mappedResults.Sum(x => x.Size));

						mappedResults.ApplyIfNotNull(x => x.Bucket = 0);

						state.Enqueue(Tuple.Create(localKeys, mappedResults));
					});
				});

				var reduceKeys = new HashSet<string>(state.SelectMany(x => x.Item1));

				var results = state.SelectMany(x => x.Item2)
					.Where(x => x.Data != null)
					.GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data))
					.ToArray();
				context.PerformanceCounters.ReducedPerSecond.IncrementBy(results.Length);

				context.TransactionalStorage.Batch(actions =>
					context.IndexStorage.Reduce(index.IndexName, viewGenerator, results, 2, context, actions, reduceKeys, state.Sum(x=>x.Item2.Count))
					);

				autoTuner.AutoThrottleBatchSize(count, size, batchTimeWatcher.Elapsed);

				var needToMoveToSingleStep = new HashSet<string>();
				HashSet<string> set;
				while (needToMoveToSingleStepQueue.TryDequeue(out set))
				{
					needToMoveToSingleStep.UnionWith(set);
				}

				foreach (var reduceKey in needToMoveToSingleStep)
				{
					string localReduceKey = reduceKey;
					transactionalStorage.Batch(actions =>
						actions.MapReduce.UpdatePerformedReduceType(index.IndexName, localReduceKey, ReduceType.SingleStep));
				}
			}
			finally
			{
				long _;
				autoTuner.CurrentlyUsedBatchSizes.TryRemove(reducingBatchThrottlerId, out _);
			}
		}
Beispiel #34
0
        public void IndexPrecomputedBatch(PrecomputedIndexingBatch precomputedBatch, CancellationToken token)
        {
            token.ThrowIfCancellationRequested();

            context.MetricsCounters.IndexedPerSecond.Mark(precomputedBatch.Documents.Count);

            var indexToWorkOn = new IndexToWorkOn
            {
                Index = precomputedBatch.Index,
                IndexId = precomputedBatch.Index.indexId,
                LastIndexedEtag = Etag.Empty
            };

            using (LogContext.WithDatabase(context.DatabaseName))
            using (MapIndexingInProgress(new List<IndexToWorkOn> { indexToWorkOn }))
            {
                IndexingBatchForIndex indexingBatchForIndex;
                if (precomputedBatch.Documents.Count > 0)
                {
                    indexingBatchForIndex = 
                        FilterIndexes(
                                new List<IndexToWorkOn> {indexToWorkOn}, 
                                precomputedBatch.Documents,
                                precomputedBatch.LastIndexed)
                          .FirstOrDefault();
                }
                else
                {
                    indexingBatchForIndex = new IndexingBatchForIndex
                    {
                        Batch = new IndexingBatch(precomputedBatch.LastIndexed),
                        Index = precomputedBatch.Index,
                        IndexId = precomputedBatch.Index.indexId,
                        LastIndexedEtag = precomputedBatch.LastIndexed
                    };
                }

                if (indexingBatchForIndex == null)
                    return;

                IndexingBatchInfo batchInfo = null;
                IndexingPerformanceStats performance = null;
                try
                {
                    batchInfo = context.ReportIndexingBatchStarted(precomputedBatch.Documents.Count, -1, new List<string>
                    {
                        indexToWorkOn.Index.PublicName
                    });

                    batchInfo.BatchType = BatchType.Precomputed;

                    if (Log.IsDebugEnabled)
                    {
                        Log.Debug("Going to index precomputed documents for a new index {0}. Count of precomputed docs {1}",
                            precomputedBatch.Index.PublicName, precomputedBatch.Documents.Count);
                    }

                    performance = HandleIndexingFor(indexingBatchForIndex, precomputedBatch.LastIndexed, precomputedBatch.LastModified, token);
                }
                finally
                {
                    if (batchInfo != null)
                    {
                        if (performance != null)
                            batchInfo.PerformanceStats.TryAdd(indexingBatchForIndex.Index.PublicName, performance);

                        context.ReportIndexingBatchCompleted(batchInfo);
                    }
                }
            }

            indexReplacer.ReplaceIndexes(new []{ indexToWorkOn.IndexId });
        }
        public void IndexPrecomputedBatch(PrecomputedIndexingBatch precomputedBatch, CancellationToken token)
        {
            token.ThrowIfCancellationRequested();

            context.MetricsCounters.IndexedPerSecond.Mark(precomputedBatch.Documents.Count);

            var indexToWorkOn = new IndexToWorkOn
            {
                Index           = precomputedBatch.Index,
                IndexId         = precomputedBatch.Index.indexId,
                LastIndexedEtag = Etag.Empty
            };

            using (LogContext.WithDatabase(context.DatabaseName))
                using (MapIndexingInProgress(new List <IndexToWorkOn> {
                    indexToWorkOn
                }))
                {
                    var indexingBatchForIndex =
                        FilterIndexes(new List <IndexToWorkOn> {
                        indexToWorkOn
                    }, precomputedBatch.Documents,
                                      precomputedBatch.LastIndexed).FirstOrDefault();

                    if (indexingBatchForIndex == null)
                    {
                        return;
                    }

                    IndexingBatchInfo        batchInfo   = null;
                    IndexingPerformanceStats performance = null;
                    try
                    {
                        batchInfo = context.ReportIndexingBatchStarted(precomputedBatch.Documents.Count, -1, new List <string>
                        {
                            indexToWorkOn.Index.PublicName
                        });

                        batchInfo.BatchType = BatchType.Precomputed;

                        if (Log.IsDebugEnabled)
                        {
                            Log.Debug("Going to index precomputed documents for a new index {0}. Count of precomputed docs {1}",
                                      precomputedBatch.Index.PublicName, precomputedBatch.Documents.Count);
                        }

                        performance = HandleIndexingFor(indexingBatchForIndex, precomputedBatch.LastIndexed, precomputedBatch.LastModified, token);
                    }
                    finally
                    {
                        if (batchInfo != null)
                        {
                            if (performance != null)
                            {
                                batchInfo.PerformanceStats.TryAdd(indexingBatchForIndex.Index.PublicName, performance);
                            }

                            context.ReportIndexingBatchCompleted(batchInfo);
                        }
                    }
                }

            indexReplacer.ReplaceIndexes(new [] { indexToWorkOn.IndexId });
        }
Beispiel #36
0
        private void SingleStepReduce(IndexToWorkOn index, string[] keysToReduce, AbstractViewGenerator viewGenerator,
                                      ConcurrentSet <object> itemsToDelete)
        {
            var needToMoveToSingleStepQueue = new ConcurrentQueue <HashSet <string> >();

            Log.Debug(() => string.Format("Executing single step reducing for {0} keys [{1}]", keysToReduce.Length, string.Join(", ", keysToReduce)));
            var batchTimeWatcher         = Stopwatch.StartNew();
            var count                    = 0;
            var size                     = 0;
            var state                    = new ConcurrentQueue <Tuple <HashSet <string>, List <MappedResultInfo> > >();
            var reducingBatchThrottlerId = Guid.NewGuid();

            try
            {
                BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, keysToReduce, enumerator =>
                {
                    var localNeedToMoveToSingleStep = new HashSet <string>();
                    needToMoveToSingleStepQueue.Enqueue(localNeedToMoveToSingleStep);
                    var localKeys = new HashSet <string>();
                    while (enumerator.MoveNext())
                    {
                        localKeys.Add(enumerator.Current);
                    }

                    transactionalStorage.Batch(actions =>
                    {
                        var getItemsToReduceParams = new GetItemsToReduceParams(index: index.IndexName, reduceKeys: localKeys, level: 0,
                                                                                loadData: false,
                                                                                itemsToDelete: itemsToDelete)
                        {
                            Take = int.MaxValue                            // just get all, we do the rate limit when we load the number of keys to reduce, anyway
                        };


                        var scheduledItems = actions.MapReduce.GetItemsToReduce(getItemsToReduceParams).ToList();
                        autoTuner.CurrentlyUsedBatchSizes.GetOrAdd(reducingBatchThrottlerId, scheduledItems.Sum(x => x.Size));
                        if (scheduledItems.Count == 0)
                        {
                            if (Log.IsWarnEnabled)
                            {
                                Log.Warn("Found single reduce items ({0}) that didn't have any items to reduce. Deleting level 1 & level 2 items for those keys. (If you can reproduce this, please contact [email protected])",
                                         string.Join(", ", keysToReduce));
                            }
                            // Here we have an interesting issue. We have scheduled reductions, because GetReduceTypesPerKeys() returned them
                            // and at the same time, we don't have any at level 0. That probably means that we have them at level 1 or 2.
                            // They shouldn't be here, and indeed, we remove them just a little down from here in this function.
                            // That said, they might bave smuggled in between versions, or something happened to cause them to be here.
                            // In order to avoid that, we forcibly delete those extra items from the scheduled reductions, and move on
                            foreach (var reduceKey in keysToReduce)
                            {
                                actions.MapReduce.DeleteScheduledReduction(index.IndexName, 1, reduceKey);
                                actions.MapReduce.DeleteScheduledReduction(index.IndexName, 2, reduceKey);
                            }
                        }

                        foreach (var reduceKey in localKeys)
                        {
                            var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexName, reduceKey);

                            if (lastPerformedReduceType != ReduceType.SingleStep)
                            {
                                localNeedToMoveToSingleStep.Add(reduceKey);
                            }

                            if (lastPerformedReduceType != ReduceType.MultiStep)
                            {
                                continue;
                            }

                            Log.Debug("Key {0} was moved from multi step to single step reduce, removing existing reduce results records",
                                      reduceKey);

                            // now we are in single step but previously multi step reduce was performed for the given key
                            var mappedBuckets = actions.MapReduce.GetMappedBuckets(index.IndexName, reduceKey).ToList();

                            // add scheduled items too to be sure we will delete reduce results of already deleted documents
                            mappedBuckets.AddRange(scheduledItems.Select(x => x.Bucket));

                            foreach (var mappedBucket in mappedBuckets.Distinct())
                            {
                                actions.MapReduce.RemoveReduceResults(index.IndexName, 1, reduceKey, mappedBucket);
                                actions.MapReduce.RemoveReduceResults(index.IndexName, 2, reduceKey, mappedBucket / 1024);
                            }
                        }

                        var mappedResults = actions.MapReduce.GetMappedResults(
                            index.IndexName,
                            localKeys,
                            loadData: true
                            ).ToList();

                        Interlocked.Add(ref count, mappedResults.Count);
                        Interlocked.Add(ref size, mappedResults.Sum(x => x.Size));

                        mappedResults.ApplyIfNotNull(x => x.Bucket = 0);

                        state.Enqueue(Tuple.Create(localKeys, mappedResults));
                    });
                });

                var reduceKeys = new HashSet <string>(state.SelectMany(x => x.Item1));

                var results = state.SelectMany(x => x.Item2)
                              .Where(x => x.Data != null)
                              .GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data))
                              .ToArray();
                context.PerformanceCounters.ReducedPerSecond.IncrementBy(results.Length);

                context.TransactionalStorage.Batch(actions =>
                                                   context.IndexStorage.Reduce(index.IndexName, viewGenerator, results, 2, context, actions, reduceKeys, state.Sum(x => x.Item2.Count))
                                                   );

                autoTuner.AutoThrottleBatchSize(count, size, batchTimeWatcher.Elapsed);

                var needToMoveToSingleStep = new HashSet <string>();
                HashSet <string> set;
                while (needToMoveToSingleStepQueue.TryDequeue(out set))
                {
                    needToMoveToSingleStep.UnionWith(set);
                }

                foreach (var reduceKey in needToMoveToSingleStep)
                {
                    string localReduceKey = reduceKey;
                    transactionalStorage.Batch(actions =>
                                               actions.MapReduce.UpdatePerformedReduceType(index.IndexName, localReduceKey, ReduceType.SingleStep));
                }
            }
            finally
            {
                long _;
                autoTuner.CurrentlyUsedBatchSizes.TryRemove(reducingBatchThrottlerId, out _);
            }
        }