Example #1
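        // Feeds a brand-new index from a single precomputed batch: queries the
        // Raven/DocumentsByEntityName index for every matching document and hands the
        // results straight to the indexing executer, instead of waiting for the
        // incremental indexing loop to catch the index up.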
        private void ApplyPrecomputedBatchForNewIndex(Index index, AbstractViewGenerator generator, int pageSize, CancellationTokenSource cts)
        {
            PrecomputedIndexingBatch result = null;

            var docsToIndex = new List <JsonDocument>();

            TransactionalStorage.Batch(actions =>
            {
                var query = GetQueryForAllMatchingDocumentsForIndex(generator);

                using (DocumentCacher.SkipSetDocumentsInDocumentCache())
                    using (var linked = CancellationTokenSource.CreateLinkedTokenSource(cts.Token, WorkContext.CancellationToken))
                        using (var op = new QueryActions.DatabaseQueryOperation(Database, Constants.DocumentsByEntityNameIndex, new IndexQuery
                        {
                            Query = query,
                            PageSize = pageSize
                        }, actions, linked)
                        {
                            ShouldSkipDuplicateChecking = true
                        })
                        {
                            op.Init();

                            //if we are working on a test index, apply the optimization anyway, as the index is capped by a small number of results
                            if (index.IsTestIndex == false && op.Header.TotalResults > pageSize)
                            {
                                // we don't apply this optimization if the total number of results
                                // to index is more than the max number to index in a single batch.
                                // The idea here is that we need to keep the amount
                                // of memory we use to a manageable level even when introducing a new index to a BIG
                                // database
                                try
                                {
                                    cts.Cancel();
                                    // we have to run just a little bit of the query to properly setup the disposal
                                    op.Execute(o => { });
                                }
                                catch (OperationCanceledException)
                                {
                                }
                                return;
                            }

                            if (Log.IsDebugEnabled)
                            {
                                Log.Debug("For new index {0}, using precomputed indexing batch optimization for {1} docs",
                                          index, op.Header.TotalResults);
                            }

                            var totalLoadedDocumentSize = 0;
                            const int totalSizeToCheck  = 16 * 1024 * 1024; //16MB
                            var localLoadedDocumentSize = 0;
                            op.Execute(document =>
                            {
                                var metadata     = document.Value <RavenJObject>(Constants.Metadata);
                                var key          = metadata.Value <string>("@id");
                                var etag         = Etag.Parse(metadata.Value <string>("@etag"));
                                var lastModified = DateTime.Parse(metadata.Value <string>(Constants.LastModified));
                                document.Remove(Constants.Metadata);
                                var serializedSizeOnDisk = metadata.Value <int>(Constants.SerializedSizeOnDisk);
                                metadata.Remove(Constants.SerializedSizeOnDisk);

                                var doc = new JsonDocument
                                {
                                    DataAsJson           = document,
                                    Etag                 = etag,
                                    Key                  = key,
                                    SerializedSizeOnDisk = serializedSizeOnDisk,
                                    LastModified         = lastModified,
                                    SkipDeleteFromIndex  = true,
                                    Metadata             = metadata
                                };

                                docsToIndex.Add(doc);
                                totalLoadedDocumentSize += serializedSizeOnDisk;
                                localLoadedDocumentSize += serializedSizeOnDisk;

                                if (totalLoadedDocumentSize > Database.Configuration.MaxPrecomputedBatchTotalDocumentSizeInBytes)
                                {
                                    var error = string.Format(
                                        @"Aborting applying precomputed batch for index id: {0}, name: {1}
                                    because we have {2}mb of documents that were fetched
                                    and the configured max data to fetch is {3}mb",
                                        index.IndexId, index.PublicName, totalLoadedDocumentSize / 1024 / 1024,
                                        Database.Configuration.MaxPrecomputedBatchTotalDocumentSizeInBytes / 1024 / 1024);

                                    //we are aborting operation, so don't keep the references
                                    docsToIndex.Clear();
                                    throw new TotalDataSizeExceededException(error);
                                }


                                if (localLoadedDocumentSize <= totalSizeToCheck)
                                {
                                    return;
                                }

                                localLoadedDocumentSize = 0;

                                if (Database.Configuration.MemoryLimitForProcessingInMb > MemoryStatistics.AvailableMemoryInMb)
                                {
                                    var error = string.Format(
                                        @"Aborting applying precomputed batch for index id: {0}, name: {1}
                                    because only {2}mb of memory is available and the configured memory limit for processing is {3}mb",
                                        index.IndexId, index.PublicName,
                                        MemoryStatistics.AvailableMemoryInMb, Database.Configuration.MemoryLimitForProcessingInMb);

                                    //we are aborting operation, so don't keep the references
                                    docsToIndex.Clear();
                                    throw new TotalDataSizeExceededException(error);
                                }
                            });
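                            // the query header already carries the index etag and timestamp as of
                            // this read, so the new index can be marked as indexed up to that point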

                            result = new PrecomputedIndexingBatch
                            {
                                LastIndexed  = op.Header.IndexEtag,
                                LastModified = op.Header.IndexTimestamp,
                                Documents    = docsToIndex,
                                Index        = index
                            };
                        }
            });

            if (result != null && result.Documents != null && result.Documents.Count > 0)
            {
                using (var linked = CancellationTokenSource.CreateLinkedTokenSource(cts.Token, WorkContext.CancellationToken))
                {
                    Database.IndexingExecuter.IndexPrecomputedBatch(result, linked.Token);

                    if (index.IsTestIndex)
                    {
                        TransactionalStorage.Batch(accessor => accessor.Indexing.TouchIndexEtag(index.IndexId));
                    }
                }
            }
        }
Example #2
        private ReducingPerformanceStats MultiStepReduce(IndexToWorkOn index, List <string> keysToReduce,
                                                         AbstractViewGenerator viewGenerator, ConcurrentSet <object> itemsToDelete, bool skipIncreasingBatchSize, CancellationToken token)
        {
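            // Multi-step mode reduces in up to three passes (levels 0, 1 and 2),
            // persisting intermediate reduce results between levels and scheduling the
            // surviving buckets for reduction at the next level.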
            var needToMoveToMultiStep = new HashSet <string>();
            var alreadyMultiStep      = new HashSet <string>();

            transactionalStorage.Batch(actions =>
            {
                foreach (var localReduceKey in keysToReduce)
                {
                    token.ThrowIfCancellationRequested();

                    var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexId, localReduceKey);

                    if (lastPerformedReduceType != ReduceType.MultiStep)
                    {
                        needToMoveToMultiStep.Add(localReduceKey);
                    }

                    if (lastPerformedReduceType == ReduceType.MultiStep)
                    {
                        alreadyMultiStep.Add(localReduceKey);
                    }

                    if (lastPerformedReduceType != ReduceType.SingleStep)
                    {
                        continue;
                    }

                    // we exceeded the limit of items to reduce in single step
                    // now we need to schedule reductions at level 0 for all map results with given reduce key
                    var mappedItems = actions.MapReduce.GetMappedBuckets(index.IndexId, localReduceKey, token).ToList();
                    foreach (var result in mappedItems.Select(x => new ReduceKeyAndBucket(x, localReduceKey)))
                    {
                        actions.MapReduce.ScheduleReductions(index.IndexId, 0, result);
                    }
                }
            });

            var reducePerformance = new ReducingPerformanceStats(ReduceType.MultiStep);


            for (int i = 0; i < 3; i++)
            {
                var level = i;

                var reduceLevelStats = new ReduceLevelPeformanceStats()
                {
                    Level   = level,
                    Started = SystemTime.UtcNow,
                };

                var reduceParams = new GetItemsToReduceParams(
                    index.IndexId,
                    new HashSet <string>(keysToReduce),
                    level,
                    true,
                    itemsToDelete);

                var gettingItemsToReduceDuration = new Stopwatch();
                var scheduleReductionsDuration   = new Stopwatch();
                var removeReduceResultsDuration  = new Stopwatch();
                var storageCommitDuration        = new Stopwatch();

                bool retry = true;
                while (retry && reduceParams.ReduceKeys.Count > 0)
                {
                    var reduceBatchAutoThrottlerId = Guid.NewGuid();
                    try
                    {
                        transactionalStorage.Batch(actions =>
                        {
                            token.ThrowIfCancellationRequested();

                            actions.BeforeStorageCommit += storageCommitDuration.Start;
                            actions.AfterStorageCommit  += storageCommitDuration.Stop;

                            var batchTimeWatcher = Stopwatch.StartNew();

                            reduceParams.Take = context.CurrentNumberOfItemsToReduceInSingleBatch;

                            int size = 0;

                            IList <MappedResultInfo> persistedResults;
                            var reduceKeys = new HashSet <string>(StringComparer.InvariantCultureIgnoreCase);
                            using (StopwatchScope.For(gettingItemsToReduceDuration))
                            {
                                persistedResults = actions.MapReduce.GetItemsToReduce(reduceParams, token);

                                foreach (var item in persistedResults)
                                {
                                    reduceKeys.Add(item.ReduceKey);
                                    size += item.Size;
                                }
                            }

                            if (persistedResults.Count == 0)
                            {
                                retry = false;
                                return;
                            }

                            var count = persistedResults.Count;

                            autoTuner.CurrentlyUsedBatchSizesInBytes.GetOrAdd(reduceBatchAutoThrottlerId, size);

                            if (Log.IsDebugEnabled)
                            {
                                if (persistedResults.Count > 0)
                                {
                                    Log.Debug(() => string.Format("Found {0} results for keys [{1}] for index {2} at level {3} in {4}",
                                                                  persistedResults.Count,
                                                                  string.Join(", ", persistedResults.Select(x => x.ReduceKey).Distinct()),
                                                                  index.Index.PublicName, level, batchTimeWatcher.Elapsed));
                                }
                                else
                                {
                                    Log.Debug("No reduce keys found for {0}", index.Index.PublicName);
                                }
                            }

                            token.ThrowIfCancellationRequested();


                            var requiredReduceNextTimeSet = new HashSet <ReduceKeyAndBucket>(persistedResults.Select(x => new ReduceKeyAndBucket(x.Bucket, x.ReduceKey)), ReduceKeyAndBucketEqualityComparer.Instance);

                            using (StopwatchScope.For(removeReduceResultsDuration))
                            {
                                foreach (var mappedResultInfo in requiredReduceNextTimeSet)
                                {
                                    token.ThrowIfCancellationRequested();

                                    actions.MapReduce.RemoveReduceResults(index.IndexId, level + 1, mappedResultInfo.ReduceKey, mappedResultInfo.Bucket);
                                }
                            }

                            if (level != 2)
                            {
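                                // each bucket at the next level covers 1024 buckets from this
                                // level, hence the division before scheduling level + 1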
                                var reduceKeysAndBucketsSet = new HashSet <ReduceKeyAndBucket>(requiredReduceNextTimeSet.Select(x => new ReduceKeyAndBucket(x.Bucket / 1024, x.ReduceKey)), ReduceKeyAndBucketEqualityComparer.Instance);

                                using (StopwatchScope.For(scheduleReductionsDuration))
                                {
                                    foreach (var reduceKeysAndBucket in reduceKeysAndBucketsSet)
                                    {
                                        token.ThrowIfCancellationRequested();

                                        actions.MapReduce.ScheduleReductions(index.IndexId, level + 1, reduceKeysAndBucket);
                                    }
                                }
                            }

                            token.ThrowIfCancellationRequested();

                            var reduceTimeWatcher = Stopwatch.StartNew();

                            var results = persistedResults.Where(x => x.Data != null)
                                          .GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data))
                                          .ToList();

                            var performance = context.IndexStorage.Reduce(index.IndexId, viewGenerator, results, level, context, actions, reduceKeys, persistedResults.Count);

                            context.MetricsCounters.ReducedPerSecond.Mark(results.Count());

                            reduceLevelStats.Add(performance);

                            var batchDuration = batchTimeWatcher.Elapsed;

                            if (Log.IsDebugEnabled)
                            {
                                Log.Debug("Indexed {0} reduce keys in {1} with {2} results for index {3} in {4} on level {5}", reduceKeys.Count, batchDuration, performance.ItemsCount, index.Index.PublicName, reduceTimeWatcher.Elapsed, level);
                            }

                            autoTuner.AutoThrottleBatchSize(count, size, batchDuration, skipIncreasingBatchSize);
                        });
                    }
                    finally
                    {
                        long _;
                        autoTuner.CurrentlyUsedBatchSizesInBytes.TryRemove(reduceBatchAutoThrottlerId, out _);
                    }
                }

                reduceLevelStats.Completed = SystemTime.UtcNow;
                reduceLevelStats.Duration  = reduceLevelStats.Completed - reduceLevelStats.Started;

                reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_GetItemsToReduce, gettingItemsToReduceDuration.ElapsedMilliseconds));
                reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_ScheduleReductions, scheduleReductionsDuration.ElapsedMilliseconds));
                reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_RemoveReduceResults, removeReduceResultsDuration.ElapsedMilliseconds));
                reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds));

                reducePerformance.LevelStats.Add(reduceLevelStats);
            }

            // update the keys that newly moved to multi step
            UpdatePerformedReduceType(index.IndexId, needToMoveToMultiStep, ReduceType.MultiStep);

            // for keys that were already multi step:
            // if those keys have since been removed,
            // the stored reduce types for them need to be removed as well
            UpdatePerformedReduceType(index.IndexId, alreadyMultiStep, ReduceType.MultiStep, skipAdd: true);

            return(reducePerformance);
        }
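
Timing in these reduce methods goes through StopwatchScope.For, used as using (StopwatchScope.For(sw)) { ... } so a stopwatch runs only for the lifetime of the block and keeps accumulating across scopes. A minimal sketch of such a helper, inferred from its usage here (the actual RavenDB type may differ in details):

using System;
using System.Diagnostics;

internal struct StopwatchScope : IDisposable
{
    private readonly Stopwatch stopwatch;

    private StopwatchScope(Stopwatch stopwatch)
    {
        this.stopwatch = stopwatch;
        stopwatch.Start(); // Start() resumes without resetting, so elapsed time accumulates across scopes
    }

    public static StopwatchScope For(Stopwatch stopwatch)
    {
        return new StopwatchScope(stopwatch);
    }

    public void Dispose()
    {
        stopwatch.Stop();
    }
}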
Example #3
        private ReducingPerformanceStats SingleStepReduce(IndexToWorkOn index, List <string> keysToReduce, AbstractViewGenerator viewGenerator,
                                                          ConcurrentSet <object> itemsToDelete, bool skipIncreasingBatchSize, CancellationToken token)
        {
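            // Single-step mode reduces straight from the level-0 mapped results to the
            // final level-2 result, writing directly to the Lucene index and skipping
            // the intermediate levels entirely.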
            var needToMoveToSingleStepQueue = new ConcurrentQueue <HashSet <string> >();
            var alreadySingleStepQueue      = new ConcurrentQueue <HashSet <string> >();

            if (Log.IsDebugEnabled)
            {
                Log.Debug(() => $"Executing single step reducing for {keysToReduce.Count} keys [{string.Join(", ", keysToReduce)}]");
            }

            var batchTimeWatcher = Stopwatch.StartNew();

            var reducingBatchThrottlerId = Guid.NewGuid();
            var reducePerformanceStats   = new ReducingPerformanceStats(ReduceType.SingleStep);
            var reduceLevelStats         = new ReduceLevelPeformanceStats
            {
                Started = SystemTime.UtcNow,
                Level   = 2
            };

            try
            {
                var parallelOperations = new ConcurrentQueue <ParallelBatchStats>();

                var parallelProcessingStart = SystemTime.UtcNow;

                if (context.Database.ThreadPool == null || context.RunReducing == false)
                {
                    throw new OperationCanceledException();
                }

                context.Database.ThreadPool.ExecuteBatch(keysToReduce, enumerator =>
                {
                    var parallelStats = new ParallelBatchStats
                    {
                        StartDelay = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds
                    };

                    while (enumerator.MoveNext())
                    {
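                        // drain the shared key enumerator in chunks of up to
                        // DefaultPageSize keys, so each storage batch below stays bounded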
                        var localKeys = new HashSet <string>();
                        for (var i = 0; i < RavenThreadPool.DefaultPageSize; i++)
                        {
                            token.ThrowIfCancellationRequested();

                            localKeys.Add(enumerator.Current);

                            if (enumerator.MoveNext() == false)
                            {
                                break;
                            }
                        }

                        if (localKeys.Count == 0)
                        {
                            return;
                        }

                        var localNeedToMoveToSingleStep = new HashSet <string>();
                        needToMoveToSingleStepQueue.Enqueue(localNeedToMoveToSingleStep);
                        var localAlreadySingleStep = new HashSet <string>();
                        alreadySingleStepQueue.Enqueue(localAlreadySingleStep);

                        transactionalStorage.Batch(actions =>
                        {
                            var getItemsToReduceParams = new GetItemsToReduceParams(index: index.IndexId, reduceKeys: new HashSet <string>(localKeys), level: 0, loadData: false, itemsToDelete: itemsToDelete)
                            {
                                Take = int.MaxValue // just get it all; the rate limiting is done when we load the keys to reduce, anyway
                            };

                            var getItemsToReduceDuration = new Stopwatch();

                            int scheduledItemsSum   = 0;
                            int scheduledItemsCount = 0;
                            List <int> scheduledItemsMappedBuckets = new List <int>();
                            using (StopwatchScope.For(getItemsToReduceDuration))
                            {
                                foreach (var item in actions.MapReduce.GetItemsToReduce(getItemsToReduceParams, token))
                                {
                                    scheduledItemsMappedBuckets.Add(item.Bucket);
                                    scheduledItemsSum += item.Size;
                                    scheduledItemsCount++;
                                }
                            }

                            parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_GetItemsToReduce, getItemsToReduceDuration.ElapsedMilliseconds));

                            autoTuner.CurrentlyUsedBatchSizesInBytes.GetOrAdd(reducingBatchThrottlerId, scheduledItemsSum);

                            if (scheduledItemsCount == 0)
                            {
                                // Here we have an interesting issue. We have scheduled reductions, because GetReduceTypesPerKeys() returned them
                                // and at the same time, we don't have any at level 0. That probably means that we have them at level 1 or 2.
                                // They shouldn't be here, and indeed, we remove them just a little down from here in this function.
                                // That said, they might have been smuggled in between versions, or something happened to cause them to be here.
                                // In order to avoid that, we forcibly delete those extra items from the scheduled reductions, and move on

                                Log.Warn("Found single reduce items ({0}) that didn't have any items to reduce. Deleting level 1 & level 2 items for those keys. (If you can reproduce this, please contact [email protected])", string.Join(", ", keysToReduce));

                                var deletingScheduledReductionsDuration = Stopwatch.StartNew();

                                using (StopwatchScope.For(deletingScheduledReductionsDuration))
                                {
                                    foreach (var reduceKey in keysToReduce)
                                    {
                                        token.ThrowIfCancellationRequested();

                                        actions.MapReduce.DeleteScheduledReduction(index.IndexId, 1, reduceKey);
                                        actions.MapReduce.DeleteScheduledReduction(index.IndexId, 2, reduceKey);
                                    }
                                }

                                parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_DeleteScheduledReductions, deletingScheduledReductionsDuration.ElapsedMilliseconds));
                            }

                            var removeReduceResultsDuration = new Stopwatch();

                            foreach (var reduceKey in localKeys)
                            {
                                token.ThrowIfCancellationRequested();

                                var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexId, reduceKey);

                                if (lastPerformedReduceType != ReduceType.SingleStep)
                                {
                                    localNeedToMoveToSingleStep.Add(reduceKey);
                                }

                                if (lastPerformedReduceType == ReduceType.SingleStep)
                                {
                                    localAlreadySingleStep.Add(reduceKey);
                                }

                                if (lastPerformedReduceType != ReduceType.MultiStep)
                                {
                                    continue;
                                }

                                if (Log.IsDebugEnabled)
                                {
                                    Log.Debug("Key {0} was moved from multi step to single step reduce, removing existing reduce results records", reduceKey);
                                }

                                using (StopwatchScope.For(removeReduceResultsDuration))
                                {
                                    // now we are in single step but previously multi step reduce was performed for the given key
                                    var mappedBuckets = actions.MapReduce.GetMappedBuckets(index.IndexId, reduceKey, token);

                            // also add the scheduled items, to be sure we delete reduce results of already deleted documents
                                    foreach (var mappedBucket in mappedBuckets.Union(scheduledItemsMappedBuckets))
                                    {
                                        actions.MapReduce.RemoveReduceResults(index.IndexId, 1, reduceKey, mappedBucket);
                                        actions.MapReduce.RemoveReduceResults(index.IndexId, 2, reduceKey, mappedBucket / 1024);
                                    }
                                }
                            }

                            parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_RemoveReduceResults, removeReduceResultsDuration.ElapsedMilliseconds));
                        });

                        parallelOperations.Enqueue(parallelStats);
                    }
                }, description: $"Performing single step reduction for index {index.Index.PublicName} from etag {index.Index.GetLastEtagFromStats()} for {keysToReduce.Count} keys", database: context.Database);

                reduceLevelStats.Operations.Add(new ParallelPerformanceStats
                {
                    NumberOfThreads   = parallelOperations.Count,
                    DurationMs        = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds,
                    BatchedOperations = parallelOperations.ToList()
                });

                var getMappedResultsDuration = new Stopwatch();

                var reductionPerformanceStats = new List <IndexingPerformanceStats>();

                var keysLeftToReduce = new HashSet <string>(keysToReduce);
                while (keysLeftToReduce.Count > 0)
                {
                    var keysReturned = new HashSet <string>();

                    // Try to diminish the allocations happening because of .Resize()
                    var mappedResults = new List <MappedResultInfo>(keysLeftToReduce.Count);

                    context.TransactionalStorage.Batch(actions =>
                    {
                        var take = context.CurrentNumberOfItemsToReduceInSingleBatch;

                        using (StopwatchScope.For(getMappedResultsDuration))
                        {
                            mappedResults = actions.MapReduce.GetMappedResults(index.IndexId, keysLeftToReduce, true, take, keysReturned, token, mappedResults);
                        }
                    });

                    var count = mappedResults.Count;

                    int size = 0;
                    foreach (var item in mappedResults)
                    {
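                        // collapse everything into bucket 0: the final single-step reduce
                        // handles all mapped results for a key as a single group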
                        item.Bucket = 0;
                        size       += item.Size;
                    }

                    var results = mappedResults.GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data)).ToArray();

                    context.MetricsCounters.ReducedPerSecond.Mark(results.Length);

                    token.ThrowIfCancellationRequested();

                    var performance = context.IndexStorage.Reduce(index.IndexId, viewGenerator, results, 2, context, null, keysReturned, count);

                    reductionPerformanceStats.Add(performance);

                    autoTuner.AutoThrottleBatchSize(count, size, batchTimeWatcher.Elapsed, skipIncreasingBatchSize);
                }

                // update the keys that newly moved to single step
                UpdatePerformedSingleStep(index.IndexId, needToMoveToSingleStepQueue);

                // for keys that were already single step:
                // if those keys have since been removed,
                // the stored reduce types for them need to be removed as well
                UpdatePerformedSingleStep(index.IndexId, alreadySingleStepQueue, skipAdd: true);

                reduceLevelStats.Completed = SystemTime.UtcNow;
                reduceLevelStats.Duration  = reduceLevelStats.Completed - reduceLevelStats.Started;
                reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_GetMappedResults, getMappedResultsDuration.ElapsedMilliseconds));
                reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.StorageCommit, 0)); // in single step we write directly to Lucene index

                foreach (var stats in reductionPerformanceStats)
                {
                    reduceLevelStats.Add(stats);
                }

                reducePerformanceStats.LevelStats.Add(reduceLevelStats);
            }
            finally
            {
                long _;
                autoTuner.CurrentlyUsedBatchSizesInBytes.TryRemove(reducingBatchThrottlerId, out _);
            }

            return(reducePerformanceStats);
        }
Example #4
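        // Map phase of an older map-reduce index implementation: deletes the stale
        // mapped results of each incoming document, runs the map (and the map-side
        // reduce) over the batch, and stores fresh mapped results per reduce key.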
        public override void IndexDocuments(
            AbstractViewGenerator viewGenerator,
            IndexingBatch batch,
            WorkContext context,
            IStorageActionsAccessor actions,
            DateTime minimumTimestamp)
        {
            var count            = 0;
            var sourceCount      = 0;
            var sw               = Stopwatch.StartNew();
            var start            = SystemTime.UtcNow;
            var changed          = new HashSet <ReduceKeyAndBucket>();
            var documentsWrapped = batch.Docs.Select(doc =>
            {
                sourceCount++;
                var documentId = doc.__document_id;
                actions.MapReduce.DeleteMappedResultsForDocumentId((string)documentId, name, changed);
                return(doc);
            })
                                   .Where(x => x is FilteredDocument == false);
            var stats = new IndexingWorkStats();

            foreach (
                var mappedResultFromDocument in
                GroupByDocumentId(context,
                                  RobustEnumerationIndex(documentsWrapped.GetEnumerator(), viewGenerator.MapDefinitions, actions, stats)))
            {
                var dynamicResults = mappedResultFromDocument.Select(x => (object)new DynamicJsonObject(RavenJObject.FromObject(x, jsonSerializer))).ToList();
                foreach (
                    var doc in
                    RobustEnumerationReduceDuringMapPhase(dynamicResults.GetEnumerator(), viewGenerator.ReduceDefinition, actions, context))
                {
                    count++;

                    var reduceValue = viewGenerator.GroupByExtraction(doc);
                    if (reduceValue == null)
                    {
                        logIndexing.Debug("Field {0} is used as the reduce key and cannot be null, skipping document {1}",
                                          viewGenerator.GroupByExtraction, mappedResultFromDocument.Key);
                        continue;
                    }
                    var reduceKey = ReduceKeyToString(reduceValue);
                    var docId     = mappedResultFromDocument.Key.ToString();

                    var data = GetMappedData(doc);

                    logIndexing.Debug("Mapped result for index '{0}' doc '{1}': '{2}'", name, docId, data);

                    actions.MapReduce.PutMappedResult(name, docId, reduceKey, data);

                    changed.Add(new ReduceKeyAndBucket(IndexingUtil.MapBucket(docId), reduceKey));
                }
            }
            UpdateIndexingStats(context, stats);
            actions.MapReduce.ScheduleReductions(name, 0, changed);
            AddindexingPerformanceStat(new IndexingPerformanceStats
            {
                OutputCount = count,
                InputCount  = sourceCount,
                Operation   = "Map",
                Duration    = sw.Elapsed,
                Started     = start
            });
            logIndexing.Debug("Mapped {0} documents for {1}", count, name);
        }
Example #5
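        // Translates a query's SortedFields into a Lucene Sort, with special cases for
        // relevance score, random ordering and spatial distance sorting.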
        public static Sort GetSort(this IndexQuery self, IndexDefinition indexDefinition, AbstractViewGenerator viewGenerator)
        {
            var spatialQuery = self as SpatialIndexQuery;
            var sortedFields = self.SortedFields;

            if (sortedFields == null || sortedFields.Length <= 0)
            {
                if (spatialQuery == null || string.IsNullOrEmpty(self.Query) == false)
                {
                    return(null);
                }
                sortedFields = new[] { new SortedField(Constants.DistanceFieldName), };
            }

            return(new Sort(sortedFields
                            .Select(sortedField =>
            {
                if (sortedField.Field == Constants.TemporaryScoreValue)
                {
                    return SortField.FIELD_SCORE;
                }
                if (sortedField.Field.StartsWith(Constants.RandomFieldName))
                {
                    var parts = sortedField.Field.Split(new[] { ';' }, StringSplitOptions.RemoveEmptyEntries);
                    if (parts.Length < 2)                                                     // truly random
                    {
                        return new RandomSortField(Guid.NewGuid().ToString());
                    }
                    return new RandomSortField(parts[1]);
                }
                if (spatialQuery != null && sortedField.Field == Constants.DistanceFieldName)
                {
                    var spatialField = viewGenerator.GetSpatialField(spatialQuery.SpatialFieldName);
                    var shape = spatialField.ReadShape(spatialQuery.QueryShape);
                    var dsort = new SpatialDistanceFieldComparatorSource(spatialField, shape.GetCenter());
                    return new SortField(Constants.DistanceFieldName, dsort, sortedField.Descending);
                }
                var sortOptions = GetSortOption(indexDefinition, sortedField.Field, self);

                if (sortOptions == null || sortOptions == SortOptions.None)
                {
                    return new SortField(sortedField.Field, CultureInfo.InvariantCulture, sortedField.Descending);
                }

                if (sortOptions.Value == SortOptions.Short)
                {
                    sortOptions = SortOptions.Int;
                }
                return new SortField(sortedField.Field, (int)sortOptions.Value, sortedField.Descending);
            })
                            .ToArray()));
        }
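
For context, random ordering in Example #5 is requested through a synthetic sort field whose name starts with Constants.RandomFieldName, optionally followed by ";seed". A hedged usage sketch of that path (indexDefinition and viewGenerator stand in for the index being queried, and the SortedField string constructor is an assumption):

var query = new IndexQuery
{
    Query = "Tag:[[Users]]",
    // "<random field>;1234": parts[1] becomes the seed, so the same seed
    // produces the same pseudo-random order on every request
    SortedFields = new[] { new SortedField(Constants.RandomFieldName + ";1234") }
};

Sort sort = query.GetSort(indexDefinition, viewGenerator);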
Example #6
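        // Parallel map phase: partitions the batch across the background task executer,
        // collects per-partition timing stats, and finally schedules level-0 reductions
        // for every reduce key/bucket that changed.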
        public override IndexingPerformanceStats IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp, CancellationToken token)
        {
            token.ThrowIfCancellationRequested();

            var count            = 0;
            var sourceCount      = 0;
            var deleted          = new Dictionary <ReduceKeyAndBucket, int>();
            var performance      = RecordCurrentBatch("Current Map", "Map", batch.Docs.Count);
            var performanceStats = new List <BasePerformanceStats>();

            var usedStorageAccessors = new ConcurrentSet <IStorageActionsAccessor>();

            if (usedStorageAccessors.TryAdd(actions))
            {
                var storageCommitDuration = new Stopwatch();

                actions.BeforeStorageCommit += storageCommitDuration.Start;

                actions.AfterStorageCommit += () =>
                {
                    storageCommitDuration.Stop();

                    performanceStats.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds));
                };
            }

            var deleteMappedResultsDuration = new Stopwatch();
            var documentsWrapped            = batch.Docs.Select(doc =>
            {
                token.ThrowIfCancellationRequested();

                sourceCount++;
                var documentId = doc.__document_id;

                using (StopwatchScope.For(deleteMappedResultsDuration))
                {
                    actions.MapReduce.DeleteMappedResultsForDocumentId((string)documentId, indexId, deleted);
                }

                return(doc);
            })
                                              .Where(x => x is FilteredDocument == false)
                                              .ToList();

            performanceStats.Add(new PerformanceStats
            {
                Name       = IndexingOperation.Map_DeleteMappedResults,
                DurationMs = deleteMappedResultsDuration.ElapsedMilliseconds,
            });

            var allReferencedDocs = new ConcurrentQueue <IDictionary <string, HashSet <string> > >();
            var allReferenceEtags = new ConcurrentQueue <IDictionary <string, Etag> >();
            var allState          = new ConcurrentQueue <Tuple <HashSet <ReduceKeyAndBucket>, IndexingWorkStats, Dictionary <string, int> > >();

            var parallelOperations = new ConcurrentQueue <ParallelBatchStats>();

            var parallelProcessingStart = SystemTime.UtcNow;

            BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, partition =>
            {
                token.ThrowIfCancellationRequested();
                var parallelStats = new ParallelBatchStats
                {
                    StartDelay = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds
                };

                var localStats   = new IndexingWorkStats();
                var localChanges = new HashSet <ReduceKeyAndBucket>();
                var statsPerKey  = new Dictionary <string, int>();

                var linqExecutionDuration            = new Stopwatch();
                var reduceInMapLinqExecutionDuration = new Stopwatch();
                var putMappedResultsDuration         = new Stopwatch();
                var convertToRavenJObjectDuration    = new Stopwatch();

                allState.Enqueue(Tuple.Create(localChanges, localStats, statsPerKey));

                using (CurrentIndexingScope.Current = new CurrentIndexingScope(context.Database, PublicName))
                {
                    // we are writing to the transactional store from multiple threads here, and in a streaming
                    // fashion, which should result in lower memory usage and better perf
                    context.TransactionalStorage.Batch(accessor =>
                    {
                        if (usedStorageAccessors.TryAdd(accessor))
                        {
                            var storageCommitDuration = new Stopwatch();

                            accessor.BeforeStorageCommit += storageCommitDuration.Start;

                            accessor.AfterStorageCommit += () =>
                            {
                                storageCommitDuration.Stop();

                                parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds));
                            };
                        }

                        var mapResults             = RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, localStats, linqExecutionDuration);
                        var currentDocumentResults = new List <object>();
                        string currentKey          = null;
                        bool skipDocument          = false;

                        foreach (var currentDoc in mapResults)
                        {
                            token.ThrowIfCancellationRequested();

                            var documentId = GetDocumentId(currentDoc);
                            if (documentId != currentKey)
                            {
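                                // map results for a document arrive consecutively, so a change
                                // of document id means the previous document's results can be flushed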
                                count += ProcessBatch(viewGenerator, currentDocumentResults, currentKey, localChanges, accessor, statsPerKey, reduceInMapLinqExecutionDuration, putMappedResultsDuration, convertToRavenJObjectDuration);

                                currentDocumentResults.Clear();
                                currentKey = documentId;
                            }
                            else if (skipDocument)
                            {
                                continue;
                            }

                            RavenJObject currentDocJObject;
                            using (StopwatchScope.For(convertToRavenJObjectDuration))
                            {
                                currentDocJObject = RavenJObject.FromObject(currentDoc, jsonSerializer);
                            }

                            currentDocumentResults.Add(new DynamicJsonObject(currentDocJObject));

                            if (EnsureValidNumberOfOutputsForDocument(documentId, currentDocumentResults.Count) == false)
                            {
                                skipDocument = true;
                                currentDocumentResults.Clear();
                                continue;
                            }

                            Interlocked.Increment(ref localStats.IndexingSuccesses);
                        }
                        count += ProcessBatch(viewGenerator, currentDocumentResults, currentKey, localChanges, accessor, statsPerKey, reduceInMapLinqExecutionDuration, putMappedResultsDuration, convertToRavenJObjectDuration);

                        parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.LoadDocument, CurrentIndexingScope.Current.LoadDocumentDuration.ElapsedMilliseconds));
                        parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Linq_MapExecution, linqExecutionDuration.ElapsedMilliseconds));
                        parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Linq_ReduceLinqExecution, reduceInMapLinqExecutionDuration.ElapsedMilliseconds));
                        parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Map_PutMappedResults, putMappedResultsDuration.ElapsedMilliseconds));
                        parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Map_ConvertToRavenJObject, convertToRavenJObjectDuration.ElapsedMilliseconds));

                        parallelOperations.Enqueue(parallelStats);
                    });

                    allReferenceEtags.Enqueue(CurrentIndexingScope.Current.ReferencesEtags);
                    allReferencedDocs.Enqueue(CurrentIndexingScope.Current.ReferencedDocuments);
                }
            });

            performanceStats.Add(new ParallelPerformanceStats
            {
                NumberOfThreads   = parallelOperations.Count,
                DurationMs        = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds,
                BatchedOperations = parallelOperations.ToList()
            });

            var updateDocumentReferencesDuration = new Stopwatch();

            using (StopwatchScope.For(updateDocumentReferencesDuration))
            {
                UpdateDocumentReferences(actions, allReferencedDocs, allReferenceEtags);
            }
            performanceStats.Add(PerformanceStats.From(IndexingOperation.UpdateDocumentReferences, updateDocumentReferencesDuration.ElapsedMilliseconds));

            var changed = allState.SelectMany(x => x.Item1).Concat(deleted.Keys)
                          .Distinct()
                          .ToList();

            var stats          = new IndexingWorkStats(allState.Select(x => x.Item2));
            var reduceKeyStats = allState.SelectMany(x => x.Item3)
                                 .GroupBy(x => x.Key)
                                 .Select(g => new { g.Key, Count = g.Sum(x => x.Value) })
                                 .ToList();

            BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, reduceKeyStats, enumerator => context.TransactionalStorage.Batch(accessor =>
            {
                while (enumerator.MoveNext())
                {
                    var reduceKeyStat = enumerator.Current;
                    accessor.MapReduce.IncrementReduceKeyCounter(indexId, reduceKeyStat.Key, reduceKeyStat.Count);
                }
            }));

            actions.General.MaybePulseTransaction();

            var parallelReductionOperations = new ConcurrentQueue <ParallelBatchStats>();
            var parallelReductionStart      = SystemTime.UtcNow;

            BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, changed, enumerator => context.TransactionalStorage.Batch(accessor =>
            {
                var parallelStats = new ParallelBatchStats
                {
                    StartDelay = (long)(SystemTime.UtcNow - parallelReductionStart).TotalMilliseconds
                };

                var scheduleReductionsDuration = new Stopwatch();

                using (StopwatchScope.For(scheduleReductionsDuration))
                {
                    while (enumerator.MoveNext())
                    {
                        accessor.MapReduce.ScheduleReductions(indexId, 0, enumerator.Current);
                        accessor.General.MaybePulseTransaction();
                    }
                }

                parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Map_ScheduleReductions, scheduleReductionsDuration.ElapsedMilliseconds));
                parallelReductionOperations.Enqueue(parallelStats);
            }));

            performanceStats.Add(new ParallelPerformanceStats
            {
                NumberOfThreads   = parallelReductionOperations.Count,
                DurationMs        = (long)(SystemTime.UtcNow - parallelReductionStart).TotalMilliseconds,
                BatchedOperations = parallelReductionOperations.ToList()
            });

            UpdateIndexingStats(context, stats);

            performance.OnCompleted = () => BatchCompleted("Current Map", "Map", sourceCount, count, performanceStats);

            logIndexing.Debug("Mapped {0} documents for {1}", count, PublicName);

            return(performance);
        }
Example #7
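        // Flushes the accumulated map results of a single document: runs the map-side
        // reduce over them, persists one mapped result per reduce key, and records
        // which reduce key/bucket pairs changed.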
        private int ProcessBatch(AbstractViewGenerator viewGenerator, List <object> currentDocumentResults, string currentKey, HashSet <ReduceKeyAndBucket> changes,
                                 IStorageActionsAccessor actions,
                                 IDictionary <string, int> statsPerKey, Stopwatch reduceDuringMapLinqExecution, Stopwatch putMappedResultsDuration, Stopwatch convertToRavenJObjectDuration)
        {
            if (currentKey == null || currentDocumentResults.Count == 0)
            {
                return(0);
            }

            var old = CurrentIndexingScope.Current;

            try
            {
                CurrentIndexingScope.Current = null;

                if (logIndexing.IsDebugEnabled)
                {
                    var sb = new StringBuilder()
                             .AppendFormat("Index {0} for document {1} resulted in:", PublicName, currentKey)
                             .AppendLine();
                    foreach (var currentDocumentResult in currentDocumentResults)
                    {
                        sb.AppendLine(JsonConvert.SerializeObject(currentDocumentResult));
                    }
                    logIndexing.Debug(sb.ToString());
                }

                int count = 0;

                var results = RobustEnumerationReduceDuringMapPhase(currentDocumentResults.GetEnumerator(), viewGenerator.ReduceDefinition, reduceDuringMapLinqExecution);
                foreach (var doc in results)
                {
                    count++;

                    var reduceValue = viewGenerator.GroupByExtraction(doc);
                    if (reduceValue == null)
                    {
                        logIndexing.Debug("Field {0} is used as the reduce key and cannot be null, skipping document {1}",
                                          viewGenerator.GroupByExtraction, currentKey);
                        continue;
                    }
                    string reduceKey = ReduceKeyToString(reduceValue);

                    RavenJObject data;
                    using (StopwatchScope.For(convertToRavenJObjectDuration))
                    {
                        data = GetMappedData(doc);
                    }

                    if (logIndexing.IsDebugEnabled)
                    {
                        logIndexing.Debug("Index {0} for document {1} resulted in ({2}): {3}", PublicName, currentKey, reduceKey, data);
                    }

                    using (StopwatchScope.For(putMappedResultsDuration))
                    {
                        actions.MapReduce.PutMappedResult(indexId, currentKey, reduceKey, data);
                    }

                    statsPerKey[reduceKey] = statsPerKey.GetOrDefault(reduceKey) + 1;
                    actions.General.MaybePulseTransaction();
                    changes.Add(new ReduceKeyAndBucket(IndexingUtil.MapBucket(currentKey), reduceKey));
                }
                return(count);
            }
            finally
            {
                CurrentIndexingScope.Current = old;
            }
        }
Example #8
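        // Earlier variant of the precomputed-batch optimization from Example #1: no
        // document-size or memory guards, and the last indexed etag is found by
        // scanning for the highest document etag instead of using the query header.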
        private void ApplyPrecomputedBatchForNewIndex(Index index, AbstractViewGenerator generator)
        {
            PrecomputedIndexingBatch result = null;

            var docsToIndex = new List <JsonDocument>();

            TransactionalStorage.Batch(actions =>
            {
                var query = GetQueryForAllMatchingDocumentsForIndex(generator);

                JsonDocument highestByEtag = null;

                var cts = new CancellationTokenSource();
                using (var linked = CancellationTokenSource.CreateLinkedTokenSource(cts.Token, WorkContext.CancellationToken))
                    using (var op = new QueryActions.DatabaseQueryOperation(Database, Constants.DocumentsByEntityNameIndex, new IndexQuery
                    {
                        Query = query,
                        PageSize = Database.Configuration.MaxNumberOfItemsToProcessInSingleBatch
                    }, actions, linked)
                    {
                        ShouldSkipDuplicateChecking = true
                    })
                    {
                        op.Init();
                        if (op.Header.TotalResults == 0 ||
                            (op.Header.TotalResults > Database.Configuration.MaxNumberOfItemsToProcessInSingleBatch))
                        {
                            // we don't apply this optimization if the total number of results
                            // to index is more than the max number to index in a single batch.
                            // The idea here is that we need to keep the amount
                            // of memory we use to a manageable level even when introducing a new index to a BIG
                            // database
                            try
                            {
                                cts.Cancel();
                                // we have to run just a little bit of the query to properly setup the disposal
                                op.Execute(o => { });
                            }
                            catch (OperationCanceledException)
                            {
                            }
                            return;
                        }

                        Log.Debug("For new index {0}, using precomputed indexing batch optimization for {1} docs", index,
                                  op.Header.TotalResults);
                        op.Execute(document =>
                        {
                            var metadata     = document.Value <RavenJObject>(Constants.Metadata);
                            var key          = metadata.Value <string>("@id");
                            var etag         = Etag.Parse(metadata.Value <string>("@etag"));
                            var lastModified = DateTime.Parse(metadata.Value <string>(Constants.LastModified));
                            document.Remove(Constants.Metadata);

                            var doc = new JsonDocument
                            {
                                DataAsJson          = document,
                                Etag                = etag,
                                Key                 = key,
                                LastModified        = lastModified,
                                SkipDeleteFromIndex = true,
                                Metadata            = metadata
                            };

                            docsToIndex.Add(doc);

                            if (highestByEtag == null || doc.Etag.CompareTo(highestByEtag.Etag) > 0)
                            {
                                highestByEtag = doc;
                            }
                        });
                    }

                result = new PrecomputedIndexingBatch
                {
                    LastIndexed  = highestByEtag.Etag,
                    LastModified = highestByEtag.LastModified.Value,
                    Documents    = docsToIndex,
                    Index        = index
                };
            });

            if (result != null && result.Documents != null && result.Documents.Count > 0)
            {
                Database.IndexingExecuter.IndexPrecomputedBatch(result);
            }
        }