Example #1
        private void IndexDocuments(IStorageActionsAccessor actions, string index, JsonDocument[] jsonDocs)
        {
            var viewGenerator = context.IndexDefinitionStorage.GetViewGenerator(index);

            if (viewGenerator == null)
            {
                return;                 // index was deleted, probably
            }
            var dateTime = jsonDocs.Min(x => x.LastModified);

            var documentRetriever = new DocumentRetriever(null, context.ReadTriggers);

            try
            {
                log.DebugFormat("Indexing {0} documents for index: {1}", jsonDocs.Length, index);
                context.IndexStorage.Index(index, viewGenerator,
                                           jsonDocs
                                           .Select(doc => documentRetriever
                                                   .ExecuteReadTriggers(doc, null, ReadOperation.Index))
                                           .Where(doc => doc != null)
                                           .Select(x => JsonToExpando.Convert(x.ToJson())), context, actions, dateTime);
            }
            catch (Exception e)
            {
                if (actions.IsWriteConflict(e))
                {
                    return;
                }
                log.WarnFormat(e, "Failed to index documents for index: {0}", index);
            }
        }
Example #2
        public override void Execute(WorkContext context)
        {
            var viewGenerator = context.IndexDefinitionStorage.GetViewGenerator(Index);

            if (viewGenerator == null)
            {
                return;                 // index was deleted, probably
            }
            context.TransactionaStorage.Batch(actions =>
            {
                var docsToIndex = actions.Documents.DocumentsById(FromId, ToId)
                                  .Select(d => d.Item1)
                                  .Where(x => x != null)
                                  .Select(s => JsonToExpando.Convert(s.ToJson()));
                context.IndexStorage.Index(Index, viewGenerator, docsToIndex, context, actions);
            });
        }
Example #3
        public override void Execute(WorkContext context)
        {
            context.TransactionaStorage.Batch(actions =>
            {
                var jsonDocuments = Keys.Select(key => actions.DocumentByKey(key, null))
                                    .Where(x => x != null)
                                    .Select(x => JsonToExpando.Convert(x.ToJson()))
                                    .ToArray();

                var keysAsString  = string.Join(", ", Keys);
                var viewGenerator = context.IndexDefinitionStorage.GetViewGenerator(Index);
                if (viewGenerator == null)
                {
                    return;                     // index was deleted, probably
                }
                try
                {
                    logger.DebugFormat("Indexing documents: [{0}] for index: {1}", keysAsString, Index);

                    var failureRate = actions.GetFailureRate(Index);
                    if (failureRate.IsInvalidIndex)
                    {
                        logger.InfoFormat("Skipped indexing documents: [{0}] for index: {1} because failure rate is too high: {2}",
                                          keysAsString, Index,
                                          failureRate.FailureRate);
                        return;
                    }


                    context.IndexStorage.Index(Index, viewGenerator, jsonDocuments,
                                               context, actions);
                }
                catch (Exception e)
                {
                    logger.WarnFormat(e, "Failed to index document  [{0}] for index: {1}", keysAsString, Index);
                }
            });
        }
Example #4
        private IEnumerable <IndexingBatchForIndex> FilterIndexes(IList <IndexToWorkOn> indexesToWorkOn, List <JsonDocument> jsonDocs, Etag highestETagInBatch)
        {
            var last = jsonDocs.Last();

            Debug.Assert(last.Etag != null);
            Debug.Assert(last.LastModified != null);

            var lastEtag     = last.Etag;
            var lastModified = last.LastModified.Value;

            var documentRetriever = new DocumentRetriever(null, null, context.ReadTriggers, context.Database.InFlightTransactionalState);

            var filteredDocs =
                BackgroundTaskExecuter.Instance.Apply(context, jsonDocs, doc =>
            {
                var filteredDoc = documentRetriever.ExecuteReadTriggers(doc, null, ReadOperation.Index);
                return filteredDoc == null
                    ? new { Doc = doc, Json = (object)new FilteredDocument(doc) }
                    : new { Doc = filteredDoc, Json = JsonToExpando.Convert(doc.ToJson()) };
            });

            Log.Debug("After read triggers executed, {0} documents remained", filteredDocs.Count);

            var results = new IndexingBatchForIndex[indexesToWorkOn.Count];
            var actions = new Action <IStorageActionsAccessor> [indexesToWorkOn.Count];

            BackgroundTaskExecuter.Instance.ExecuteAll(context, indexesToWorkOn, (indexToWorkOn, i) =>
            {
                var indexName     = indexToWorkOn.Index.PublicName;
                var viewGenerator = context.IndexDefinitionStorage.GetViewGenerator(indexName);
                if (viewGenerator == null)
                {
                    return;                     // probably deleted
                }
                var batch = new IndexingBatch(highestETagInBatch);

                foreach (var item in filteredDocs)
                {
                    if (defaultPrefetchingBehavior.FilterDocuments(item.Doc) == false)
                    {
                        continue;
                    }

                    // have we already indexed this document in this index?
                    var etag = item.Doc.Etag;
                    if (etag == null)
                    {
                        continue;
                    }

                    // is the Raven-Entity-Name a match for the things the index executes on?
                    if (viewGenerator.ForEntityNames.Count != 0 &&
                        viewGenerator.ForEntityNames.Contains(item.Doc.Metadata.Value <string>(Constants.RavenEntityName)) == false)
                    {
                        continue;
                    }

                    batch.Add(item.Doc, item.Json, defaultPrefetchingBehavior.ShouldSkipDeleteFromIndex(item.Doc));

                    if (batch.DateTime == null)
                    {
                        batch.DateTime = item.Doc.LastModified;
                    }
                    else
                    {
                        batch.DateTime = batch.DateTime > item.Doc.LastModified
                                             ? item.Doc.LastModified
                                             : batch.DateTime;
                    }
                }

                if (batch.Docs.Count == 0)
                {
                    Log.Debug("All documents have been filtered for {0}, no indexing will be performed, updating to {1}, {2}", indexName,
                              lastEtag, lastModified);
                    // we use it this way to batch all the updates together
                    actions[i] = accessor =>
                    {
                        accessor.Indexing.UpdateLastIndexed(indexToWorkOn.Index.indexId, lastEtag, lastModified);

                        accessor.AfterStorageCommit += () =>
                        {
                            indexToWorkOn.Index.EnsureIndexWriter();
                            indexToWorkOn.Index.Flush(lastEtag);
                        };
                    };
                    return;
                }
                if (Log.IsDebugEnabled)
                {
                    Log.Debug("Going to index {0} documents in {1}: ({2})", batch.Ids.Count, indexToWorkOn, string.Join(", ", batch.Ids));
                }
                results[i] = new IndexingBatchForIndex
                {
                    Batch           = batch,
                    IndexId         = indexToWorkOn.IndexId,
                    Index           = indexToWorkOn.Index,
                    LastIndexedEtag = indexToWorkOn.LastIndexedEtag
                };
            });

            transactionalStorage.Batch(actionsAccessor =>
            {
                foreach (var action in actions)
                {
                    if (action != null)
                    {
                        action(actionsAccessor);
                    }
                }
            });

            return results.Where(x => x != null);
        }
Example #5
        public static dynamic GetDocumentFromString(string json)
        {
            return JsonToExpando.Convert(RavenJObject.Parse(json));
        }
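
The helper above is the common denominator of every example on this page: JsonToExpando.Convert wraps RavenDB's RavenJObject in a dynamic object so index and trigger code can use plain property access. Below is a minimal usage sketch, not from the RavenDB source; the JSON payload and the Name/Age properties are illustrative, and the using assumes the Raven.Json.Linq namespace for RavenJObject.

        // Hypothetical usage sketch, not from the RavenDB source.
        using Raven.Json.Linq;

        public static class JsonToExpandoUsage
        {
            public static void Main()
            {
                // Parse raw JSON into RavenDB's JSON DOM, then expose it dynamically.
                dynamic doc = GetDocumentFromString("{ \"Name\": \"ayende\", \"Age\": 42 }");

                // Dynamic member access resolves against the underlying RavenJObject.
                string name = doc.Name;   // "ayende"
                int    age  = doc.Age;    // 42
            }

            public static dynamic GetDocumentFromString(string json)
            {
                return JsonToExpando.Convert(RavenJObject.Parse(json));
            }
        }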
Example #6
        protected void HandleReduceForIndex(IndexToWorkOn indexToWorkOn)
        {
            var viewGenerator = context.IndexDefinitionStorage.GetViewGenerator(indexToWorkOn.IndexName);

            if (viewGenerator == null)
            {
                return;
            }
            TimeSpan reduceDuration    = TimeSpan.Zero;
            int      totalCount        = 0;
            int      totalSize         = 0;
            bool     operationCanceled = false;
            var      itemsToDelete     = new List <object>();

            try
            {
                var sw = Stopwatch.StartNew();
                for (int i = 0; i < 3; i++)
                {
                    var level = i;
                    transactionalStorage.Batch(actions =>
                    {
                        context.CancellationToken.ThrowIfCancellationRequested();

                        var sp = Stopwatch.StartNew();
                        var persistedResults = actions.MapReduce.GetItemsToReduce(
                            take: context.CurrentNumberOfItemsToReduceInSingleBatch,
                            level: level,
                            index: indexToWorkOn.IndexName,
                            itemsToDelete: itemsToDelete).ToList();

                        totalCount += persistedResults.Count;
                        totalSize  += persistedResults.Sum(x => x.Size);

                        if (Log.IsDebugEnabled)
                        {
                            if (persistedResults.Count > 0)
                            {
                                Log.Debug(() => string.Format("Found {0} results for keys [{1}] for index {2} at level {3} in {4}",
                                                              persistedResults.Count, string.Join(", ", persistedResults.Select(x => x.ReduceKey).Distinct()), indexToWorkOn.IndexName, level, sp.Elapsed));
                            }
                            else
                            {
                                Log.Debug("No reduce keys found for {0}", indexToWorkOn.IndexName);
                            }
                        }

                        context.CancellationToken.ThrowIfCancellationRequested();

                        var requiredReduceNextTime = persistedResults.Select(x => new ReduceKeyAndBucket(x.Bucket, x.ReduceKey)).Distinct().ToArray();
                        foreach (var mappedResultInfo in requiredReduceNextTime)
                        {
                            actions.MapReduce.RemoveReduceResults(indexToWorkOn.IndexName, level + 1, mappedResultInfo.ReduceKey, mappedResultInfo.Bucket);
                        }
                        if (level != 2)
                        {
                            var reduceKeysAndBuckets = requiredReduceNextTime
                                                       .Select(x => new ReduceKeyAndBucket(x.Bucket / 1024, x.ReduceKey))
                                                       .Distinct()
                                                       .ToArray();
                            actions.MapReduce.ScheduleReductions(indexToWorkOn.IndexName, level + 1, reduceKeysAndBuckets);
                        }

                        var results = persistedResults
                                      .Where(x => x.Data != null)
                                      .GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data))
                                      .ToArray();
                        var reduceKeys = new HashSet <string>(persistedResults.Select(x => x.ReduceKey),
                                                              StringComparer.InvariantCultureIgnoreCase);
                        context.ReducedPerSecIncreaseBy(results.Length);

                        context.CancellationToken.ThrowIfCancellationRequested();
                        sp = Stopwatch.StartNew();
                        context.IndexStorage.Reduce(indexToWorkOn.IndexName, viewGenerator, results, level, context, actions, reduceKeys);
                        Log.Debug("Indexed {0} reduce keys in {1} with {2} results for index {3} in {4}", reduceKeys.Count, sp.Elapsed,
                                  results.Length, indexToWorkOn.IndexName, sp.Elapsed);
                    });
                }
                reduceDuration = sw.Elapsed;
            }
            catch (OperationCanceledException)
            {
                operationCanceled = true;
            }
            finally
            {
                if (operationCanceled == false)
                {
                    // whatever we succeeded in indexing or not, we have to update this
                    // because otherwise we keep trying to re-index failed mapped results
                    transactionalStorage.Batch(actions =>
                    {
                        var latest = actions.MapReduce.DeleteScheduledReduction(itemsToDelete);

                        if (latest == null)
                        {
                            return;
                        }
                        actions.Indexing.UpdateLastReduced(indexToWorkOn.IndexName, latest.Etag, latest.Timestamp);
                    });
                    autoTuner.AutoThrottleBatchSize(totalCount, totalSize, reduceDuration);
                }
            }
        }
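
The x.Bucket / 1024 in the scheduling step above is the addressing scheme of the whole reduce tree: level-1 reduce results are keyed by the mapped bucket itself, and every 1024 adjacent buckets collapse into a single level-2 bucket. A small sketch of that arithmetic follows; the class and method names are illustrative, not from the RavenDB source.

        // Illustrative sketch of the bucket tree walked level by level in Example #6.
        public static class ReduceBucketMath
        {
            // Level-1 reduce results are keyed by the mapped bucket itself.
            public static int Level1Bucket(int mappedBucket)
            {
                return mappedBucket;
            }

            // 1024 level-1 buckets collapse into one level-2 bucket,
            // matching the x.Bucket / 1024 in ScheduleReductions above.
            public static int Level2Bucket(int mappedBucket)
            {
                return mappedBucket / 1024;   // e.g. bucket 123456 -> 120
            }
        }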
Example #7
        private ReducingPerformanceStats SingleStepReduce(IndexToWorkOn index, List <string> keysToReduce, AbstractViewGenerator viewGenerator,
                                                          ConcurrentSet <object> itemsToDelete, CancellationToken token)
        {
            var needToMoveToSingleStepQueue = new ConcurrentQueue <HashSet <string> >();

            if (Log.IsDebugEnabled)
            {
                Log.Debug(() => string.Format("Executing single step reducing for {0} keys [{1}]", keysToReduce.Count, string.Join(", ", keysToReduce)));
            }

            var batchTimeWatcher = Stopwatch.StartNew();

            var reducingBatchThrottlerId = Guid.NewGuid();
            var reducePerformanceStats   = new ReducingPerformanceStats(ReduceType.SingleStep);
            var reduceLevelStats         = new ReduceLevelPeformanceStats
            {
                Started = SystemTime.UtcNow,
                Level   = 2
            };

            try
            {
                var parallelOperations = new ConcurrentQueue <ParallelBatchStats>();

                var parallelProcessingStart = SystemTime.UtcNow;

                BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, keysToReduce, enumerator =>
                {
                    var parallelStats = new ParallelBatchStats
                    {
                        StartDelay = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds
                    };

                    var localNeedToMoveToSingleStep = new HashSet <string>();
                    needToMoveToSingleStepQueue.Enqueue(localNeedToMoveToSingleStep);
                    var localKeys = new HashSet <string>();
                    while (enumerator.MoveNext())
                    {
                        token.ThrowIfCancellationRequested();

                        localKeys.Add(enumerator.Current);
                    }

                    transactionalStorage.Batch(actions =>
                    {
                        var getItemsToReduceParams = new GetItemsToReduceParams(index: index.IndexId, reduceKeys: new HashSet <string>(localKeys), level: 0, loadData: false, itemsToDelete: itemsToDelete)
                        {
                            Take = int.MaxValue // just get all, we do the rate limit when we load the number of keys to reduce, anyway
                        };

                        var getItemsToReduceDuration = Stopwatch.StartNew();

                        int scheduledItemsSum   = 0;
                        int scheduledItemsCount = 0;
                        List <int> scheduledItemsMappedBuckets = new List <int>();
                        using (StopwatchScope.For(getItemsToReduceDuration))
                        {
                            foreach (var item in actions.MapReduce.GetItemsToReduce(getItemsToReduceParams, token))
                            {
                                scheduledItemsMappedBuckets.Add(item.Bucket);
                                scheduledItemsSum += item.Size;
                                scheduledItemsCount++;
                            }
                        }

                        parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_GetItemsToReduce, getItemsToReduceDuration.ElapsedMilliseconds));

                        autoTuner.CurrentlyUsedBatchSizesInBytes.GetOrAdd(reducingBatchThrottlerId, scheduledItemsSum);

                        if (scheduledItemsCount == 0)
                        {
                            // Here we have an interesting issue. We have scheduled reductions, because GetReduceTypesPerKeys() returned them
                            // and at the same time, we don't have any at level 0. That probably means that we have them at level 1 or 2.
                            // They shouldn't be here, and indeed, we remove them just a little down from here in this function.
                            // That said, they might have been smuggled in between versions, or something happened to cause them to be here.
                            // In order to avoid that, we forcibly delete those extra items from the scheduled reductions, and move on

                            Log.Warn("Found single reduce items ({0}) that didn't have any items to reduce. Deleting level 1 & level 2 items for those keys. (If you can reproduce this, please contact [email protected])", string.Join(", ", keysToReduce));

                            var deletingScheduledReductionsDuration = Stopwatch.StartNew();

                            using (StopwatchScope.For(deletingScheduledReductionsDuration))
                            {
                                foreach (var reduceKey in keysToReduce)
                                {
                                    token.ThrowIfCancellationRequested();

                                    actions.MapReduce.DeleteScheduledReduction(index.IndexId, 1, reduceKey);
                                    actions.MapReduce.DeleteScheduledReduction(index.IndexId, 2, reduceKey);
                                }
                            }

                            parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_DeleteScheduledReductions, deletingScheduledReductionsDuration.ElapsedMilliseconds));
                        }

                        var removeReduceResultsDuration = new Stopwatch();

                        foreach (var reduceKey in localKeys)
                        {
                            token.ThrowIfCancellationRequested();

                            var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexId, reduceKey);

                            if (lastPerformedReduceType != ReduceType.SingleStep)
                            {
                                localNeedToMoveToSingleStep.Add(reduceKey);
                            }

                            if (lastPerformedReduceType != ReduceType.MultiStep)
                            {
                                continue;
                            }

                            if (Log.IsDebugEnabled)
                            {
                                Log.Debug("Key {0} was moved from multi step to single step reduce, removing existing reduce results records", reduceKey);
                            }

                            using (StopwatchScope.For(removeReduceResultsDuration))
                            {
                                // now we are in single step but previously multi step reduce was performed for the given key
                                var mappedBuckets = actions.MapReduce.GetMappedBuckets(index.IndexId, reduceKey, token);

                                // add scheduled items too to be sure we will delete reduce results of already deleted documents
                                foreach (var mappedBucket in mappedBuckets.Union(scheduledItemsMappedBuckets))
                                {
                                    actions.MapReduce.RemoveReduceResults(index.IndexId, 1, reduceKey, mappedBucket);
                                    actions.MapReduce.RemoveReduceResults(index.IndexId, 2, reduceKey, mappedBucket / 1024);
                                }
                            }
                        }

                        parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_RemoveReduceResults, removeReduceResultsDuration.ElapsedMilliseconds));

                        parallelOperations.Enqueue(parallelStats);
                    });
                });

                reduceLevelStats.Operations.Add(new ParallelPerformanceStats
                {
                    NumberOfThreads   = parallelOperations.Count,
                    DurationMs        = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds,
                    BatchedOperations = parallelOperations.ToList()
                });

                var getMappedResultsDuration = new Stopwatch();

                var reductionPerformanceStats = new List <IndexingPerformanceStats>();

                var keysLeftToReduce = new HashSet <string>(keysToReduce);
                while (keysLeftToReduce.Count > 0)
                {
                    var keysReturned = new HashSet <string>();

                    // Try to diminish the allocations happening because of .Resize()
                    var mappedResults = new List <MappedResultInfo>(keysLeftToReduce.Count);

                    context.TransactionalStorage.Batch(actions =>
                    {
                        var take = context.CurrentNumberOfItemsToReduceInSingleBatch;

                        using (StopwatchScope.For(getMappedResultsDuration))
                        {
                            mappedResults = actions.MapReduce.GetMappedResults(index.IndexId, keysLeftToReduce, true, take, keysReturned, token, mappedResults);
                        }
                    });

                    var count = mappedResults.Count;

                    int size = 0;
                    foreach (var item in mappedResults)
                    {
                        item.Bucket = 0;
                        size       += item.Size;
                    }

                    var results = mappedResults.GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data)).ToArray();

                    context.MetricsCounters.ReducedPerSecond.Mark(results.Length);

                    token.ThrowIfCancellationRequested();

                    var performance = context.IndexStorage.Reduce(index.IndexId, viewGenerator, results, 2, context, null, keysReturned, count);

                    reductionPerformanceStats.Add(performance);

                    autoTuner.AutoThrottleBatchSize(count, size, batchTimeWatcher.Elapsed);
                }

                var needToMoveToSingleStep = new HashSet <string>();

                HashSet <string> set;
                while (needToMoveToSingleStepQueue.TryDequeue(out set))
                {
                    needToMoveToSingleStep.UnionWith(set);
                }

                foreach (var reduceKey in needToMoveToSingleStep)
                {
                    string localReduceKey = reduceKey;
                    transactionalStorage.Batch(actions =>
                                               actions.MapReduce.UpdatePerformedReduceType(index.IndexId, localReduceKey, ReduceType.SingleStep));
                }

                reduceLevelStats.Completed = SystemTime.UtcNow;
                reduceLevelStats.Duration  = reduceLevelStats.Completed - reduceLevelStats.Started;
                reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_GetMappedResults, getMappedResultsDuration.ElapsedMilliseconds));
                reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.StorageCommit, 0)); // in single step we write directly to Lucene index

                foreach (var stats in reductionPerformanceStats)
                {
                    reduceLevelStats.Add(stats);
                }

                reducePerformanceStats.LevelStats.Add(reduceLevelStats);
            }
            finally
            {
                long _;
                autoTuner.CurrentlyUsedBatchSizesInBytes.TryRemove(reducingBatchThrottlerId, out _);
            }

            return reducePerformanceStats;
        }
Example #8
        private ReducingPerformanceStats MultiStepReduce(IndexToWorkOn index, List <string> keysToReduce, AbstractViewGenerator viewGenerator, ConcurrentSet <object> itemsToDelete, CancellationToken token)
        {
            var needToMoveToMultiStep = new HashSet <string>();

            transactionalStorage.Batch(actions =>
            {
                foreach (var localReduceKey in keysToReduce)
                {
                    token.ThrowIfCancellationRequested();

                    var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexId, localReduceKey);

                    if (lastPerformedReduceType != ReduceType.MultiStep)
                    {
                        needToMoveToMultiStep.Add(localReduceKey);
                    }

                    if (lastPerformedReduceType != ReduceType.SingleStep)
                    {
                        continue;
                    }

                    // we exceeded the limit of items to reduce in single step
                    // now we need to schedule reductions at level 0 for all map results with given reduce key
                    var mappedItems = actions.MapReduce.GetMappedBuckets(index.IndexId, localReduceKey, token).ToList();
                    foreach (var result in mappedItems.Select(x => new ReduceKeyAndBucket(x, localReduceKey)))
                    {
                        actions.MapReduce.ScheduleReductions(index.IndexId, 0, result);
                    }
                }
            });

            var reducePerformance = new ReducingPerformanceStats(ReduceType.MultiStep);


            for (int i = 0; i < 3; i++)
            {
                var level = i;

                var reduceLevelStats = new ReduceLevelPeformanceStats()
                {
                    Level   = level,
                    Started = SystemTime.UtcNow,
                };

                var reduceParams = new GetItemsToReduceParams(
                    index.IndexId,
                    new HashSet <string>(keysToReduce),
                    level,
                    true,
                    itemsToDelete);

                var gettingItemsToReduceDuration = new Stopwatch();
                var scheduleReductionsDuration   = new Stopwatch();
                var removeReduceResultsDuration  = new Stopwatch();
                var storageCommitDuration        = new Stopwatch();

                bool retry = true;
                while (retry && reduceParams.ReduceKeys.Count > 0)
                {
                    var reduceBatchAutoThrottlerId = Guid.NewGuid();
                    try
                    {
                        transactionalStorage.Batch(actions =>
                        {
                            token.ThrowIfCancellationRequested();

                            actions.BeforeStorageCommit += storageCommitDuration.Start;
                            actions.AfterStorageCommit  += storageCommitDuration.Stop;

                            var batchTimeWatcher = Stopwatch.StartNew();

                            reduceParams.Take = context.CurrentNumberOfItemsToReduceInSingleBatch;

                            int size = 0;

                            IList <MappedResultInfo> persistedResults;
                            var reduceKeys = new HashSet <string>(StringComparer.InvariantCultureIgnoreCase);
                            using (StopwatchScope.For(gettingItemsToReduceDuration))
                            {
                                persistedResults = actions.MapReduce.GetItemsToReduce(reduceParams, token);

                                foreach (var item in persistedResults)
                                {
                                    reduceKeys.Add(item.ReduceKey);
                                    size += item.Size;
                                }
                            }

                            if (persistedResults.Count == 0)
                            {
                                retry = false;
                                return;
                            }

                            var count = persistedResults.Count;

                            autoTuner.CurrentlyUsedBatchSizesInBytes.GetOrAdd(reduceBatchAutoThrottlerId, size);

                            if (Log.IsDebugEnabled)
                            {
                                if (persistedResults.Count > 0)
                                {
                                    Log.Debug(() => string.Format("Found {0} results for keys [{1}] for index {2} at level {3} in {4}",
                                                                  persistedResults.Count,
                                                                  string.Join(", ", persistedResults.Select(x => x.ReduceKey).Distinct()),
                                                                  index.Index.PublicName, level, batchTimeWatcher.Elapsed));
                                }
                                else
                                {
                                    Log.Debug("No reduce keys found for {0}", index.Index.PublicName);
                                }
                            }

                            token.ThrowIfCancellationRequested();


                            var requiredReduceNextTimeSet = new HashSet <ReduceKeyAndBucket>(persistedResults.Select(x => new ReduceKeyAndBucket(x.Bucket, x.ReduceKey)), ReduceKeyAndBucketEqualityComparer.Instance);

                            using (StopwatchScope.For(removeReduceResultsDuration))
                            {
                                foreach (var mappedResultInfo in requiredReduceNextTimeSet)
                                {
                                    token.ThrowIfCancellationRequested();

                                    actions.MapReduce.RemoveReduceResults(index.IndexId, level + 1, mappedResultInfo.ReduceKey, mappedResultInfo.Bucket);
                                }
                            }

                            if (level != 2)
                            {
                                var reduceKeysAndBucketsSet = new HashSet <ReduceKeyAndBucket>(requiredReduceNextTimeSet.Select(x => new ReduceKeyAndBucket(x.Bucket / 1024, x.ReduceKey)), ReduceKeyAndBucketEqualityComparer.Instance);

                                using (StopwatchScope.For(scheduleReductionsDuration))
                                {
                                    foreach (var reduceKeysAndBucket in reduceKeysAndBucketsSet)
                                    {
                                        token.ThrowIfCancellationRequested();

                                        actions.MapReduce.ScheduleReductions(index.IndexId, level + 1, reduceKeysAndBucket);
                                    }
                                }
                            }

                            token.ThrowIfCancellationRequested();

                            var reduceTimeWatcher = Stopwatch.StartNew();

                            var results = persistedResults.Where(x => x.Data != null)
                                          .GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data))
                                          .ToList();

                            var performance = context.IndexStorage.Reduce(index.IndexId, viewGenerator, results, level, context, actions, reduceKeys, persistedResults.Count);

                            context.MetricsCounters.ReducedPerSecond.Mark(results.Count());

                            reduceLevelStats.Add(performance);

                            var batchDuration = batchTimeWatcher.Elapsed;

                            if (Log.IsDebugEnabled)
                            {
                                Log.Debug("Indexed {0} reduce keys in {1} with {2} results for index {3} in {4} on level {5}", reduceKeys.Count, batchDuration, performance.ItemsCount, index.Index.PublicName, reduceTimeWatcher.Elapsed, level);
                            }

                            autoTuner.AutoThrottleBatchSize(count, size, batchDuration);
                        });
                    }
                    finally
                    {
                        long _;
                        autoTuner.CurrentlyUsedBatchSizesInBytes.TryRemove(reduceBatchAutoThrottlerId, out _);
                    }
                }

                reduceLevelStats.Completed = SystemTime.UtcNow;
                reduceLevelStats.Duration  = reduceLevelStats.Completed - reduceLevelStats.Started;

                reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_GetItemsToReduce, gettingItemsToReduceDuration.ElapsedMilliseconds));
                reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_ScheduleReductions, scheduleReductionsDuration.ElapsedMilliseconds));
                reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_RemoveReduceResults, removeReduceResultsDuration.ElapsedMilliseconds));
                reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds));

                reducePerformance.LevelStats.Add(reduceLevelStats);
            }

            foreach (var reduceKey in needToMoveToMultiStep)
            {
                token.ThrowIfCancellationRequested();

                string localReduceKey = reduceKey;
                transactionalStorage.Batch(actions =>
                                           actions.MapReduce.UpdatePerformedReduceType(index.IndexId, localReduceKey,
                                                                                       ReduceType.MultiStep));
            }

            return reducePerformance;
        }
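
Examples #7 and #8 are the two halves of one dispatch: when the mapped results for a key fit in a single batch, the reducer goes straight to level 2 in one pass (single step); past that limit it schedules the full level 0 -> 1 -> 2 cascade (multi step). Each key records which mode last ran (GetLastPerformedReduceType), so whichever mode runs next can clean up the other mode's leftover results, as both methods do above. A hedged sketch of that dispatch follows; the HandleReduce name and the limit constant are assumptions, not the actual RavenDB entry point.

        // Illustrative dispatch only; the real selection logic and the
        // single-step limit live elsewhere in the RavenDB codebase.
        private ReducingPerformanceStats HandleReduce(IndexToWorkOn index, List<string> keysToReduce,
                                                      AbstractViewGenerator viewGenerator,
                                                      ConcurrentSet<object> itemsToDelete,
                                                      CancellationToken token, int mappedItemsCount)
        {
            const int NumberOfItemsToExecuteReduceInSingleStep = 1024; // assumed value

            return mappedItemsCount <= NumberOfItemsToExecuteReduceInSingleStep
                       ? SingleStepReduce(index, keysToReduce, viewGenerator, itemsToDelete, token)
                       : MultiStepReduce(index, keysToReduce, viewGenerator, itemsToDelete, token);
        }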
Example #9
        private void MultiStepReduce(IndexToWorkOn index, string[] keysToReduce, AbstractViewGenerator viewGenerator, ConcurrentSet <object> itemsToDelete)
        {
            var needToMoveToMultiStep = new HashSet <string>();

            transactionalStorage.Batch(actions =>
            {
                foreach (var localReduceKey in keysToReduce)
                {
                    var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexName, localReduceKey);

                    if (lastPerformedReduceType != ReduceType.MultiStep)
                    {
                        needToMoveToMultiStep.Add(localReduceKey);
                    }

                    if (lastPerformedReduceType != ReduceType.SingleStep)
                    {
                        continue;
                    }
                    // we exceeded the limit of items to reduce in single step
                    // now we need to schedule reductions at level 0 for all map results with given reduce key
                    var mappedItems = actions.MapReduce.GetMappedBuckets(index.IndexName, localReduceKey).ToList();
                    foreach (var result in mappedItems.Select(x => new ReduceKeyAndBucket(x, localReduceKey)))
                    {
                        actions.MapReduce.ScheduleReductions(index.IndexName, 0, result);
                    }
                }
            });

            for (int i = 0; i < 3; i++)
            {
                var level = i;

                var reduceParams = new GetItemsToReduceParams(
                    index.IndexName,
                    keysToReduce,
                    level,
                    true,
                    itemsToDelete);

                bool retry = true;
                while (retry && reduceParams.ReduceKeys.Count > 0)
                {
                    var reduceBatchAutoThrottlerId = Guid.NewGuid();
                    try
                    {
                        transactionalStorage.Batch(actions =>
                        {
                            context.CancellationToken.ThrowIfCancellationRequested();

                            var batchTimeWatcher = Stopwatch.StartNew();

                            reduceParams.Take    = context.CurrentNumberOfItemsToReduceInSingleBatch;
                            var persistedResults = actions.MapReduce.GetItemsToReduce(reduceParams).ToList();
                            if (persistedResults.Count == 0)
                            {
                                retry = false;
                                return;
                            }

                            var count = persistedResults.Count;
                            var size  = persistedResults.Sum(x => x.Size);
                            autoTuner.CurrentlyUsedBatchSizes.GetOrAdd(reduceBatchAutoThrottlerId, size);

                            if (Log.IsDebugEnabled)
                            {
                                if (persistedResults.Count > 0)
                                {
                                    Log.Debug(() => string.Format("Found {0} results for keys [{1}] for index {2} at level {3} in {4}",
                                                                  persistedResults.Count,
                                                                  string.Join(", ", persistedResults.Select(x => x.ReduceKey).Distinct()),
                                                                  index.IndexName, level, batchTimeWatcher.Elapsed));
                                }
                                else
                                {
                                    Log.Debug("No reduce keys found for {0}", index.IndexName);
                                }
                            }

                            context.CancellationToken.ThrowIfCancellationRequested();

                            var requiredReduceNextTime = persistedResults.Select(x => new ReduceKeyAndBucket(x.Bucket, x.ReduceKey))
                                                         .OrderBy(x => x.Bucket)
                                                         .Distinct()
                                                         .ToArray();
                            foreach (var mappedResultInfo in requiredReduceNextTime)
                            {
                                actions.MapReduce.RemoveReduceResults(index.IndexName, level + 1, mappedResultInfo.ReduceKey,
                                                                      mappedResultInfo.Bucket);
                            }

                            if (level != 2)
                            {
                                var reduceKeysAndBuckets = requiredReduceNextTime
                                                           .Select(x => new ReduceKeyAndBucket(x.Bucket / 1024, x.ReduceKey))
                                                           .Distinct()
                                                           .ToArray();
                                foreach (var reduceKeysAndBucket in reduceKeysAndBuckets)
                                {
                                    actions.MapReduce.ScheduleReductions(index.IndexName, level + 1, reduceKeysAndBucket);
                                }
                            }

                            var results = persistedResults
                                          .Where(x => x.Data != null)
                                          .GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data))
                                          .ToArray();
                            var reduceKeys = new HashSet <string>(persistedResults.Select(x => x.ReduceKey),
                                                                  StringComparer.InvariantCultureIgnoreCase);
                            context.PerformanceCounters.ReducedPerSecond.IncrementBy(results.Length);

                            context.CancellationToken.ThrowIfCancellationRequested();
                            var reduceTimeWatcher = Stopwatch.StartNew();

                            context.IndexStorage.Reduce(index.IndexName, viewGenerator, results, level, context, actions, reduceKeys, persistedResults.Count);

                            var batchDuration = batchTimeWatcher.Elapsed;
                            Log.Debug("Indexed {0} reduce keys in {1} with {2} results for index {3} in {4} on level {5}", reduceKeys.Count, batchDuration,
                                      results.Length, index.IndexName, reduceTimeWatcher.Elapsed, level);

                            autoTuner.AutoThrottleBatchSize(count, size, batchDuration);
                        });
                    }
                    finally
                    {
                        long _;
                        autoTuner.CurrentlyUsedBatchSizes.TryRemove(reduceBatchAutoThrottlerId, out _);
                    }
                }
            }

            foreach (var reduceKey in needToMoveToMultiStep)
            {
                string localReduceKey = reduceKey;
                transactionalStorage.Batch(actions =>
                                           actions.MapReduce.UpdatePerformedReduceType(index.IndexName, localReduceKey,
                                                                                       ReduceType.MultiStep));
            }
        }
Example #10
        private void SingleStepReduce(IndexToWorkOn index, string[] keysToReduce, AbstractViewGenerator viewGenerator,
                                      ConcurrentSet <object> itemsToDelete)
        {
            var needToMoveToSingleStepQueue = new ConcurrentQueue <HashSet <string> >();

            Log.Debug(() => string.Format("Executing single step reducing for {0} keys [{1}]", keysToReduce.Length, string.Join(", ", keysToReduce)));
            var batchTimeWatcher         = Stopwatch.StartNew();
            var count                    = 0;
            var size                     = 0;
            var state                    = new ConcurrentQueue <Tuple <HashSet <string>, List <MappedResultInfo> > >();
            var reducingBatchThrottlerId = Guid.NewGuid();

            try
            {
                BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, keysToReduce, enumerator =>
                {
                    var localNeedToMoveToSingleStep = new HashSet <string>();
                    needToMoveToSingleStepQueue.Enqueue(localNeedToMoveToSingleStep);
                    var localKeys = new HashSet <string>();
                    while (enumerator.MoveNext())
                    {
                        localKeys.Add(enumerator.Current);
                    }

                    transactionalStorage.Batch(actions =>
                    {
                        var getItemsToReduceParams = new GetItemsToReduceParams(index: index.IndexName, reduceKeys: localKeys, level: 0,
                                                                                loadData: false,
                                                                                itemsToDelete: itemsToDelete)
                        {
                            Take = int.MaxValue // just get all, we do the rate limit when we load the number of keys to reduce, anyway
                        };


                        var scheduledItems = actions.MapReduce.GetItemsToReduce(getItemsToReduceParams).ToList();
                        autoTuner.CurrentlyUsedBatchSizes.GetOrAdd(reducingBatchThrottlerId, scheduledItems.Sum(x => x.Size));
                        if (scheduledItems.Count == 0)
                        {
                            if (Log.IsWarnEnabled)
                            {
                                Log.Warn("Found single reduce items ({0}) that didn't have any items to reduce. Deleting level 1 & level 2 items for those keys. (If you can reproduce this, please contact [email protected])",
                                         string.Join(", ", keysToReduce));
                            }
                            // Here we have an interesting issue. We have scheduled reductions, because GetReduceTypesPerKeys() returned them
                            // and at the same time, we don't have any at level 0. That probably means that we have them at level 1 or 2.
                            // They shouldn't be here, and indeed, we remove them just a little down from here in this function.
                            // That said, they might have been smuggled in between versions, or something happened to cause them to be here.
                            // In order to avoid that, we forcibly delete those extra items from the scheduled reductions, and move on
                            foreach (var reduceKey in keysToReduce)
                            {
                                actions.MapReduce.DeleteScheduledReduction(index.IndexName, 1, reduceKey);
                                actions.MapReduce.DeleteScheduledReduction(index.IndexName, 2, reduceKey);
                            }
                        }

                        foreach (var reduceKey in localKeys)
                        {
                            var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexName, reduceKey);

                            if (lastPerformedReduceType != ReduceType.SingleStep)
                            {
                                localNeedToMoveToSingleStep.Add(reduceKey);
                            }

                            if (lastPerformedReduceType != ReduceType.MultiStep)
                            {
                                continue;
                            }

                            Log.Debug("Key {0} was moved from multi step to single step reduce, removing existing reduce results records",
                                      reduceKey);

                            // now we are in single step but previously multi step reduce was performed for the given key
                            var mappedBuckets = actions.MapReduce.GetMappedBuckets(index.IndexName, reduceKey).ToList();

                            // add scheduled items too to be sure we will delete reduce results of already deleted documents
                            mappedBuckets.AddRange(scheduledItems.Select(x => x.Bucket));

                            foreach (var mappedBucket in mappedBuckets.Distinct())
                            {
                                actions.MapReduce.RemoveReduceResults(index.IndexName, 1, reduceKey, mappedBucket);
                                actions.MapReduce.RemoveReduceResults(index.IndexName, 2, reduceKey, mappedBucket / 1024);
                            }
                        }

                        var mappedResults = actions.MapReduce.GetMappedResults(
                            index.IndexName,
                            localKeys,
                            loadData: true
                            ).ToList();

                        Interlocked.Add(ref count, mappedResults.Count);
                        Interlocked.Add(ref size, mappedResults.Sum(x => x.Size));

                        mappedResults.ApplyIfNotNull(x => x.Bucket = 0);

                        state.Enqueue(Tuple.Create(localKeys, mappedResults));
                    });
                });

                var reduceKeys = new HashSet <string>(state.SelectMany(x => x.Item1));

                var results = state.SelectMany(x => x.Item2)
                              .Where(x => x.Data != null)
                              .GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data))
                              .ToArray();
                context.PerformanceCounters.ReducedPerSecond.IncrementBy(results.Length);

                context.TransactionalStorage.Batch(actions =>
                                                   context.IndexStorage.Reduce(index.IndexName, viewGenerator, results, 2, context, actions, reduceKeys, state.Sum(x => x.Item2.Count))
                                                   );

                autoTuner.AutoThrottleBatchSize(count, size, batchTimeWatcher.Elapsed);

                var needToMoveToSingleStep = new HashSet <string>();
                HashSet <string> set;
                while (needToMoveToSingleStepQueue.TryDequeue(out set))
                {
                    needToMoveToSingleStep.UnionWith(set);
                }

                foreach (var reduceKey in needToMoveToSingleStep)
                {
                    string localReduceKey = reduceKey;
                    transactionalStorage.Batch(actions =>
                                               actions.MapReduce.UpdatePerformedReduceType(index.IndexName, localReduceKey, ReduceType.SingleStep));
                }
            }
            finally
            {
                long _;
                autoTuner.CurrentlyUsedBatchSizes.TryRemove(reducingBatchThrottlerId, out _);
            }
        }
Example #11
        private IEnumerable<IndexingBatchForIndex> FilterIndexes(IList<IndexToWorkOn> indexesToWorkOn, List<JsonDocument> jsonDocs, Etag highestETagInBatch, out List<IndexToWorkOn> filteredOutIndexes)
        {
            var innerFilteredOutIndexes = new ConcurrentStack<IndexToWorkOn>();
            var last = jsonDocs.Last();

            Debug.Assert(last.Etag != null);
            Debug.Assert(last.LastModified != null);

            var lastEtag = last.Etag;
            var lastModified = last.LastModified.Value;


            var documentRetriever = new DocumentRetriever(null, null, context.ReadTriggers);

            var filteredDocs =
                BackgroundTaskExecuter.Instance.Apply(context, jsonDocs, doc =>
                {
                    var filteredDoc = documentRetriever.ExecuteReadTriggers(doc, ReadOperation.Index);
                    return filteredDoc == null ? new
                    {
                        Doc = doc,
                        Json = (object)new FilteredDocument(doc)
                    } : new
                    {
                        Doc = filteredDoc,
                        Json = JsonToExpando.Convert(doc.ToJson())
                    };
                });

            if ( Log.IsDebugEnabled ) 
                Log.Debug("After read triggers executed, {0} documents remained", filteredDocs.Count);


            var results = new ConcurrentQueue<IndexingBatchForIndex>();
            var actions = new ConcurrentQueue<Action<IStorageActionsAccessor>>();
            context.Database.MappingThreadPool.ExecuteBatch(indexesToWorkOn, indexToWorkOn =>
            {
                var indexName = indexToWorkOn.Index.PublicName;
                var viewGenerator = context.IndexDefinitionStorage.GetViewGenerator(indexName);
                if (viewGenerator == null)
                    return; // probably deleted

                var batch = new IndexingBatch(highestETagInBatch);


                foreach (var filteredDoc in filteredDocs)
                {
                    var doc = filteredDoc.Doc;
                    var json = filteredDoc.Json;


                    if (defaultPrefetchingBehavior.FilterDocuments(doc) == false
                        || doc.Etag.CompareTo(indexToWorkOn.LastIndexedEtag) <= 0)
                        continue;

                    // have we already indexed this document in this index?

                    var etag = doc.Etag;
                    if (etag == null)
                        continue;

                    // is the Raven-Entity-Name a match for the things the index executes on?
                    if (viewGenerator.ForEntityNames.Count != 0 &&
                        viewGenerator.ForEntityNames.Contains(doc.Metadata.Value<string>(Constants.RavenEntityName)) == false)
                    {
                        continue;
                    }

                    batch.Add(doc, json, defaultPrefetchingBehavior.ShouldSkipDeleteFromIndex(doc));

                    if (batch.DateTime == null)
                        batch.DateTime = doc.LastModified;
                    else
                        batch.DateTime = batch.DateTime > doc.LastModified
                            ? doc.LastModified
                            : batch.DateTime;
                }

                if (batch.Docs.Count == 0)
                {
                    if ( Log.IsDebugEnabled )
                        Log.Debug("All documents have been filtered for {0}, no indexing will be performed, updating to {1}, {2}", indexName, lastEtag, lastModified);

                    // we use it this way to batch all the updates together
                    if (indexToWorkOn.LastIndexedEtag.CompareTo(lastEtag) < 0)
                    {
                        actions.Enqueue(accessor =>
                        {
                            accessor.Indexing.UpdateLastIndexed(indexToWorkOn.Index.indexId, lastEtag, lastModified);
                            accessor.AfterStorageCommit += () =>
                            {
                                indexToWorkOn.Index.EnsureIndexWriter();
                                indexToWorkOn.Index.Flush(lastEtag);
                            };
                        });
                    }
                    innerFilteredOutIndexes.Push(indexToWorkOn);
                    context.MarkIndexFilteredOut(indexName);
                    return;
                }
                
                if (Log.IsDebugEnabled)
                    Log.Debug("Going to index {0} documents in {1}: ({2})", batch.Ids.Count, indexToWorkOn, string.Join(", ", batch.Ids));
                
                results.Enqueue(new IndexingBatchForIndex
                {
                    Batch = batch,
                    IndexId = indexToWorkOn.IndexId,
                    Index = indexToWorkOn.Index,
                    LastIndexedEtag = indexToWorkOn.LastIndexedEtag
                });
            }, description: string.Format("Filtering documents for {0} indexes", indexesToWorkOn.Count));

            filteredOutIndexes = innerFilteredOutIndexes.ToList();
            foreach (var action in actions)
            {
                bool keepTrying = true;
                for (int i = 0; i < 10 && keepTrying; i++)
                {
                    keepTrying = false;
                    transactionalStorage.Batch(actionsAccessor =>
                    {
                        if (action != null)
                        {
                            try
                            {
                                action(actionsAccessor);
                            }
                            catch (Exception e)
                            {
                                if (actionsAccessor.IsWriteConflict(e))
                                {
                                    keepTrying = true;
                                    return;
                                }
                                throw;
                            }
                        }
                    });

                    if (keepTrying)
                        Thread.Sleep(11);
                }
            }
            return results.Where(x => x != null);
        }
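A note on the pattern above: each queued storage update is retried when the batch raises a write conflict, up to ten attempts with a short sleep between tries, and any non-conflict exception is rethrown immediately. Below is a minimal, self-contained sketch of that retry-on-write-conflict idea; WriteConflictException and the action delegate are illustrative stand-ins, not the storage types used above.

using System;
using System.Threading;

// Hypothetical stand-in for the storage layer's write-conflict signal.
public class WriteConflictException : Exception { }

public static class RetryOnConflict
{
    // Runs a storage action, retrying on write conflicts with a short
    // back-off, mirroring the 10-attempt / Thread.Sleep(11) policy above.
    public static void Run(Action action, int maxAttempts = 10)
    {
        for (int attempt = 0; attempt < maxAttempts; attempt++)
        {
            try
            {
                action();
                return; // committed without a conflict
            }
            catch (WriteConflictException)
            {
                // another writer won the race; back off briefly and retry
                Thread.Sleep(11);
            }
        }
        // every attempt conflicted; let the caller decide what to do
        throw new WriteConflictException();
    }
}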
Example #12
        private ReduceResultStats MultiStepReduce(IndexToWorkOn index, string[] keysToReduce, AbstractViewGenerator viewGenerator, List<object> itemsToDelete)
        {
            var result = new ReduceResultStats();
            var needToMoveToMultiStep = new HashSet<string>();

            transactionalStorage.Batch(actions =>
            {
                foreach (var localReduceKey in keysToReduce)
                {
                    var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexName, localReduceKey);

                    if (lastPerformedReduceType != ReduceType.MultiStep)
                    {
                        needToMoveToMultiStep.Add(localReduceKey);
                    }

                    if (lastPerformedReduceType != ReduceType.SingleStep)
                    {
                        continue;
                    }
                    // we exceeded the limit of items to reduce in single step
                    // now we need to schedule reductions at level 0 for all map results with given reduce key
                    var mappedItems = actions.MapReduce.GetMappedBuckets(index.IndexName, localReduceKey).ToList();
                    actions.MapReduce.ScheduleReductions(index.IndexName, 0,
                                                         mappedItems.Select(x => new ReduceKeyAndBucket(x, localReduceKey)));
                }
            });

            for (int i = 0; i < 3; i++)
            {
                var level = i;

                bool retry = true;
                var itemsAlreadySeen = new HashSet<Tuple<string, int>>();
                while (retry)
                {
                    transactionalStorage.Batch(actions =>
                    {
                        context.CancellationToken.ThrowIfCancellationRequested();

                        var sp = Stopwatch.StartNew();

                        var persistedResults = actions.MapReduce.GetItemsToReduce(
                            level: level,
                            reduceKeys: keysToReduce,
                            index: index.IndexName,
                            itemsToDelete: itemsToDelete,
                            loadData: true,
                            take: context.CurrentNumberOfItemsToReduceInSingleBatch,
                            itemsAlreadySeen: itemsAlreadySeen).ToList();
                        if (persistedResults.Count == 0)
                        {
                            retry = false;
                            return;
                        }

                        result.count += persistedResults.Count;
                        result.size  += persistedResults.Sum(x => x.Size);

                        if (Log.IsDebugEnabled)
                        {
                            // persistedResults is never empty here; the empty case returned above
                            Log.Debug(() => string.Format("Found {0} results for keys [{1}] for index {2} at level {3} in {4}",
                                                          persistedResults.Count,
                                                          string.Join(", ", persistedResults.Select(x => x.ReduceKey).Distinct()),
                                                          index.IndexName, level, sp.Elapsed));
                        }

                        context.CancellationToken.ThrowIfCancellationRequested();

                        var requiredReduceNextTime = persistedResults.Select(x => new ReduceKeyAndBucket(x.Bucket, x.ReduceKey))
                                                     .OrderBy(x => x.Bucket)
                                                     .Distinct()
                                                     .ToArray();
                        foreach (var mappedResultInfo in requiredReduceNextTime)
                        {
                            actions.MapReduce.RemoveReduceResults(index.IndexName, level + 1, mappedResultInfo.ReduceKey,
                                                                  mappedResultInfo.Bucket);
                        }

                        if (level != 2)
                        {
                            var reduceKeysAndBuckets = requiredReduceNextTime
                                                       .Select(x => new ReduceKeyAndBucket(x.Bucket / 1024, x.ReduceKey))
                                                       .Distinct()
                                                       .ToArray();
                            actions.MapReduce.ScheduleReductions(index.IndexName, level + 1, reduceKeysAndBuckets);
                        }

                        var results = persistedResults
                                      .Where(x => x.Data != null)
                                      .GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data))
                                      .ToArray();
                        var reduceKeys = new HashSet<string>(persistedResults.Select(x => x.ReduceKey),
                                                             StringComparer.InvariantCultureIgnoreCase);
                        context.ReducedPerSecIncreaseBy(results.Length);

                        context.CancellationToken.ThrowIfCancellationRequested();
                        sp = Stopwatch.StartNew();
                        context.IndexStorage.Reduce(index.IndexName, viewGenerator, results, level, context, actions, reduceKeys);
                        Log.Debug("Indexed {0} reduce keys in {1} with {2} results for index {3} in {4}", reduceKeys.Count, sp.Elapsed,
                                  results.Length, index.IndexName, sp.Elapsed);
                    });
                }
            }

            foreach (var reduceKey in needToMoveToMultiStep)
            {
                string localReduceKey = reduceKey;
                transactionalStorage.Batch(actions =>
                                           actions.MapReduce.UpdatePerformedReduceType(index.IndexName, localReduceKey,
                                                                                       ReduceType.MultiStep));
            }

            return result;
        }
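MultiStepReduce walks a three-level reduce tree: level 0 holds the raw map-output buckets, and each pass schedules the next level by grouping 1024 child buckets into one parent via the Bucket / 1024 expression seen above, so by level 2 up to 1024 * 1024 level-0 buckets have collapsed into a single bucket per reduce key. A small sketch of just that bucket arithmetic, assuming integer division is all the grouping does:

using System;

public static class ReduceBuckets
{
    // A level-N bucket rolls up into bucket / 1024 at level N + 1,
    // matching the ScheduleReductions call in the code above.
    public static int ParentBucket(int bucket)
    {
        return bucket / 1024;
    }

    public static void Main()
    {
        int level0 = 1500000;              // an arbitrary map-result bucket
        int level1 = ParentBucket(level0); // 1464
        int level2 = ParentBucket(level1); // 1
        Console.WriteLine("level0={0} -> level1={1} -> level2={2}", level0, level1, level2);
    }
}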
Example #13
        private ReduceResultStats SingleStepReduce(IndexToWorkOn index, string[] keysToReduce, AbstractViewGenerator viewGenerator,
                                                   List<object> itemsToDelete)
        {
            var result = new ReduceResultStats();
            var needToMoveToSingleStep = new HashSet<string>();

            Log.Debug(() => string.Format("Executing single step reducing for {0} keys [{1}]", keysToReduce.Length, string.Join(", ", keysToReduce)));
            transactionalStorage.Batch(actions =>
            {
                var scheduledItems = actions.MapReduce.GetItemsToReduce(
                    level: 0,
                    reduceKeys: keysToReduce,
                    index: index.IndexName,
                    itemsToDelete: itemsToDelete,
                    loadData: false,
                    take: int.MaxValue, // just get all; the rate limit is applied when loading the number of keys to reduce anyway
                    itemsAlreadySeen: new HashSet<Tuple<string, int>>()).ToList();

                foreach (var reduceKey in keysToReduce)
                {
                    var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexName, reduceKey);

                    if (lastPerformedReduceType != ReduceType.SingleStep)
                    {
                        needToMoveToSingleStep.Add(reduceKey);
                    }

                    if (lastPerformedReduceType != ReduceType.MultiStep)
                    {
                        continue;
                    }

                    Log.Debug("Key {0} was moved from multi step to single step reduce, removing existing reduce results records",
                              reduceKey);

                    // now we are in single step but previously multi step reduce was performed for the given key
                    var mappedBuckets = actions.MapReduce.GetMappedBuckets(index.IndexName, reduceKey).ToList();

                    // add scheduled items too to be sure we will delete reduce results of already deleted documents
                    mappedBuckets.AddRange(scheduledItems.Select(x => x.Bucket));

                    foreach (var mappedBucket in mappedBuckets.Distinct())
                    {
                        actions.MapReduce.RemoveReduceResults(index.IndexName, 1, reduceKey, mappedBucket);
                        actions.MapReduce.RemoveReduceResults(index.IndexName, 2, reduceKey, mappedBucket / 1024);
                    }
                }

                var mappedResults = actions.MapReduce.GetMappedResults(
                    index.IndexName,
                    keysToReduce,
                    loadData: true
                    ).ToList();

                result.count += mappedResults.Count;
                result.size  += mappedResults.Sum(x => x.Size);

                var reduceKeys = new HashSet<string>(keysToReduce);

                mappedResults.ApplyIfNotNull(x => x.Bucket = 0);

                var results = mappedResults
                              .Where(x => x.Data != null)
                              .GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data))
                              .ToArray();

                context.ReducedPerSecIncreaseBy(results.Length);

                context.IndexStorage.Reduce(index.IndexName, viewGenerator, results, 2, context, actions, reduceKeys);
            });

            foreach (var reduceKey in needToMoveToSingleStep)
            {
                string localReduceKey = reduceKey;
                transactionalStorage.Batch(actions =>
                                           actions.MapReduce.UpdatePerformedReduceType(index.IndexName, localReduceKey, ReduceType.SingleStep));
            }

            return result;
        }
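MultiStepReduce and SingleStepReduce coordinate through the persisted per-key ReduceType flag: each method detects keys last reduced the other way, removes or reschedules the stale intermediate results, and records the new mode once it finishes. The threshold that decides which mode a key gets is not visible in either method, so the sketch below uses a hypothetical constant; treat both the limit and the decision point as assumptions rather than the engine's actual policy.

public enum ReduceMode { None, SingleStep, MultiStep }

public static class ReduceModeChooser
{
    // Hypothetical threshold; in practice the limit would come from
    // configuration (how many items fit in one reduce batch).
    private const int SingleStepLimit = 1024;

    // Few mapped items: reduce them all in one pass at the final level.
    // Many mapped items: spread the work across the bucket tree instead.
    public static ReduceMode Choose(int mappedItemCount)
    {
        return mappedItemCount <= SingleStepLimit
            ? ReduceMode.SingleStep
            : ReduceMode.MultiStep;
    }
}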
        private IEnumerable<Tuple<IndexToWorkOn, IndexingBatch>> FilterIndexes(IList<IndexToWorkOn> indexesToWorkOn, JsonDocument[] jsonDocs)
        {
            var last = jsonDocs.Last();

            Debug.Assert(last.Etag != null);
            Debug.Assert(last.LastModified != null);

            var lastEtag     = last.Etag.Value;
            var lastModified = last.LastModified.Value;

            var lastIndexedEtag = new ComparableByteArray(lastEtag.ToByteArray());

            var documentRetriever = new DocumentRetriever(null, context.ReadTriggers);

            var filteredDocs =
                BackgroundTaskExecuter.Instance.Apply(jsonDocs, doc =>
                {
                    doc = documentRetriever.ExecuteReadTriggers(doc, null, ReadOperation.Index);
                    return doc == null ? null : new { Doc = doc, Json = JsonToExpando.Convert(doc.ToJson()) };
                });

            log.Debug("After read triggers executed, {0} documents remained", filteredDocs.Count);

            var results = new Tuple<IndexToWorkOn, IndexingBatch>[indexesToWorkOn.Count];
            var actions = new Action<IStorageActionsAccessor>[indexesToWorkOn.Count];

            BackgroundTaskExecuter.Instance.ExecuteAll(context.Configuration, scheduler, indexesToWorkOn, (indexToWorkOn, i) =>
            {
                var indexLastIndexedEtag = new ComparableByteArray(indexToWorkOn.LastIndexedEtag.ToByteArray());
                if (indexLastIndexedEtag.CompareTo(lastIndexedEtag) >= 0)
                {
                    return;
                }

                var indexName     = indexToWorkOn.IndexName;
                var viewGenerator = context.IndexDefinitionStorage.GetViewGenerator(indexName);
                if (viewGenerator == null)
                {
                    return;                     // probably deleted
                }
                var batch = new IndexingBatch();

                foreach (var item in filteredDocs)
                {
                    // have we already indexed this document in this index?
                    if (indexLastIndexedEtag.CompareTo(new ComparableByteArray(item.Doc.Etag.Value.ToByteArray())) >= 0)
                    {
                        continue;
                    }

                    // is the Raven-Entity-Name a match for the things the index executes on?
                    if (viewGenerator.ForEntityNames.Count != 0 &&
                        viewGenerator.ForEntityNames.Contains(item.Doc.Metadata.Value<string>(Constants.RavenEntityName)) == false)
                    {
                        continue;
                    }

                    batch.Add(item.Doc, item.Json);

                    if (batch.DateTime == null)
                    {
                        batch.DateTime = item.Doc.LastModified;
                    }
                    else
                    {
                        batch.DateTime = batch.DateTime > item.Doc.LastModified
                            ? item.Doc.LastModified
                            : batch.DateTime;
                    }
                }

                if (batch.Docs.Count == 0)
                {
                    log.Debug("All documents have been filtered for {0}, no indexing will be performed, updating to {1}, {2}", indexName,
                              lastEtag, lastModified);
                    // we use it this way to batch all the updates together
                    actions[i] = accessor => accessor.Indexing.UpdateLastIndexed(indexName, lastEtag, lastModified);
                    return;
                }
                log.Debug("Going to index {0} documents in {1}", batch.Ids.Count, indexToWorkOn);
                results[i] = Tuple.Create(indexToWorkOn, batch);
            });

            transactionalStorage.Batch(actionsAccessor =>
            {
                foreach (var action in actions)
                {
                    if (action != null)
                    {
                        action(actionsAccessor);
                    }
                }
            });

            return results.Where(x => x != null);
        }
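FilterIndexes compares etags as serialized byte arrays through ComparableByteArray, so "already indexed" reduces to a lexicographic comparison of etag bytes. Below is a minimal sketch of such a comparison, assuming unsigned byte-wise ordering with the most significant byte first; the real ComparableByteArray may differ in details. Under this ordering, indexLastIndexedEtag.CompareTo(docEtag) >= 0 means a previous batch already covered the document, which is why the loop above skips it.

using System;

public struct ByteArrayComparableSketch : IComparable<ByteArrayComparableSketch>
{
    private readonly byte[] bytes;

    public ByteArrayComparableSketch(byte[] bytes)
    {
        this.bytes = bytes;
    }

    // Lexicographic, unsigned, most-significant-byte-first comparison,
    // which orders big-endian etag bytes the same way as the etags themselves.
    public int CompareTo(ByteArrayComparableSketch other)
    {
        int shared = Math.Min(bytes.Length, other.bytes.Length);
        for (int i = 0; i < shared; i++)
        {
            if (bytes[i] != other.bytes[i])
                return bytes[i] < other.bytes[i] ? -1 : 1;
        }
        return bytes.Length.CompareTo(other.bytes.Length);
    }
}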