Exemple #1
0
            private void WriteDocumentToIndex(object doc, RavenIndexWriter indexWriter, Analyzer analyzer, Stopwatch convertToLuceneDocumentDuration, Stopwatch addDocumentDutation)
            {
                string reduceKeyAsString;

                using (StopwatchScope.For(convertToLuceneDocumentDuration))
                {
                    float boost;
                    try
                    {
                        var fields = GetFields(doc, out boost);

                        reduceKeyAsString = ExtractReduceKey(ViewGenerator, doc);
                        reduceKeyField.SetValue(reduceKeyAsString);
                        reduceValueField.SetValue(ToJsonDocument(doc).ToString(Formatting.None));

                        luceneDoc.GetFields().Clear();
                        luceneDoc.Boost = boost;
                        luceneDoc.Add(reduceKeyField);
                        luceneDoc.Add(reduceValueField);

                        foreach (var field in fields)
                        {
                            luceneDoc.Add(field);
                        }
                    }
                    catch (Exception e)
                    {
                        Context.AddError(indexId,
                                         parent.PublicName,
                                         TryGetDocKey(doc),
                                         e,
                                         "Reduce"
                                         );
                        logIndexing.WarnException("Could not get fields to during reduce for " + parent.PublicName, e);
                        return;
                    }
                }
                batchers.ApplyAndIgnoreAllErrors(
                    exception =>
                {
                    logIndexing.WarnException(
                        string.Format("Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
                                      indexId, reduceKeyAsString),
                        exception);
                    Context.AddError(indexId, parent.PublicName, reduceKeyAsString, exception, "OnIndexEntryCreated Trigger");
                },
                    trigger => trigger.OnIndexEntryCreated(reduceKeyAsString, luceneDoc));

                parent.LogIndexedDocument(reduceKeyAsString, luceneDoc);

                using (StopwatchScope.For(addDocumentDutation))
                {
                    parent.AddDocumentToIndex(indexWriter, luceneDoc, analyzer);
                }
            }
Exemple #2
0
            private void RemoveExistingReduceKeysFromIndex(RavenIndexWriter indexWriter, Stopwatch deleteExistingDocumentsDuration)
            {
                foreach (var reduceKey in ReduceKeys)
                {
                    var entryKey = reduceKey;
                    parent.InvokeOnIndexEntryDeletedOnAllBatchers(batchers, new Term(Constants.ReduceKeyFieldName, entryKey));

                    using (StopwatchScope.For(deleteExistingDocumentsDuration))
                    {
                        indexWriter.DeleteDocuments(new Term(Constants.ReduceKeyFieldName, entryKey));
                    }
                }
            }
Exemple #3
0
        private bool?MoveNext(IEnumerator en, StatefulEnumerableWrapper <object> innerEnumerator)
        {
            using (StopwatchScope.For(MoveNextDuration))
            {
                try
                {
                    BeforeMoveNext();
                    var moveNext = en.MoveNext();
                    if (moveNext == false)
                    {
                        CancelMoveNext();
                    }

                    return(moveNext);
                }
                catch (Exception e)
                {
                    OnError(e, innerEnumerator.Current);
                }
            }

            return(null);
        }
Exemple #4
0
        public override IndexingPerformanceStats IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp, CancellationToken token)
        {
            token.ThrowIfCancellationRequested();

            var count            = 0;
            var sourceCount      = 0;
            var deleted          = new Dictionary <ReduceKeyAndBucket, int>();
            var performance      = RecordCurrentBatch("Current Map", "Map", batch.Docs.Count);
            var performanceStats = new List <BasePerformanceStats>();

            var usedStorageAccessors = new ConcurrentSet <IStorageActionsAccessor>();

            if (usedStorageAccessors.TryAdd(actions))
            {
                var storageCommitDuration = new Stopwatch();

                actions.BeforeStorageCommit += storageCommitDuration.Start;

                actions.AfterStorageCommit += () =>
                {
                    storageCommitDuration.Stop();

                    performanceStats.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds));
                };
            }

            var deleteMappedResultsDuration = new Stopwatch();
            var documentsWrapped            = batch.Docs.Select(doc =>
            {
                token.ThrowIfCancellationRequested();

                sourceCount++;
                var documentId = doc.__document_id;

                using (StopwatchScope.For(deleteMappedResultsDuration))
                {
                    actions.MapReduce.DeleteMappedResultsForDocumentId((string)documentId, indexId, deleted);
                }

                return(doc);
            })
                                              .Where(x => x is FilteredDocument == false)
                                              .ToList();

            performanceStats.Add(new PerformanceStats
            {
                Name       = IndexingOperation.Map_DeleteMappedResults,
                DurationMs = deleteMappedResultsDuration.ElapsedMilliseconds,
            });

            var allReferencedDocs = new ConcurrentQueue <IDictionary <string, HashSet <string> > >();
            var allReferenceEtags = new ConcurrentQueue <IDictionary <string, Etag> >();
            var allState          = new ConcurrentQueue <Tuple <HashSet <ReduceKeyAndBucket>, IndexingWorkStats, Dictionary <string, int> > >();

            var parallelOperations = new ConcurrentQueue <ParallelBatchStats>();

            var parallelProcessingStart = SystemTime.UtcNow;

            BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, partition =>
            {
                token.ThrowIfCancellationRequested();
                var parallelStats = new ParallelBatchStats
                {
                    StartDelay = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds
                };

                var localStats   = new IndexingWorkStats();
                var localChanges = new HashSet <ReduceKeyAndBucket>();
                var statsPerKey  = new Dictionary <string, int>();

                var linqExecutionDuration            = new Stopwatch();
                var reduceInMapLinqExecutionDuration = new Stopwatch();
                var putMappedResultsDuration         = new Stopwatch();
                var convertToRavenJObjectDuration    = new Stopwatch();

                allState.Enqueue(Tuple.Create(localChanges, localStats, statsPerKey));

                using (CurrentIndexingScope.Current = new CurrentIndexingScope(context.Database, PublicName))
                {
                    // we are writing to the transactional store from multiple threads here, and in a streaming fashion
                    // should result in less memory and better perf
                    context.TransactionalStorage.Batch(accessor =>
                    {
                        if (usedStorageAccessors.TryAdd(accessor))
                        {
                            var storageCommitDuration = new Stopwatch();

                            accessor.BeforeStorageCommit += storageCommitDuration.Start;

                            accessor.AfterStorageCommit += () =>
                            {
                                storageCommitDuration.Stop();

                                parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds));
                            };
                        }

                        var mapResults             = RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, localStats, linqExecutionDuration);
                        var currentDocumentResults = new List <object>();
                        string currentKey          = null;
                        bool skipDocument          = false;

                        foreach (var currentDoc in mapResults)
                        {
                            token.ThrowIfCancellationRequested();

                            var documentId = GetDocumentId(currentDoc);
                            if (documentId != currentKey)
                            {
                                count += ProcessBatch(viewGenerator, currentDocumentResults, currentKey, localChanges, accessor, statsPerKey, reduceInMapLinqExecutionDuration, putMappedResultsDuration, convertToRavenJObjectDuration);

                                currentDocumentResults.Clear();
                                currentKey = documentId;
                            }
                            else if (skipDocument)
                            {
                                continue;
                            }

                            RavenJObject currentDocJObject;
                            using (StopwatchScope.For(convertToRavenJObjectDuration))
                            {
                                currentDocJObject = RavenJObject.FromObject(currentDoc, jsonSerializer);
                            }

                            currentDocumentResults.Add(new DynamicJsonObject(currentDocJObject));

                            if (EnsureValidNumberOfOutputsForDocument(documentId, currentDocumentResults.Count) == false)
                            {
                                skipDocument = true;
                                currentDocumentResults.Clear();
                                continue;
                            }

                            Interlocked.Increment(ref localStats.IndexingSuccesses);
                        }
                        count += ProcessBatch(viewGenerator, currentDocumentResults, currentKey, localChanges, accessor, statsPerKey, reduceInMapLinqExecutionDuration, putMappedResultsDuration, convertToRavenJObjectDuration);

                        parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.LoadDocument, CurrentIndexingScope.Current.LoadDocumentDuration.ElapsedMilliseconds));
                        parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Linq_MapExecution, linqExecutionDuration.ElapsedMilliseconds));
                        parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Linq_ReduceLinqExecution, reduceInMapLinqExecutionDuration.ElapsedMilliseconds));
                        parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Map_PutMappedResults, putMappedResultsDuration.ElapsedMilliseconds));
                        parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Map_ConvertToRavenJObject, convertToRavenJObjectDuration.ElapsedMilliseconds));

                        parallelOperations.Enqueue(parallelStats);
                    });

                    allReferenceEtags.Enqueue(CurrentIndexingScope.Current.ReferencesEtags);
                    allReferencedDocs.Enqueue(CurrentIndexingScope.Current.ReferencedDocuments);
                }
            });

            performanceStats.Add(new ParallelPerformanceStats
            {
                NumberOfThreads   = parallelOperations.Count,
                DurationMs        = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds,
                BatchedOperations = parallelOperations.ToList()
            });

            var updateDocumentReferencesDuration = new Stopwatch();

            using (StopwatchScope.For(updateDocumentReferencesDuration))
            {
                UpdateDocumentReferences(actions, allReferencedDocs, allReferenceEtags);
            }
            performanceStats.Add(PerformanceStats.From(IndexingOperation.UpdateDocumentReferences, updateDocumentReferencesDuration.ElapsedMilliseconds));

            var changed = allState.SelectMany(x => x.Item1).Concat(deleted.Keys)
                          .Distinct()
                          .ToList();

            var stats          = new IndexingWorkStats(allState.Select(x => x.Item2));
            var reduceKeyStats = allState.SelectMany(x => x.Item3)
                                 .GroupBy(x => x.Key)
                                 .Select(g => new { g.Key, Count = g.Sum(x => x.Value) })
                                 .ToList();

            BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, reduceKeyStats, enumerator => context.TransactionalStorage.Batch(accessor =>
            {
                while (enumerator.MoveNext())
                {
                    var reduceKeyStat = enumerator.Current;
                    accessor.MapReduce.IncrementReduceKeyCounter(indexId, reduceKeyStat.Key, reduceKeyStat.Count);
                }
            }));

            actions.General.MaybePulseTransaction();

            var parallelReductionOperations = new ConcurrentQueue <ParallelBatchStats>();
            var parallelReductionStart      = SystemTime.UtcNow;

            BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, changed, enumerator => context.TransactionalStorage.Batch(accessor =>
            {
                var parallelStats = new ParallelBatchStats
                {
                    StartDelay = (long)(SystemTime.UtcNow - parallelReductionStart).TotalMilliseconds
                };

                var scheduleReductionsDuration = new Stopwatch();

                using (StopwatchScope.For(scheduleReductionsDuration))
                {
                    while (enumerator.MoveNext())
                    {
                        accessor.MapReduce.ScheduleReductions(indexId, 0, enumerator.Current);
                        accessor.General.MaybePulseTransaction();
                    }
                }

                parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Map_ScheduleReductions, scheduleReductionsDuration.ElapsedMilliseconds));
                parallelReductionOperations.Enqueue(parallelStats);
            }));

            performanceStats.Add(new ParallelPerformanceStats
            {
                NumberOfThreads   = parallelReductionOperations.Count,
                DurationMs        = (long)(SystemTime.UtcNow - parallelReductionStart).TotalMilliseconds,
                BatchedOperations = parallelReductionOperations.ToList()
            });

            UpdateIndexingStats(context, stats);

            performance.OnCompleted = () => BatchCompleted("Current Map", "Map", sourceCount, count, performanceStats);

            logIndexing.Debug("Mapped {0} documents for {1}", count, indexId);

            return(performance);
        }
Exemple #5
0
        private int ProcessBatch(AbstractViewGenerator viewGenerator, List <object> currentDocumentResults, string currentKey, HashSet <ReduceKeyAndBucket> changes,
                                 IStorageActionsAccessor actions,
                                 IDictionary <string, int> statsPerKey, Stopwatch reduceDuringMapLinqExecution, Stopwatch putMappedResultsDuration, Stopwatch convertToRavenJObjectDuration)
        {
            if (currentKey == null || currentDocumentResults.Count == 0)
            {
                return(0);
            }

            var old = CurrentIndexingScope.Current;

            try
            {
                CurrentIndexingScope.Current = null;

                if (logIndexing.IsDebugEnabled)
                {
                    var sb = new StringBuilder()
                             .AppendFormat("Index {0} for document {1} resulted in:", PublicName, currentKey)
                             .AppendLine();
                    foreach (var currentDocumentResult in currentDocumentResults)
                    {
                        sb.AppendLine(JsonConvert.SerializeObject(currentDocumentResult));
                    }
                    logIndexing.Debug(sb.ToString());
                }

                int count = 0;

                var results = RobustEnumerationReduceDuringMapPhase(currentDocumentResults.GetEnumerator(), viewGenerator.ReduceDefinition, reduceDuringMapLinqExecution);
                foreach (var doc in results)
                {
                    count++;

                    var reduceValue = viewGenerator.GroupByExtraction(doc);
                    if (reduceValue == null)
                    {
                        logIndexing.Debug("Field {0} is used as the reduce key and cannot be null, skipping document {1}",
                                          viewGenerator.GroupByExtraction, currentKey);
                        continue;
                    }
                    string reduceKey = ReduceKeyToString(reduceValue);

                    RavenJObject data;
                    using (StopwatchScope.For(convertToRavenJObjectDuration))
                    {
                        data = GetMappedData(doc);
                    }

                    logIndexing.Debug("Index {0} for document {1} resulted in ({2}): {3}", PublicName, currentKey, reduceKey, data);

                    using (StopwatchScope.For(putMappedResultsDuration))
                    {
                        actions.MapReduce.PutMappedResult(indexId, currentKey, reduceKey, data);
                    }

                    statsPerKey[reduceKey] = statsPerKey.GetOrDefault(reduceKey) + 1;
                    actions.General.MaybePulseTransaction();
                    changes.Add(new ReduceKeyAndBucket(IndexingUtil.MapBucket(currentKey), reduceKey));
                }
                return(count);
            }
            finally
            {
                CurrentIndexingScope.Current = old;
            }
        }
Exemple #6
0
        public override IndexingPerformanceStats IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp, CancellationToken token)
        {
            token.ThrowIfCancellationRequested();

            var count             = 0;
            var sourceCount       = 0;
            var writeToIndexStats = new List <PerformanceStats>();

            IndexingPerformanceStats performance = null;
            var performanceStats = new List <BasePerformanceStats>();

            var storageCommitDuration = new Stopwatch();

            actions.BeforeStorageCommit += storageCommitDuration.Start;

            actions.AfterStorageCommit += () =>
            {
                storageCommitDuration.Stop();

                performanceStats.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds));
            };

            Write((indexWriter, analyzer, stats) =>
            {
                var processedKeys = new HashSet <string>();
                var batchers      = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(indexId))
                                    .Where(x => x != null)
                                    .ToList();

                try
                {
                    performance = RecordCurrentBatch("Current", "Index", batch.Docs.Count);

                    var deleteExistingDocumentsDuration = new Stopwatch();

                    Interlocked.Increment(ref sourceCount);
                    var docIdTerm        = new Term(Constants.DocumentIdFieldName);
                    var documentsWrapped = batch.Docs.Select((doc, i) =>
                    {
                        token.ThrowIfCancellationRequested();
                        if (doc.__document_id == null)
                        {
                            throw new ArgumentException(
                                string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));
                        }

                        string documentId = doc.__document_id.ToString();
                        if (processedKeys.Add(documentId) == false)
                        {
                            return(doc);
                        }

                        InvokeOnIndexEntryDeletedOnAllBatchers(batchers, docIdTerm.CreateTerm(documentId.ToLowerInvariant()));

                        if (batch.SkipDeleteFromIndex[i] == false ||
                            context.ShouldRemoveFromIndex(documentId)) // maybe it is recently deleted?
                        {
                            using (StopwatchScope.For(deleteExistingDocumentsDuration))
                            {
                                indexWriter.DeleteDocuments(docIdTerm.CreateTerm(documentId.ToLowerInvariant()));
                            }
                        }

                        return(doc);
                    })
                                           .Where(x => x is FilteredDocument == false)
                                           .ToList();

                    performanceStats.Add(new PerformanceStats
                    {
                        Name       = IndexingOperation.Lucene_DeleteExistingDocument,
                        DurationMs = deleteExistingDocumentsDuration.ElapsedMilliseconds
                    });

                    var allReferencedDocs = new ConcurrentQueue <IDictionary <string, HashSet <string> > >();
                    var allReferenceEtags = new ConcurrentQueue <IDictionary <string, Etag> >();

                    var parallelOperations = new ConcurrentQueue <ParallelBatchStats>();

                    var parallelProcessingStart = SystemTime.UtcNow;
                    context.Database.MappingThreadPool.ExecuteBatch(documentsWrapped, (IEnumerator <dynamic> partition) =>
                    {
                        token.ThrowIfCancellationRequested();
                        var parallelStats = new ParallelBatchStats
                        {
                            StartDelay = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds
                        };

                        var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(context.Database, indexDefinition, viewGenerator, logIndexing);
                        var luceneDoc       = new Document();
                        var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES,
                                                        Field.Index.NOT_ANALYZED_NO_NORMS);

                        using (CurrentIndexingScope.Current = new CurrentIndexingScope(context.Database, PublicName))
                        {
                            string currentDocId = null;
                            int outputPerDocId  = 0;
                            Action <Exception, object> onErrorFunc;
                            bool skipDocument = false;

                            var linqExecutionDuration           = new Stopwatch();
                            var addDocumentDutation             = new Stopwatch();
                            var convertToLuceneDocumentDuration = new Stopwatch();

                            foreach (var doc in RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, stats, out onErrorFunc, linqExecutionDuration))
                            {
                                token.ThrowIfCancellationRequested();

                                float boost;
                                IndexingResult indexingResult;
                                using (StopwatchScope.For(convertToLuceneDocumentDuration))
                                {
                                    try
                                    {
                                        indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);
                                    }
                                    catch (Exception e)
                                    {
                                        onErrorFunc(e, doc);
                                        continue;
                                    }
                                }

                                // ReSharper disable once RedundantBoolCompare --> code clarity
                                if (indexingResult.NewDocId == null || indexingResult.ShouldSkip != false)
                                {
                                    continue;
                                }
                                if (currentDocId != indexingResult.NewDocId)
                                {
                                    currentDocId   = indexingResult.NewDocId;
                                    outputPerDocId = 0;
                                    skipDocument   = false;
                                }
                                if (skipDocument)
                                {
                                    continue;
                                }
                                outputPerDocId++;
                                if (EnsureValidNumberOfOutputsForDocument(currentDocId, outputPerDocId) == false)
                                {
                                    skipDocument = true;
                                    continue;
                                }
                                Interlocked.Increment(ref count);

                                using (StopwatchScope.For(convertToLuceneDocumentDuration))
                                {
                                    luceneDoc.GetFields().Clear();
                                    luceneDoc.Boost = boost;
                                    documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
                                    luceneDoc.Add(documentIdField);
                                    foreach (var field in indexingResult.Fields)
                                    {
                                        luceneDoc.Add(field);
                                    }
                                }

                                batchers.ApplyAndIgnoreAllErrors(
                                    exception =>
                                {
                                    logIndexing.WarnException(
                                        string.Format(
                                            "Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
                                            PublicName, indexingResult.NewDocId),
                                        exception);
                                    context.AddError(
                                        indexId,
                                        PublicName,
                                        indexingResult.NewDocId,
                                        exception,
                                        "OnIndexEntryCreated Trigger");
                                },
                                    trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc));
                                LogIndexedDocument(indexingResult.NewDocId, luceneDoc);

                                using (StopwatchScope.For(addDocumentDutation))
                                {
                                    AddDocumentToIndex(indexWriter, luceneDoc, analyzer);
                                }

                                Interlocked.Increment(ref stats.IndexingSuccesses);
                            }
                            allReferenceEtags.Enqueue(CurrentIndexingScope.Current.ReferencesEtags);
                            allReferencedDocs.Enqueue(CurrentIndexingScope.Current.ReferencedDocuments);

                            parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.LoadDocument, CurrentIndexingScope.Current.LoadDocumentDuration.ElapsedMilliseconds));
                            parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Linq_MapExecution, linqExecutionDuration.ElapsedMilliseconds));
                            parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Lucene_ConvertToLuceneDocument, convertToLuceneDocumentDuration.ElapsedMilliseconds));
                            parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Lucene_AddDocument, addDocumentDutation.ElapsedMilliseconds));

                            parallelOperations.Enqueue(parallelStats);
                        }
                    }, description: string.Format("Mapping index {0} from Etag {1} to Etag {2}", this.PublicName, this.GetLastEtagFromStats(), batch.HighestEtagBeforeFiltering));

                    performanceStats.Add(new ParallelPerformanceStats
                    {
                        NumberOfThreads   = parallelOperations.Count,
                        DurationMs        = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds,
                        BatchedOperations = parallelOperations.ToList()
                    });

                    var updateDocumentReferencesDuration = new Stopwatch();
                    using (StopwatchScope.For(updateDocumentReferencesDuration))
                    {
                        UpdateDocumentReferences(actions, allReferencedDocs, allReferenceEtags);
                    }
                    performanceStats.Add(PerformanceStats.From(IndexingOperation.UpdateDocumentReferences, updateDocumentReferencesDuration.ElapsedMilliseconds));
                }
                catch (Exception e)
                {
                    batchers.ApplyAndIgnoreAllErrors(
                        ex =>
                    {
                        logIndexing.WarnException("Failed to notify index update trigger batcher about an error in " + PublicName, ex);
                        context.AddError(indexId, PublicName, null, ex, "AnErrorOccured Trigger");
                    },
                        x => x.AnErrorOccured(e));
                    throw;
                }
                finally
                {
                    batchers.ApplyAndIgnoreAllErrors(
                        e =>
                    {
                        logIndexing.WarnException("Failed to dispose on index update trigger in " + PublicName, e);
                        context.AddError(indexId, PublicName, null, e, "Dispose Trigger");
                    },
                        x => x.Dispose());
                }
                return(new IndexedItemsInfo(batch.HighestEtagBeforeFiltering)
                {
                    ChangedDocs = sourceCount
                });
            }, writeToIndexStats);

            performanceStats.AddRange(writeToIndexStats);

            InitializeIndexingPerformanceCompleteDelegate(performance, sourceCount, count, performanceStats);

            if (logIndexing.IsDebugEnabled)
            {
                logIndexing.Debug("Indexed {0} documents for {1}", count, PublicName);
            }

            return(performance);
        }
Exemple #7
0
        private ReducingPerformanceStats SingleStepReduce(IndexToWorkOn index, List <string> keysToReduce, AbstractViewGenerator viewGenerator,
                                                          ConcurrentSet <object> itemsToDelete, CancellationToken token)
        {
            var needToMoveToSingleStepQueue = new ConcurrentQueue <HashSet <string> >();

            if (Log.IsDebugEnabled)
            {
                Log.Debug(() => string.Format("Executing single step reducing for {0} keys [{1}]", keysToReduce.Count, string.Join(", ", keysToReduce)));
            }

            var batchTimeWatcher = Stopwatch.StartNew();

            var reducingBatchThrottlerId = Guid.NewGuid();
            var reducePerformanceStats   = new ReducingPerformanceStats(ReduceType.SingleStep);
            var reduceLevelStats         = new ReduceLevelPeformanceStats
            {
                Started = SystemTime.UtcNow,
                Level   = 2
            };

            try
            {
                var parallelOperations = new ConcurrentQueue <ParallelBatchStats>();

                var parallelProcessingStart = SystemTime.UtcNow;

                BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, keysToReduce, enumerator =>
                {
                    var parallelStats = new ParallelBatchStats
                    {
                        StartDelay = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds
                    };

                    var localNeedToMoveToSingleStep = new HashSet <string>();
                    needToMoveToSingleStepQueue.Enqueue(localNeedToMoveToSingleStep);
                    var localKeys = new HashSet <string>();
                    while (enumerator.MoveNext())
                    {
                        token.ThrowIfCancellationRequested();

                        localKeys.Add(enumerator.Current);
                    }

                    transactionalStorage.Batch(actions =>
                    {
                        var getItemsToReduceParams = new GetItemsToReduceParams(index: index.IndexId, reduceKeys: new HashSet <string>(localKeys), level: 0, loadData: false, itemsToDelete: itemsToDelete)
                        {
                            Take = int.MaxValue // just get all, we do the rate limit when we load the number of keys to reduce, anyway
                        };

                        var getItemsToReduceDuration = Stopwatch.StartNew();

                        int scheduledItemsSum   = 0;
                        int scheduledItemsCount = 0;
                        List <int> scheduledItemsMappedBuckets = new List <int>();
                        using (StopwatchScope.For(getItemsToReduceDuration))
                        {
                            foreach (var item in actions.MapReduce.GetItemsToReduce(getItemsToReduceParams, token))
                            {
                                scheduledItemsMappedBuckets.Add(item.Bucket);
                                scheduledItemsSum += item.Size;
                                scheduledItemsCount++;
                            }
                        }

                        parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_GetItemsToReduce, getItemsToReduceDuration.ElapsedMilliseconds));

                        autoTuner.CurrentlyUsedBatchSizesInBytes.GetOrAdd(reducingBatchThrottlerId, scheduledItemsSum);

                        if (scheduledItemsCount == 0)
                        {
                            // Here we have an interesting issue. We have scheduled reductions, because GetReduceTypesPerKeys() returned them
                            // and at the same time, we don't have any at level 0. That probably means that we have them at level 1 or 2.
                            // They shouldn't be here, and indeed, we remove them just a little down from here in this function.
                            // That said, they might have smuggled in between versions, or something happened to cause them to be here.
                            // In order to avoid that, we forcibly delete those extra items from the scheduled reductions, and move on

                            Log.Warn("Found single reduce items ({0}) that didn't have any items to reduce. Deleting level 1 & level 2 items for those keys. (If you can reproduce this, please contact [email protected])", string.Join(", ", keysToReduce));

                            var deletingScheduledReductionsDuration = Stopwatch.StartNew();

                            using (StopwatchScope.For(deletingScheduledReductionsDuration))
                            {
                                foreach (var reduceKey in keysToReduce)
                                {
                                    token.ThrowIfCancellationRequested();

                                    actions.MapReduce.DeleteScheduledReduction(index.IndexId, 1, reduceKey);
                                    actions.MapReduce.DeleteScheduledReduction(index.IndexId, 2, reduceKey);
                                }
                            }

                            parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_DeleteScheduledReductions, deletingScheduledReductionsDuration.ElapsedMilliseconds));
                        }

                        var removeReduceResultsDuration = new Stopwatch();

                        foreach (var reduceKey in localKeys)
                        {
                            token.ThrowIfCancellationRequested();

                            var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexId, reduceKey);

                            if (lastPerformedReduceType != ReduceType.SingleStep)
                            {
                                localNeedToMoveToSingleStep.Add(reduceKey);
                            }

                            if (lastPerformedReduceType != ReduceType.MultiStep)
                            {
                                continue;
                            }

                            if (Log.IsDebugEnabled)
                            {
                                Log.Debug("Key {0} was moved from multi step to single step reduce, removing existing reduce results records", reduceKey);
                            }

                            using (StopwatchScope.For(removeReduceResultsDuration))
                            {
                                // now we are in single step but previously multi step reduce was performed for the given key
                                var mappedBuckets = actions.MapReduce.GetMappedBuckets(index.IndexId, reduceKey, token);

                                // add scheduled items too to be sure we will delete reduce results of already deleted documents
                                foreach (var mappedBucket in mappedBuckets.Union(scheduledItemsMappedBuckets))
                                {
                                    actions.MapReduce.RemoveReduceResults(index.IndexId, 1, reduceKey, mappedBucket);
                                    actions.MapReduce.RemoveReduceResults(index.IndexId, 2, reduceKey, mappedBucket / 1024);
                                }
                            }
                        }

                        parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_RemoveReduceResults, removeReduceResultsDuration.ElapsedMilliseconds));

                        parallelOperations.Enqueue(parallelStats);
                    });
                });

                reduceLevelStats.Operations.Add(new ParallelPerformanceStats
                {
                    NumberOfThreads   = parallelOperations.Count,
                    DurationMs        = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds,
                    BatchedOperations = parallelOperations.ToList()
                });

                var getMappedResultsDuration = new Stopwatch();

                var reductionPerformanceStats = new List <IndexingPerformanceStats>();

                var keysLeftToReduce = new HashSet <string>(keysToReduce);
                while (keysLeftToReduce.Count > 0)
                {
                    var keysReturned = new HashSet <string>();

                    // Try to diminish the allocations happening because of .Resize()
                    var mappedResults = new List <MappedResultInfo>(keysLeftToReduce.Count);

                    context.TransactionalStorage.Batch(actions =>
                    {
                        var take = context.CurrentNumberOfItemsToReduceInSingleBatch;

                        using (StopwatchScope.For(getMappedResultsDuration))
                        {
                            mappedResults = actions.MapReduce.GetMappedResults(index.IndexId, keysLeftToReduce, true, take, keysReturned, token, mappedResults);
                        }
                    });

                    var count = mappedResults.Count;

                    int size = 0;
                    foreach (var item in mappedResults)
                    {
                        item.Bucket = 0;
                        size       += item.Size;
                    }

                    var results = mappedResults.GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data)).ToArray();

                    context.MetricsCounters.ReducedPerSecond.Mark(results.Length);

                    token.ThrowIfCancellationRequested();

                    var performance = context.IndexStorage.Reduce(index.IndexId, viewGenerator, results, 2, context, null, keysReturned, count);

                    reductionPerformanceStats.Add(performance);

                    autoTuner.AutoThrottleBatchSize(count, size, batchTimeWatcher.Elapsed);
                }

                var needToMoveToSingleStep = new HashSet <string>();

                HashSet <string> set;
                while (needToMoveToSingleStepQueue.TryDequeue(out set))
                {
                    needToMoveToSingleStep.UnionWith(set);
                }

                foreach (var reduceKey in needToMoveToSingleStep)
                {
                    string localReduceKey = reduceKey;
                    transactionalStorage.Batch(actions =>
                                               actions.MapReduce.UpdatePerformedReduceType(index.IndexId, localReduceKey, ReduceType.SingleStep));
                }

                reduceLevelStats.Completed = SystemTime.UtcNow;
                reduceLevelStats.Duration  = reduceLevelStats.Completed - reduceLevelStats.Started;
                reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_GetMappedResults, getMappedResultsDuration.ElapsedMilliseconds));
                reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.StorageCommit, 0)); // in single step we write directly to Lucene index

                foreach (var stats in reductionPerformanceStats)
                {
                    reduceLevelStats.Add(stats);
                }

                reducePerformanceStats.LevelStats.Add(reduceLevelStats);
            }
            finally
            {
                long _;
                autoTuner.CurrentlyUsedBatchSizesInBytes.TryRemove(reducingBatchThrottlerId, out _);
            }

            return(reducePerformanceStats);
        }
Exemple #8
0
        protected ReducingPerformanceStats[] HandleReduceForIndex(IndexToWorkOn indexToWorkOn, CancellationToken token)
        {
            var viewGenerator = context.IndexDefinitionStorage.GetViewGenerator(indexToWorkOn.IndexId);

            if (viewGenerator == null)
            {
                return(null);
            }

            bool operationCanceled = false;
            var  itemsToDelete     = new ConcurrentSet <object>();

            var singleStepReduceKeys = new List <string>();
            var multiStepsReduceKeys = new List <string>();

            transactionalStorage.Batch(actions =>
            {
                var mappedResultsInfo = actions.MapReduce.GetReduceTypesPerKeys(indexToWorkOn.IndexId,
                                                                                context.CurrentNumberOfItemsToReduceInSingleBatch,
                                                                                context.NumberOfItemsToExecuteReduceInSingleStep, token);

                foreach (var key in mappedResultsInfo)
                {
                    token.ThrowIfCancellationRequested();

                    switch (key.OperationTypeToPerform)
                    {
                    case ReduceType.SingleStep:
                        singleStepReduceKeys.Add(key.ReduceKey);
                        break;

                    case ReduceType.MultiStep:
                        multiStepsReduceKeys.Add(key.ReduceKey);
                        break;
                    }
                }
            });

            currentlyProcessedIndexes.TryAdd(indexToWorkOn.IndexId, indexToWorkOn.Index);

            var performanceStats = new List <ReducingPerformanceStats>();

            try
            {
                if (singleStepReduceKeys.Count > 0)
                {
                    if (Log.IsDebugEnabled)
                    {
                        Log.Debug("SingleStep reduce for keys: {0}", string.Join(",", singleStepReduceKeys));
                    }

                    var singleStepStats = SingleStepReduce(indexToWorkOn, singleStepReduceKeys, viewGenerator, itemsToDelete, token);

                    performanceStats.Add(singleStepStats);
                }

                if (multiStepsReduceKeys.Count > 0)
                {
                    if (Log.IsDebugEnabled)
                    {
                        Log.Debug("MultiStep reduce for keys: {0}", string.Join(",", multiStepsReduceKeys));
                    }

                    var multiStepStats = MultiStepReduce(indexToWorkOn, multiStepsReduceKeys, viewGenerator, itemsToDelete, token);

                    performanceStats.Add(multiStepStats);
                }
            }
            catch (OperationCanceledException)
            {
                operationCanceled = true;
            }
            catch (AggregateException e)
            {
                var anyOperationsCanceled = e
                                            .InnerExceptions
                                            .OfType <OperationCanceledException>()
                                            .Any();

                if (anyOperationsCanceled == false)
                {
                    throw;
                }

                operationCanceled = true;
            }
            finally
            {
                var postReducingOperations = new ReduceLevelPeformanceStats
                {
                    Level   = -1,
                    Started = SystemTime.UtcNow
                };

                if (operationCanceled == false)
                {
                    var deletingScheduledReductionsDuration = new Stopwatch();
                    var storageCommitDuration = new Stopwatch();

                    // whatever we succeeded in indexing or not, we have to update this
                    // because otherwise we keep trying to re-index failed mapped results
                    transactionalStorage.Batch(actions =>
                    {
                        actions.BeforeStorageCommit += storageCommitDuration.Start;
                        actions.AfterStorageCommit  += storageCommitDuration.Stop;

                        ScheduledReductionInfo latest;

                        using (StopwatchScope.For(deletingScheduledReductionsDuration))
                        {
                            latest = actions.MapReduce.DeleteScheduledReduction(itemsToDelete);
                        }

                        if (latest == null)
                        {
                            return;
                        }
                        actions.Indexing.UpdateLastReduced(indexToWorkOn.IndexId, latest.Etag, latest.Timestamp);
                    });

                    postReducingOperations.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_DeleteScheduledReductions, deletingScheduledReductionsDuration.ElapsedMilliseconds));
                    postReducingOperations.Operations.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds));
                }

                postReducingOperations.Completed = SystemTime.UtcNow;
                postReducingOperations.Duration  = postReducingOperations.Completed - postReducingOperations.Started;

                performanceStats.Add(new ReducingPerformanceStats(ReduceType.None)
                {
                    LevelStats = new List <ReduceLevelPeformanceStats> {
                        postReducingOperations
                    }
                });

                Index _;
                currentlyProcessedIndexes.TryRemove(indexToWorkOn.IndexId, out _);
            }

            return(performanceStats.ToArray());
        }
Exemple #9
0
        private ReducingPerformanceStats MultiStepReduce(IndexToWorkOn index, List <string> keysToReduce, AbstractViewGenerator viewGenerator, ConcurrentSet <object> itemsToDelete, CancellationToken token)
        {
            var needToMoveToMultiStep = new HashSet <string>();

            transactionalStorage.Batch(actions =>
            {
                foreach (var localReduceKey in keysToReduce)
                {
                    token.ThrowIfCancellationRequested();

                    var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexId, localReduceKey);

                    if (lastPerformedReduceType != ReduceType.MultiStep)
                    {
                        needToMoveToMultiStep.Add(localReduceKey);
                    }

                    if (lastPerformedReduceType != ReduceType.SingleStep)
                    {
                        continue;
                    }

                    // we exceeded the limit of items to reduce in single step
                    // now we need to schedule reductions at level 0 for all map results with given reduce key
                    var mappedItems = actions.MapReduce.GetMappedBuckets(index.IndexId, localReduceKey, token).ToList();
                    foreach (var result in mappedItems.Select(x => new ReduceKeyAndBucket(x, localReduceKey)))
                    {
                        actions.MapReduce.ScheduleReductions(index.IndexId, 0, result);
                    }
                }
            });

            var reducePerformance = new ReducingPerformanceStats(ReduceType.MultiStep);


            for (int i = 0; i < 3; i++)
            {
                var level = i;

                var reduceLevelStats = new ReduceLevelPeformanceStats()
                {
                    Level   = level,
                    Started = SystemTime.UtcNow,
                };

                var reduceParams = new GetItemsToReduceParams(
                    index.IndexId,
                    new HashSet <string>(keysToReduce),
                    level,
                    true,
                    itemsToDelete);

                var gettingItemsToReduceDuration = new Stopwatch();
                var scheduleReductionsDuration   = new Stopwatch();
                var removeReduceResultsDuration  = new Stopwatch();
                var storageCommitDuration        = new Stopwatch();

                bool retry = true;
                while (retry && reduceParams.ReduceKeys.Count > 0)
                {
                    var reduceBatchAutoThrottlerId = Guid.NewGuid();
                    try
                    {
                        transactionalStorage.Batch(actions =>
                        {
                            token.ThrowIfCancellationRequested();

                            actions.BeforeStorageCommit += storageCommitDuration.Start;
                            actions.AfterStorageCommit  += storageCommitDuration.Stop;

                            var batchTimeWatcher = Stopwatch.StartNew();

                            reduceParams.Take = context.CurrentNumberOfItemsToReduceInSingleBatch;

                            int size = 0;

                            IList <MappedResultInfo> persistedResults;
                            var reduceKeys = new HashSet <string>(StringComparer.InvariantCultureIgnoreCase);
                            using (StopwatchScope.For(gettingItemsToReduceDuration))
                            {
                                persistedResults = actions.MapReduce.GetItemsToReduce(reduceParams, token);

                                foreach (var item in persistedResults)
                                {
                                    reduceKeys.Add(item.ReduceKey);
                                    size += item.Size;
                                }
                            }

                            if (persistedResults.Count == 0)
                            {
                                retry = false;
                                return;
                            }

                            var count = persistedResults.Count;

                            autoTuner.CurrentlyUsedBatchSizesInBytes.GetOrAdd(reduceBatchAutoThrottlerId, size);

                            if (Log.IsDebugEnabled)
                            {
                                if (persistedResults.Count > 0)
                                {
                                    Log.Debug(() => string.Format("Found {0} results for keys [{1}] for index {2} at level {3} in {4}",
                                                                  persistedResults.Count,
                                                                  string.Join(", ", persistedResults.Select(x => x.ReduceKey).Distinct()),
                                                                  index.Index.PublicName, level, batchTimeWatcher.Elapsed));
                                }
                                else
                                {
                                    Log.Debug("No reduce keys found for {0}", index.Index.PublicName);
                                }
                            }

                            token.ThrowIfCancellationRequested();


                            var requiredReduceNextTimeSet = new HashSet <ReduceKeyAndBucket>(persistedResults.Select(x => new ReduceKeyAndBucket(x.Bucket, x.ReduceKey)), ReduceKeyAndBucketEqualityComparer.Instance);

                            using (StopwatchScope.For(removeReduceResultsDuration))
                            {
                                foreach (var mappedResultInfo in requiredReduceNextTimeSet)
                                {
                                    token.ThrowIfCancellationRequested();

                                    actions.MapReduce.RemoveReduceResults(index.IndexId, level + 1, mappedResultInfo.ReduceKey, mappedResultInfo.Bucket);
                                }
                            }

                            if (level != 2)
                            {
                                var reduceKeysAndBucketsSet = new HashSet <ReduceKeyAndBucket>(requiredReduceNextTimeSet.Select(x => new ReduceKeyAndBucket(x.Bucket / 1024, x.ReduceKey)), ReduceKeyAndBucketEqualityComparer.Instance);

                                using (StopwatchScope.For(scheduleReductionsDuration))
                                {
                                    foreach (var reduceKeysAndBucket in reduceKeysAndBucketsSet)
                                    {
                                        token.ThrowIfCancellationRequested();

                                        actions.MapReduce.ScheduleReductions(index.IndexId, level + 1, reduceKeysAndBucket);
                                    }
                                }
                            }

                            token.ThrowIfCancellationRequested();

                            var reduceTimeWatcher = Stopwatch.StartNew();

                            var results = persistedResults.Where(x => x.Data != null)
                                          .GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data))
                                          .ToList();

                            var performance = context.IndexStorage.Reduce(index.IndexId, viewGenerator, results, level, context, actions, reduceKeys, persistedResults.Count);

                            context.MetricsCounters.ReducedPerSecond.Mark(results.Count());

                            reduceLevelStats.Add(performance);

                            var batchDuration = batchTimeWatcher.Elapsed;

                            if (Log.IsDebugEnabled)
                            {
                                Log.Debug("Indexed {0} reduce keys in {1} with {2} results for index {3} in {4} on level {5}", reduceKeys.Count, batchDuration, performance.ItemsCount, index.Index.PublicName, reduceTimeWatcher.Elapsed, level);
                            }

                            autoTuner.AutoThrottleBatchSize(count, size, batchDuration);
                        });
                    }
                    finally
                    {
                        long _;
                        autoTuner.CurrentlyUsedBatchSizesInBytes.TryRemove(reduceBatchAutoThrottlerId, out _);
                    }
                }

                reduceLevelStats.Completed = SystemTime.UtcNow;
                reduceLevelStats.Duration  = reduceLevelStats.Completed - reduceLevelStats.Started;

                reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_GetItemsToReduce, gettingItemsToReduceDuration.ElapsedMilliseconds));
                reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_ScheduleReductions, scheduleReductionsDuration.ElapsedMilliseconds));
                reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_RemoveReduceResults, removeReduceResultsDuration.ElapsedMilliseconds));
                reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds));

                reducePerformance.LevelStats.Add(reduceLevelStats);
            }

            foreach (var reduceKey in needToMoveToMultiStep)
            {
                token.ThrowIfCancellationRequested();

                string localReduceKey = reduceKey;
                transactionalStorage.Batch(actions =>
                                           actions.MapReduce.UpdatePerformedReduceType(index.IndexId, localReduceKey,
                                                                                       ReduceType.MultiStep));
            }

            return(reducePerformance);
        }
Exemple #10
0
        public override void Remove(string[] keys, WorkContext context)
        {
            DeletionBatchInfo deletionBatchInfo = null;

            try
            {
                deletionBatchInfo = context.ReportDeletionBatchStarted(PublicName, keys.Length);

                context.TransactionalStorage.Batch(actions =>
                {
                    var storageCommitDuration = new Stopwatch();

                    actions.BeforeStorageCommit += storageCommitDuration.Start;

                    actions.AfterStorageCommit += () =>
                    {
                        storageCommitDuration.Stop();

                        deletionBatchInfo.PerformanceStats.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds));
                    };

                    var reduceKeyAndBuckets = new Dictionary <ReduceKeyAndBucket, int>();

                    var deleteMappedResultsDuration = new Stopwatch();

                    using (StopwatchScope.For(deleteMappedResultsDuration))
                    {
                        if (actions.MapReduce.HasMappedResultsForIndex(indexId))
                        {
                            foreach (var key in keys)
                            {
                                actions.MapReduce.DeleteMappedResultsForDocumentId(key, indexId, reduceKeyAndBuckets);
                                context.CancellationToken.ThrowIfCancellationRequested();
                            }
                        }
                    }

                    deletionBatchInfo.PerformanceStats.Add(PerformanceStats.From(IndexingOperation.Delete_DeleteMappedResultsForDocumentId, deleteMappedResultsDuration.ElapsedMilliseconds));

                    actions.MapReduce.UpdateRemovedMapReduceStats(indexId, reduceKeyAndBuckets, context.CancellationToken);

                    var scheduleReductionsDuration = new Stopwatch();

                    using (StopwatchScope.For(scheduleReductionsDuration))
                    {
                        foreach (var reduceKeyAndBucket in reduceKeyAndBuckets)
                        {
                            actions.MapReduce.ScheduleReductions(indexId, 0, reduceKeyAndBucket.Key);
                            context.CancellationToken.ThrowIfCancellationRequested();
                        }
                    }

                    deletionBatchInfo.PerformanceStats.Add(PerformanceStats.From(IndexingOperation.Reduce_ScheduleReductions, scheduleReductionsDuration.ElapsedMilliseconds));
                });
            }
            finally
            {
                if (deletionBatchInfo != null)
                {
                    context.ReportDeletionBatchCompleted(deletionBatchInfo);
                }
            }
        }
Exemple #11
0
        public override void Remove(string[] keys, WorkContext context)
        {
            DeletionBatchInfo deletionBatchInfo = null;

            try
            {
                deletionBatchInfo = context.ReportDeletionBatchStarted(PublicName, keys.Length);

                Write((writer, analyzer, stats) =>
                {
                    var indexUpdateTriggersDuration = new Stopwatch();

                    stats.Operation = IndexingWorkStats.Status.Ignore;
                    if (logIndexing.IsDebugEnabled)
                    {
                        logIndexing.Debug(() => string.Format("Deleting ({0}) from {1}", string.Join(", ", keys), PublicName));
                    }

                    List <AbstractIndexUpdateTriggerBatcher> batchers;
                    using (StopwatchScope.For(indexUpdateTriggersDuration))
                    {
                        batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(indexId))
                                   .Where(x => x != null)
                                   .ToList();

                        keys.Apply(key =>
                                   InvokeOnIndexEntryDeletedOnAllBatchers(batchers, new Term(Constants.DocumentIdFieldName, key.ToLowerInvariant())));
                    }

                    var deleteDocumentsDuration = new Stopwatch();

                    using (StopwatchScope.For(deleteDocumentsDuration))
                    {
                        writer.DeleteDocuments(keys.Select(k => new Term(Constants.DocumentIdFieldName, k.ToLowerInvariant())).ToArray());
                    }

                    deletionBatchInfo.PerformanceStats.Add(PerformanceStats.From(IndexingOperation.Delete_Documents, deleteDocumentsDuration.ElapsedMilliseconds));

                    using (StopwatchScope.For(indexUpdateTriggersDuration))
                    {
                        batchers.ApplyAndIgnoreAllErrors(
                            e =>
                        {
                            logIndexing.WarnException("Failed to dispose on index update trigger in " + PublicName, e);
                            context.AddError(indexId, PublicName, null, e, "Dispose Trigger");
                        },
                            batcher => batcher.Dispose());
                    }

                    deletionBatchInfo.PerformanceStats.Add(PerformanceStats.From(IndexingOperation.Delete_IndexUpdateTriggers, indexUpdateTriggersDuration.ElapsedMilliseconds));

                    return(new IndexedItemsInfo(GetLastEtagFromStats())
                    {
                        ChangedDocs = keys.Length,
                        DeletedKeys = keys
                    });
                }, deletionBatchInfo.PerformanceStats);
            }
            finally
            {
                if (deletionBatchInfo != null)
                {
                    context.ReportDeletionBatchCompleted(deletionBatchInfo);
                }
            }
        }
        protected ReducingPerformanceStats[] HandleReduceForIndex(IndexToWorkOn indexToWorkOn)
        {
            var viewGenerator = context.IndexDefinitionStorage.GetViewGenerator(indexToWorkOn.IndexId);

            if (viewGenerator == null)
            {
                return(null);
            }

            bool operationCanceled = false;
            var  itemsToDelete     = new ConcurrentSet <object>();

            IList <ReduceTypePerKey> mappedResultsInfo = null;

            transactionalStorage.Batch(actions =>
            {
                mappedResultsInfo = actions.MapReduce.GetReduceTypesPerKeys(indexToWorkOn.IndexId,
                                                                            context.CurrentNumberOfItemsToReduceInSingleBatch,
                                                                            context.NumberOfItemsToExecuteReduceInSingleStep).ToList();
            });

            var singleStepReduceKeys = mappedResultsInfo.Where(x => x.OperationTypeToPerform == ReduceType.SingleStep).Select(x => x.ReduceKey).ToArray();
            var multiStepsReduceKeys = mappedResultsInfo.Where(x => x.OperationTypeToPerform == ReduceType.MultiStep).Select(x => x.ReduceKey).ToArray();

            currentlyProcessedIndexes.TryAdd(indexToWorkOn.IndexId, indexToWorkOn.Index);

            var performanceStats = new List <ReducingPerformanceStats>();

            try
            {
                if (singleStepReduceKeys.Length > 0)
                {
                    Log.Debug("SingleStep reduce for keys: {0}", singleStepReduceKeys.Select(x => x + ","));
                    var singleStepStats = SingleStepReduce(indexToWorkOn, singleStepReduceKeys, viewGenerator, itemsToDelete);

                    performanceStats.Add(singleStepStats);
                }

                if (multiStepsReduceKeys.Length > 0)
                {
                    Log.Debug("MultiStep reduce for keys: {0}", multiStepsReduceKeys.Select(x => x + ","));
                    var multiStepStats = MultiStepReduce(indexToWorkOn, multiStepsReduceKeys, viewGenerator, itemsToDelete);

                    performanceStats.Add(multiStepStats);
                }
            }
            catch (OperationCanceledException)
            {
                operationCanceled = true;
            }
            finally
            {
                var postReducingOperations = new ReduceLevelPeformanceStats
                {
                    Level   = -1,
                    Started = SystemTime.UtcNow
                };

                if (operationCanceled == false)
                {
                    var deletingScheduledReductionsDuration = new Stopwatch();
                    var storageCommitDuration = new Stopwatch();

                    // whatever we succeeded in indexing or not, we have to update this
                    // because otherwise we keep trying to re-index failed mapped results
                    transactionalStorage.Batch(actions =>
                    {
                        actions.BeforeStorageCommit += storageCommitDuration.Start;
                        actions.AfterStorageCommit  += storageCommitDuration.Stop;

                        ScheduledReductionInfo latest;

                        using (StopwatchScope.For(deletingScheduledReductionsDuration))
                        {
                            latest = actions.MapReduce.DeleteScheduledReduction(itemsToDelete);
                        }

                        if (latest == null)
                        {
                            return;
                        }
                        actions.Indexing.UpdateLastReduced(indexToWorkOn.Index.indexId, latest.Etag, latest.Timestamp);
                    });

                    postReducingOperations.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_DeleteScheduledReductions, deletingScheduledReductionsDuration.ElapsedMilliseconds));
                    postReducingOperations.Operations.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds));
                }

                postReducingOperations.Completed = SystemTime.UtcNow;
                postReducingOperations.Duration  = postReducingOperations.Completed - postReducingOperations.Started;

                performanceStats.Add(new ReducingPerformanceStats(ReduceType.None)
                {
                    LevelStats = new List <ReduceLevelPeformanceStats> {
                        postReducingOperations
                    }
                });

                Index _;
                currentlyProcessedIndexes.TryRemove(indexToWorkOn.IndexId, out _);
            }

            return(performanceStats.ToArray());
        }
Exemple #13
0
        protected ReducingPerformanceStats[] HandleReduceForIndex(
            IndexToWorkOn indexToWorkOn, bool skipIncreasingBatchSize, CancellationToken token)
        {
            var viewGenerator = context.IndexDefinitionStorage.GetViewGenerator(indexToWorkOn.IndexId);

            if (viewGenerator == null)
            {
                return(null);
            }

            bool operationCanceled = false;
            var  itemsToDelete     = new ConcurrentSet <object>();

            var singleStepReduceKeys = new List <string>();
            var multiStepsReduceKeys = new List <string>();

            transactionalStorage.Batch(actions =>
            {
                var mappedResultsInfo = actions.MapReduce.GetReduceTypesPerKeys(indexToWorkOn.IndexId,
                                                                                context.CurrentNumberOfItemsToReduceInSingleBatch,
                                                                                context.NumberOfItemsToExecuteReduceInSingleStep, token);

                foreach (var key in mappedResultsInfo)
                {
                    token.ThrowIfCancellationRequested();

                    switch (key.OperationTypeToPerform)
                    {
                    case ReduceType.SingleStep:
                        singleStepReduceKeys.Add(key.ReduceKey);
                        break;

                    case ReduceType.MultiStep:
                        multiStepsReduceKeys.Add(key.ReduceKey);
                        break;
                    }
                }
            });

            var performanceStats = new List <ReducingPerformanceStats>();

            try
            {
                if (singleStepReduceKeys.Count > 0)
                {
                    if (Log.IsDebugEnabled)
                    {
                        Log.Debug("SingleStep reduce for keys: {0}", string.Join(",", singleStepReduceKeys));
                    }

                    var singleStepStats = SingleStepReduce(indexToWorkOn, singleStepReduceKeys, viewGenerator, itemsToDelete, skipIncreasingBatchSize, token);

                    performanceStats.Add(singleStepStats);
                }

                if (multiStepsReduceKeys.Count > 0)
                {
                    if (Log.IsDebugEnabled)
                    {
                        Log.Debug("MultiStep reduce for keys: {0}", string.Join(",", multiStepsReduceKeys));
                    }

                    var multiStepStats = MultiStepReduce(indexToWorkOn, multiStepsReduceKeys, viewGenerator, itemsToDelete, skipIncreasingBatchSize, token);

                    performanceStats.Add(multiStepStats);
                }
            }
            catch (IndexDoesNotExistsException)
            {
                // race condition -> index was deleted
                // we can ignore this
                operationCanceled = true;
            }
            catch (ObjectDisposedException)
            {
                // index was disposed
                // we can ignore this
                operationCanceled = true;
            }
            catch (Exception e)
            {
                if (HandleIfOutOfMemory(e, new OutOfMemoryDetails
                {
                    Index = indexToWorkOn.Index,
                    FailedItemsToProcessCount = singleStepReduceKeys.Count + multiStepsReduceKeys.Count,
                    IsReducing = true
                }))
                {
                    //if we got a OOME we need to decrease the batch size
                    operationCanceled = true;
                    return(null);
                }

                if (IsOperationCanceledException(e))
                {
                    operationCanceled = true;
                    return(null);
                }

                var message = $"Failed to reduce index: {indexToWorkOn.Index.PublicName} (id: {indexToWorkOn.IndexId}) " +
                              $"{singleStepReduceKeys.Count} single step keys and {multiStepsReduceKeys.Count} multi step keys. " +
                              "Skipping this batch (it won't be reduced)";

                indexToWorkOn.Index.AddIndexingError(e, message);
            }
            finally
            {
                var postReducingOperations = new ReduceLevelPeformanceStats
                {
                    Level   = -1,
                    Started = SystemTime.UtcNow
                };

                if (operationCanceled == false)
                {
                    // need to flush the changes made to the map-reduce index
                    // before commiting the deletions of the scheduled reductions
                    context.IndexStorage.FlushIndex(indexToWorkOn.IndexId, onlyAddIndexError: true);

                    var deletingScheduledReductionsDuration = new Stopwatch();
                    var storageCommitDuration = new Stopwatch();

                    // whatever we succeeded in indexing or not, we have to update this
                    // because otherwise we keep trying to re-index failed mapped results
                    transactionalStorage.Batch(actions =>
                    {
                        actions.BeforeStorageCommit += storageCommitDuration.Start;
                        actions.AfterStorageCommit  += storageCommitDuration.Stop;

                        ScheduledReductionInfo latest;

                        using (StopwatchScope.For(deletingScheduledReductionsDuration))
                        {
                            latest = actions.MapReduce.DeleteScheduledReduction(itemsToDelete, token);
                        }

                        if (latest == null)
                        {
                            return;
                        }

                        actions.Indexing.UpdateLastReduced(indexToWorkOn.IndexId, latest.Etag, latest.Timestamp);
                    });

                    postReducingOperations.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_DeleteScheduledReductions, deletingScheduledReductionsDuration.ElapsedMilliseconds));
                    postReducingOperations.Operations.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds));
                }

                postReducingOperations.Completed = SystemTime.UtcNow;
                postReducingOperations.Duration  = postReducingOperations.Completed - postReducingOperations.Started;

                performanceStats.Add(new ReducingPerformanceStats(ReduceType.None)
                {
                    LevelStats = new List <ReduceLevelPeformanceStats> {
                        postReducingOperations
                    }
                });
            }

            return(performanceStats.ToArray());
        }