private IndexingPerformanceStats HandleIndexingFor(IndexingBatchForIndex batchForIndex, Etag lastEtag, DateTime lastModified, CancellationToken token)
{
    currentlyProcessedIndexes.TryAdd(batchForIndex.IndexId, batchForIndex.Index);

    IndexingPerformanceStats performanceResult = null;

    try
    {
        transactionalStorage.Batch(actions =>
        {
            performanceResult = IndexDocuments(actions, batchForIndex, token);
        });

        // This can be null if IndexDocuments fails to execute and the exception is caught.
        if (performanceResult != null)
            performanceResult.RunCompleted();
    }
    catch (Exception e)
    {
        Log.WarnException("Failed to index " + batchForIndex.Index.PublicName, e);
    }
    finally
    {
        if (performanceResult != null)
            performanceResult.OnCompleted = null;

        if (Log.IsDebugEnabled)
        {
            Log.Debug("After indexing {0} documents, the new last etag is: {1} for {2}",
                batchForIndex.Batch.Docs.Count, lastEtag, batchForIndex.Index.PublicName);
        }

        // Whether we succeeded in indexing or not, we have to update this,
        // because otherwise we keep trying to re-index failed documents.
        transactionalStorage.Batch(actions =>
            actions.Indexing.UpdateLastIndexed(batchForIndex.IndexId, lastEtag, lastModified));

        Index _;
        currentlyProcessedIndexes.TryRemove(batchForIndex.IndexId, out _);
    }

    return performanceResult;
}
private IndexingPerformanceStats IndexDocuments(IStorageActionsAccessor actions, IndexingBatchForIndex indexingBatchForIndex, CancellationToken token)
{
    var viewGenerator = context.IndexDefinitionStorage.GetViewGenerator(indexingBatchForIndex.IndexId);
    if (viewGenerator == null)
        return null; // index was deleted, probably

    var batch = indexingBatchForIndex.Batch;
    IndexingPerformanceStats performanceStats = null;

    try
    {
        if (Log.IsDebugEnabled)
        {
            string ids;
            if (batch.Ids.Count < 256)
                ids = string.Join(",", batch.Ids);
            else
                ids = string.Join(", ", batch.Ids.Take(128)) + " ... " + string.Join(", ", batch.Ids.Skip(batch.Ids.Count - 128));

            Log.Debug("Indexing {0} documents for index: {1}. ({2})", batch.Docs.Count, indexingBatchForIndex.Index.PublicName, ids);
        }

        context.CancellationToken.ThrowIfCancellationRequested();
        token.ThrowIfCancellationRequested();

        performanceStats = context.IndexStorage.Index(indexingBatchForIndex.IndexId, viewGenerator, batch, context, actions, batch.DateTime ?? DateTime.MinValue, token);
    }
    catch (OperationCanceledException)
    {
        throw;
    }
    catch (Exception e)
    {
        if (actions.IsWriteConflict(e))
            return null;

        Log.WarnException(string.Format("Failed to index documents for index: {0}", indexingBatchForIndex.Index.PublicName), e);
        context.AddError(indexingBatchForIndex.IndexId, indexingBatchForIndex.Index.PublicName, null, e);
    }

    return performanceStats;
}
public IndexingPerformanceStats ToIndexingPerformanceStats()
{
    if (_performanceStats != null)
        return _performanceStats;

    // Double-checked locking: re-test under the lock so the stats object is created only once.
    lock (Stats)
    {
        if (_performanceStats != null)
            return _performanceStats;

        return _performanceStats = CreateIndexingPerformanceStats(completed: true);
    }
}
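The method above is an instance of the double-checked locking idiom: the first null check avoids taking the lock on the already-initialized hot path, and the second check inside the lock ensures only one thread runs the factory. A minimal standalone sketch of the same idiom (names are illustrative, not RavenDB's):

using System;

public class LazyStatsHolder
{
    private readonly object _sync = new object();
    private volatile object _stats; // volatile so the unlocked read observes a fully constructed value

    public object GetOrCreate(Func<object> factory)
    {
        // First check without the lock: cheap fast path once initialized.
        if (_stats != null)
            return _stats;

        lock (_sync)
        {
            // Second check under the lock: another thread may have won the race.
            if (_stats != null)
                return _stats;

            return _stats = factory();
        }
    }
}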
public void OnDocumentsIndexed(IEnumerable<Document> documents, Analyzer searchAnalyzer)
{
    var indexingPerformanceStats = new IndexingPerformanceStats
    {
        Operation = _operationText,
        Started = SystemTime.UtcNow
    };
    _indexInstance.AddIndexingPerformanceStats(indexingPerformanceStats);

    var sp = Stopwatch.StartNew();
    var enumerableDictionary = new EnumerableDictionary(documents, field, searchAnalyzer);

    spellChecker.IndexDictionary(enumerableDictionary, workContext.CancellationToken);

    indexingPerformanceStats.Duration = sp.Elapsed;
    indexingPerformanceStats.InputCount = enumerableDictionary.NumberOfDocuments;
    indexingPerformanceStats.ItemsCount = enumerableDictionary.NumberOfTokens;
    indexingPerformanceStats.OutputCount = enumerableDictionary.NumberOfFields;
}
public void IndexPrecomputedBatch(PrecomputedIndexingBatch precomputedBatch, CancellationToken token)
{
    token.ThrowIfCancellationRequested();

    context.MetricsCounters.IndexedPerSecond.Mark(precomputedBatch.Documents.Count);

    var indexToWorkOn = new IndexToWorkOn
    {
        Index = precomputedBatch.Index,
        IndexId = precomputedBatch.Index.indexId,
        LastIndexedEtag = Etag.Empty
    };

    using (LogContext.WithDatabase(context.DatabaseName))
    using (MapIndexingInProgress(new List<IndexToWorkOn> { indexToWorkOn }))
    {
        var indexingBatchForIndex = FilterIndexes(new List<IndexToWorkOn> { indexToWorkOn }, precomputedBatch.Documents, precomputedBatch.LastIndexed).FirstOrDefault();
        if (indexingBatchForIndex == null)
            return;

        IndexingBatchInfo batchInfo = null;
        IndexingPerformanceStats performance = null;
        try
        {
            batchInfo = context.ReportIndexingBatchStarted(precomputedBatch.Documents.Count, -1, new List<string> { indexToWorkOn.Index.PublicName });

            batchInfo.BatchType = BatchType.Precomputed;

            if (Log.IsDebugEnabled)
            {
                Log.Debug("Going to index precomputed documents for a new index {0}. Count of precomputed docs {1}",
                    precomputedBatch.Index.PublicName, precomputedBatch.Documents.Count);
            }

            performance = HandleIndexingFor(indexingBatchForIndex, precomputedBatch.LastIndexed, precomputedBatch.LastModified, token);
        }
        finally
        {
            if (batchInfo != null)
            {
                if (performance != null)
                    batchInfo.PerformanceStats.TryAdd(indexingBatchForIndex.Index.PublicName, performance);

                context.ReportIndexingBatchCompleted(batchInfo);
            }
        }
    }

    indexReplacer.ReplaceIndexes(new[] { indexToWorkOn.IndexId });
}
public static void WriteIndexingPerformanceStats(this BlittableJsonTextWriter writer, JsonOperationContext context, IndexingPerformanceStats stats)
{
    var djv = (DynamicJsonValue)TypeConverter.ToBlittableSupportedType(stats);
    writer.WriteObject(context.ReadObject(djv, "index/performance"));
}
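A hypothetical call site for the extension above, assuming a JsonOperationContext, an output stream, and a populated stats object are already in hand from the surrounding request handling; only WriteIndexingPerformanceStats itself is taken from the code above:

// Illustrative usage sketch: serialize one stats entry into a response stream.
// `context`, `responseBodyStream`, and `stats` are assumed to exist in scope.
using (var writer = new BlittableJsonTextWriter(context, responseBodyStream))
{
    writer.WriteIndexingPerformanceStats(context, stats);
}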
public IndexingPerformanceStats ExecuteReduction()
{
    var count = 0;
    var sourceCount = 0;
    var addDocumentDuration = new Stopwatch();
    var convertToLuceneDocumentDuration = new Stopwatch();
    var linqExecutionDuration = new Stopwatch();
    var deleteExistingDocumentsDuration = new Stopwatch();
    var writeToIndexStats = new List<PerformanceStats>();

    IndexingPerformanceStats performance = null;

    parent.Write((indexWriter, analyzer, stats) =>
    {
        stats.Operation = IndexingWorkStats.Status.Reduce;

        try
        {
            if (Level == 2)
                RemoveExistingReduceKeysFromIndex(indexWriter, deleteExistingDocumentsDuration);

            foreach (var mappedResults in MappedResultsByBucket)
            {
                var input = mappedResults.Select(x =>
                {
                    sourceCount++;
                    return x;
                });

                IndexingFunc reduceDefinition = ViewGenerator.ReduceDefinition;

                foreach (var doc in parent.RobustEnumerationReduce(input.GetEnumerator(), reduceDefinition, stats, linqExecutionDuration))
                {
                    count++;

                    switch (Level)
                    {
                        case 0:
                        case 1:
                            string reduceKeyAsString = ExtractReduceKey(ViewGenerator, doc);
                            Actions.MapReduce.PutReducedResult(indexId, reduceKeyAsString, Level + 1, mappedResults.Key, mappedResults.Key / 1024, ToJsonDocument(doc));
                            Actions.General.MaybePulseTransaction();
                            break;
                        case 2:
                            WriteDocumentToIndex(doc, indexWriter, analyzer, convertToLuceneDocumentDuration, addDocumentDuration);
                            break;
                        default:
                            throw new InvalidOperationException("Unknown level: " + Level);
                    }

                    stats.ReduceSuccesses++;
                }
            }
        }
        catch (Exception e)
        {
            if (Level == 2)
            {
                batchers.ApplyAndIgnoreAllErrors(
                    ex =>
                    {
                        logIndexing.WarnException("Failed to notify index update trigger batcher about an error", ex);
                        Context.AddError(indexId, parent.indexDefinition.Name, null, ex, "AnErrorOccured Trigger");
                    },
                    x => x.AnErrorOccured(e));
            }
            throw;
        }
        finally
        {
            if (Level == 2)
            {
                batchers.ApplyAndIgnoreAllErrors(
                    e =>
                    {
                        logIndexing.WarnException("Failed to dispose on index update trigger", e);
                        Context.AddError(indexId, parent.indexDefinition.Name, null, e, "Dispose Trigger");
                    },
                    x => x.Dispose());
            }

            // TODO: Check if we need to report "Bucket Counts" or "Total Input Elements"?
            performance = parent.RecordCurrentBatch("Current Reduce #" + Level, "Reduce Level " + Level, sourceCount);
        }

        return new IndexedItemsInfo(null)
        {
            ChangedDocs = count + ReduceKeys.Count
        };
    }, writeToIndexStats);

    var performanceStats = new List<BasePerformanceStats>();
    performanceStats.Add(PerformanceStats.From(IndexingOperation.Linq_ReduceLinqExecution, linqExecutionDuration.ElapsedMilliseconds));
    performanceStats.Add(PerformanceStats.From(IndexingOperation.Lucene_DeleteExistingDocument, deleteExistingDocumentsDuration.ElapsedMilliseconds));
    performanceStats.Add(PerformanceStats.From(IndexingOperation.Lucene_ConvertToLuceneDocument, convertToLuceneDocumentDuration.ElapsedMilliseconds));
    performanceStats.Add(PerformanceStats.From(IndexingOperation.Lucene_AddDocument, addDocumentDuration.ElapsedMilliseconds));
    performanceStats.AddRange(writeToIndexStats);

    parent.BatchCompleted("Current Reduce #" + Level, "Reduce Level " + Level, sourceCount, count, performanceStats);

    logIndexing.Debug(() => string.Format("Reduce resulted in {0} entries for {1} for reduce keys: {2}", count, indexId, string.Join(", ", ReduceKeys)));

    return performance;
}
public override IndexingPerformanceStats IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp, CancellationToken token)
{
    token.ThrowIfCancellationRequested();

    var count = 0;
    var sourceCount = 0;
    var writeToIndexStats = new List<PerformanceStats>();

    IndexingPerformanceStats performance = null;
    var performanceStats = new List<BasePerformanceStats>();

    var storageCommitDuration = new Stopwatch();

    actions.BeforeStorageCommit += storageCommitDuration.Start;

    actions.AfterStorageCommit += () =>
    {
        storageCommitDuration.Stop();
        performanceStats.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds));
    };

    Write((indexWriter, analyzer, stats) =>
    {
        var processedKeys = new HashSet<string>();
        var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(indexId))
            .Where(x => x != null)
            .ToList();

        try
        {
            performance = RecordCurrentBatch("Current", "Index", batch.Docs.Count);

            var deleteExistingDocumentsDuration = new Stopwatch();

            var docIdTerm = new Term(Constants.DocumentIdFieldName);
            var documentsWrapped = batch.Docs.Select((doc, i) =>
            {
                token.ThrowIfCancellationRequested();
                Interlocked.Increment(ref sourceCount);

                if (doc.__document_id == null)
                    throw new ArgumentException(
                        string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));

                string documentId = doc.__document_id.ToString();
                if (processedKeys.Add(documentId) == false)
                    return doc;

                InvokeOnIndexEntryDeletedOnAllBatchers(batchers, docIdTerm.CreateTerm(documentId.ToLowerInvariant()));

                if (batch.SkipDeleteFromIndex[i] == false ||
                    context.ShouldRemoveFromIndex(documentId)) // maybe it is recently deleted?
                {
                    using (StopwatchScope.For(deleteExistingDocumentsDuration))
                    {
                        indexWriter.DeleteDocuments(docIdTerm.CreateTerm(documentId.ToLowerInvariant()));
                    }
                }

                return doc;
            })
            .Where(x => x is FilteredDocument == false)
            .ToList();

            performanceStats.Add(new PerformanceStats
            {
                Name = IndexingOperation.Lucene_DeleteExistingDocument,
                DurationMs = deleteExistingDocumentsDuration.ElapsedMilliseconds
            });

            var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();
            var allReferenceEtags = new ConcurrentQueue<IDictionary<string, Etag>>();
            var parallelOperations = new ConcurrentQueue<ParallelBatchStats>();

            var parallelProcessingStart = SystemTime.UtcNow;

            context.Database.MappingThreadPool.ExecuteBatch(documentsWrapped, (IEnumerator<dynamic> partition) =>
            {
                token.ThrowIfCancellationRequested();
                var parallelStats = new ParallelBatchStats
                {
                    StartDelay = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds
                };

                var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(context.Database, indexDefinition, viewGenerator, logIndexing);
                var luceneDoc = new Document();
                var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);

                using (CurrentIndexingScope.Current = new CurrentIndexingScope(context.Database, PublicName))
                {
                    string currentDocId = null;
                    int outputPerDocId = 0;
                    Action<Exception, object> onErrorFunc;
                    bool skipDocument = false;

                    var linqExecutionDuration = new Stopwatch();
                    var addDocumentDuration = new Stopwatch();
                    var convertToLuceneDocumentDuration = new Stopwatch();

                    foreach (var doc in RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, stats, out onErrorFunc, linqExecutionDuration))
                    {
                        token.ThrowIfCancellationRequested();

                        float boost;
                        IndexingResult indexingResult;

                        using (StopwatchScope.For(convertToLuceneDocumentDuration))
                        {
                            try
                            {
                                indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);
                            }
                            catch (Exception e)
                            {
                                onErrorFunc(e, doc);
                                continue;
                            }
                        }

                        // ReSharper disable once RedundantBoolCompare --> code clarity
                        if (indexingResult.NewDocId == null || indexingResult.ShouldSkip != false)
                            continue;

                        if (currentDocId != indexingResult.NewDocId)
                        {
                            currentDocId = indexingResult.NewDocId;
                            outputPerDocId = 0;
                            skipDocument = false;
                        }

                        if (skipDocument)
                            continue;

                        outputPerDocId++;

                        if (EnsureValidNumberOfOutputsForDocument(currentDocId, outputPerDocId) == false)
                        {
                            skipDocument = true;
                            continue;
                        }

                        Interlocked.Increment(ref count);

                        using (StopwatchScope.For(convertToLuceneDocumentDuration))
                        {
                            luceneDoc.GetFields().Clear();
                            luceneDoc.Boost = boost;
                            documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
                            luceneDoc.Add(documentIdField);
                            foreach (var field in indexingResult.Fields)
                            {
                                luceneDoc.Add(field);
                            }
                        }

                        batchers.ApplyAndIgnoreAllErrors(
                            exception =>
                            {
                                logIndexing.WarnException(
                                    string.Format(
                                        "Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
                                        PublicName, indexingResult.NewDocId),
                                    exception);
                                context.AddError(
                                    indexId,
                                    PublicName,
                                    indexingResult.NewDocId,
                                    exception,
                                    "OnIndexEntryCreated Trigger");
                            },
                            trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc));

                        LogIndexedDocument(indexingResult.NewDocId, luceneDoc);

                        using (StopwatchScope.For(addDocumentDuration))
                        {
                            AddDocumentToIndex(indexWriter, luceneDoc, analyzer);
                        }

                        Interlocked.Increment(ref stats.IndexingSuccesses);
                    }

                    allReferenceEtags.Enqueue(CurrentIndexingScope.Current.ReferencesEtags);
                    allReferencedDocs.Enqueue(CurrentIndexingScope.Current.ReferencedDocuments);

                    parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.LoadDocument, CurrentIndexingScope.Current.LoadDocumentDuration.ElapsedMilliseconds));
                    parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Linq_MapExecution, linqExecutionDuration.ElapsedMilliseconds));
                    parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Lucene_ConvertToLuceneDocument, convertToLuceneDocumentDuration.ElapsedMilliseconds));
                    parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Lucene_AddDocument, addDocumentDuration.ElapsedMilliseconds));

                    parallelOperations.Enqueue(parallelStats);
                }
            }, description: string.Format("Mapping index {0} from Etag {1} to Etag {2}", this.PublicName, this.GetLastEtagFromStats(), batch.HighestEtagBeforeFiltering));

            performanceStats.Add(new ParallelPerformanceStats
            {
                NumberOfThreads = parallelOperations.Count,
                DurationMs = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds,
                BatchedOperations = parallelOperations.ToList()
            });

            var updateDocumentReferencesDuration = new Stopwatch();
            using (StopwatchScope.For(updateDocumentReferencesDuration))
            {
                UpdateDocumentReferences(actions, allReferencedDocs, allReferenceEtags);
            }
            performanceStats.Add(PerformanceStats.From(IndexingOperation.UpdateDocumentReferences, updateDocumentReferencesDuration.ElapsedMilliseconds));
        }
        catch (Exception e)
        {
            batchers.ApplyAndIgnoreAllErrors(
                ex =>
                {
                    logIndexing.WarnException("Failed to notify index update trigger batcher about an error in " + PublicName, ex);
                    context.AddError(indexId, PublicName, null, ex, "AnErrorOccured Trigger");
                },
                x => x.AnErrorOccured(e));
            throw;
        }
        finally
        {
            batchers.ApplyAndIgnoreAllErrors(
                e =>
                {
                    logIndexing.WarnException("Failed to dispose on index update trigger in " + PublicName, e);
                    context.AddError(indexId, PublicName, null, e, "Dispose Trigger");
                },
                x => x.Dispose());
        }

        return new IndexedItemsInfo(batch.HighestEtagBeforeFiltering)
        {
            ChangedDocs = sourceCount
        };
    }, writeToIndexStats);

    performanceStats.AddRange(writeToIndexStats);

    InitializeIndexingPerformanceCompleteDelegate(performance, sourceCount, count, performanceStats);

    if (logIndexing.IsDebugEnabled)
        logIndexing.Debug("Indexed {0} documents for {1}", count, PublicName);

    return performance;
}
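StopwatchScope.For(...) appears throughout the method above to attribute wall-clock time to individual phases. A plausible sketch of such a helper is below; this is an assumption about its shape, not RavenDB's actual implementation: a disposable that starts a caller-owned Stopwatch and stops it on Dispose, so repeated using-blocks accumulate into a single total.

using System;
using System.Diagnostics;

// Assumed shape of a StopwatchScope-style helper: each using-block adds its
// elapsed time to the caller's Stopwatch, so one watch can accumulate the
// total cost of many short sections (e.g. per-document conversions).
public struct StopwatchScope : IDisposable
{
    private readonly Stopwatch _watch;

    private StopwatchScope(Stopwatch watch)
    {
        _watch = watch;
        _watch.Start();
    }

    public static StopwatchScope For(Stopwatch watch)
    {
        return new StopwatchScope(watch);
    }

    public void Dispose()
    {
        _watch.Stop();
    }
}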
private void InitializeIndexingPerformanceCompleteDelegate(IndexingPerformanceStats performance, int sourceCount, int count, List<BasePerformanceStats> performanceStats)
{
    performance.OnCompleted = () => BatchCompleted("Current", "Index", sourceCount, count, performanceStats);
}
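Together with the RunCompleted call and the `OnCompleted = null` cleanup seen in HandleIndexingFor, this wiring forms a one-shot completion callback. A minimal sketch of the pattern (illustrative, not the real IndexingPerformanceStats type):

using System;

// Illustrative one-shot completion callback: the producer assigns OnCompleted,
// RunCompleted fires it once, and the caller clears the delegate afterwards so
// a retry or a second RunCompleted cannot report the same batch twice.
public class CompletionCallbackStats
{
    public Action OnCompleted { get; set; }

    public void RunCompleted()
    {
        var onCompleted = OnCompleted;
        if (onCompleted != null)
            onCompleted();
    }
}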
private IndexingPerformanceStats HandleIndexingFor(IndexingBatchForIndex batchForIndex, Etag lastEtag, DateTime lastModified, CancellationToken token)
{
    currentlyProcessedIndexes.TryAdd(batchForIndex.IndexId, batchForIndex.Index);

    IndexingPerformanceStats performanceResult = null;
    var wasOutOfMemory = false;
    var wasOperationCanceled = false;
    try
    {
        transactionalStorage.Batch(actions =>
        {
            performanceResult = IndexDocuments(actions, batchForIndex, token);
        });

        // This can be null if IndexDocuments fails to execute and the exception is caught.
        if (performanceResult != null)
            performanceResult.RunCompleted();
    }
    catch (OperationCanceledException)
    {
        wasOperationCanceled = true;
        throw;
    }
    catch (Exception e)
    {
        var exception = e;
        var aggregateException = exception as AggregateException;
        if (aggregateException != null)
            exception = aggregateException.ExtractSingleInnerException();

        if (TransactionalStorageHelper.IsWriteConflict(exception))
            return null;

        Log.WarnException(string.Format("Failed to index documents for index: {0}", batchForIndex.Index.PublicName), exception);

        wasOutOfMemory = TransactionalStorageHelper.IsOutOfMemoryException(exception);

        if (wasOutOfMemory == false)
            context.AddError(batchForIndex.IndexId, batchForIndex.Index.PublicName, null, exception);
    }
    finally
    {
        if (performanceResult != null)
            performanceResult.OnCompleted = null;

        if (Log.IsDebugEnabled)
        {
            Log.Debug("After indexing {0} documents, the new last etag is: {1} for {2}",
                batchForIndex.Batch.Docs.Count, lastEtag, batchForIndex.Index.PublicName);
        }

        if (wasOutOfMemory == false && wasOperationCanceled == false)
        {
            transactionalStorage.Batch(actions =>
            {
                // Whether we succeeded in indexing or not, we have to update this,
                // because otherwise we keep trying to re-index failed documents.
                actions.Indexing.UpdateLastIndexed(batchForIndex.IndexId, lastEtag, lastModified);
            });
        }
        else if (wasOutOfMemory)
        {
            HandleOutOfMemory(batchForIndex);
        }

        Index _;
        currentlyProcessedIndexes.TryRemove(batchForIndex.IndexId, out _);
    }

    return performanceResult;
}
private IndexingPerformanceStats HandleIndexingFor(IndexingBatchForIndex batchForIndex, Etag lastEtag, DateTime lastModified, CancellationToken token)
{
    if (currentlyProcessedIndexes.TryAdd(batchForIndex.IndexId, batchForIndex.Index) == false)
    {
        Log.Error("Entered handle indexing with index {0} inside currentlyProcessedIndexes", batchForIndex.Index.PublicName);
        batchForIndex.SignalIndexingComplete();
        return null;
    }

    IndexingPerformanceStats performanceResult = null;
    var wasOutOfMemory = false;
    var wasOperationCanceled = false;
    try
    {
        transactionalStorage.Batch(actions =>
        {
            performanceResult = IndexDocuments(actions, batchForIndex, token);
        });

        // This can be null if IndexDocuments fails to execute and the exception is caught.
        if (performanceResult != null)
            performanceResult.RunCompleted();
    }
    catch (OperationCanceledException)
    {
        wasOperationCanceled = true;
        throw;
    }
    catch (Exception e)
    {
        var exception = e;
        var aggregateException = exception as AggregateException;
        if (aggregateException != null)
            exception = aggregateException.ExtractSingleInnerException();

        if (TransactionalStorageHelper.IsWriteConflict(exception))
            return null;

        Log.WarnException(string.Format("Failed to index documents for index: {0}", batchForIndex.Index.PublicName), exception);

        wasOutOfMemory = TransactionalStorageHelper.IsOutOfMemoryException(exception);

        if (wasOutOfMemory == false)
            context.AddError(batchForIndex.IndexId, batchForIndex.Index.PublicName, null, exception);
    }
    finally
    {
        if (performanceResult != null)
        {
            performanceResult.OnCompleted = null;
        }

        Index _;
        if (Log.IsDebugEnabled)
        {
            Log.Debug("After indexing {0} documents, the new last etag is: {1} for {2}",
                batchForIndex.Batch.Docs.Count, lastEtag, batchForIndex.Index.PublicName);
        }

        try
        {
            if (wasOutOfMemory == false && wasOperationCanceled == false)
            {
                bool keepTrying = true;
                for (int i = 0; i < 10 && keepTrying; i++)
                {
                    keepTrying = false;
                    transactionalStorage.Batch(actions =>
                    {
                        try
                        {
                            // Whether we succeeded in indexing or not, we have to update this,
                            // because otherwise we keep trying to re-index failed documents.
                            actions.Indexing.UpdateLastIndexed(batchForIndex.IndexId, lastEtag, lastModified);
                        }
                        catch (Exception e)
                        {
                            if (actions.IsWriteConflict(e))
                            {
                                keepTrying = true;
                                return;
                            }
                            throw;
                        }
                    });

                    if (keepTrying)
                        Thread.Sleep(11);
                }
            }
            else if (wasOutOfMemory)
            {
                HandleOutOfMemory(batchForIndex);
            }
        }
        finally
        {
            currentlyProcessedIndexes.TryRemove(batchForIndex.IndexId, out _);
            batchForIndex.SignalIndexingComplete();
            batchForIndex.Index.IsMapIndexingInProgress = false;
        }
    }

    return performanceResult;
}
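The UpdateLastIndexed call above is wrapped in a bounded retry loop: a write conflict triggers up to ten attempts with an 11 ms pause between them, and the method gives up silently once the attempts are exhausted. The same idiom can be factored into a reusable helper; this is a generic sketch with illustrative names, not code from the project:

using System;
using System.Threading;

public static class RetryHelper
{
    // Retry an operation on write conflicts, mirroring the loop above:
    // short fixed sleep between attempts, silent give-up after maxAttempts,
    // and immediate rethrow of anything that is not a conflict.
    public static void RetryOnWriteConflict(Action operation, Func<Exception, bool> isWriteConflict, int maxAttempts = 10)
    {
        for (var attempt = 0; attempt < maxAttempts; attempt++)
        {
            try
            {
                operation();
                return;
            }
            catch (Exception e) when (isWriteConflict(e))
            {
                Thread.Sleep(11); // brief pause before retrying, as in the code above
            }
        }
        // Like the original loop, give up silently once the attempts are exhausted.
    }
}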
public void SetIndexingPerformance(IndexingPerformanceStats stats)
{
    indexingPerformanceStats = stats;
}
public IndexingPerformanceStatistics()
{
    Performance = new IndexingPerformanceStats[0];
}
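The constructor guarantees Performance is never null for callers that enumerate it. On frameworks that provide it (.NET Framework 4.6+ / .NET Core), the allocation-free equivalent would be:

// Reuses a cached empty array instead of allocating a new zero-length one per
// construction (System.Array.Empty<T> is available since .NET Framework 4.6).
Performance = Array.Empty<IndexingPerformanceStats>();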