/// <summary>
/// Reduces every store tracked in <c>_mapReduceContext.StoreByReduceKeyHash</c> and then writes the last etags.
/// </summary>
/// <param name="databaseContext">Documents context; not referenced by this method body.</param>
/// <param name="indexContext">Index context whose inner transaction hosts the reduce results table.</param>
/// <param name="writeOperation">Lazily created index writer; its value is only requested when there are stores to reduce.</param>
/// <param name="stats">Parent stats scope; the tree and nested-values scopes are created under it.</param>
/// <param name="token">Checked before each store is processed.</param>
/// <returns>Always <c>false</c>.</returns>
/// <exception cref="ArgumentOutOfRangeException">A store reports a storage type other than Tree or Nested.</exception>
public bool Execute(DocumentsOperationContext databaseContext, TransactionOperationContext indexContext, Lazy<IndexWriteOperation> writeOperation, IndexingStatsScope stats, CancellationToken token)
{
    if (_mapReduceContext.StoreByReduceKeyHash.Count == 0)
    {
        // Etags must be written even when there is nothing to reduce: if the map phase filtered
        // everything out, the last indexed etag information would otherwise be lost and cause an
        // endless indexing loop.
        WriteLastEtags(indexContext);
        return false;
    }

    var innerTransaction = indexContext.Transaction.InnerTransaction;

    ReduceResultsSchema.Create(innerTransaction, PageNumberToReduceResultTableName, 32);
    var reduceResultsTable = innerTransaction.OpenTable(ReduceResultsSchema, PageNumberToReduceResultTableName);
    var lowLevelTx = innerTransaction.LowLevelTransaction;

    var indexWriter = writeOperation.Value;

    // Both scopes are created stopped and only started around the matching reduction below.
    var treeStats = stats.For(IndexingOperation.Reduce.TreeScope, start: false);
    var nestedStats = stats.For(IndexingOperation.Reduce.NestedValuesScope, start: false);

    foreach (var entry in _mapReduceContext.StoreByReduceKeyHash)
    {
        token.ThrowIfCancellationRequested();

        using (var reduceKeyHash = indexContext.GetLazyString(entry.Key.ToString(CultureInfo.InvariantCulture)))
        using (entry.Value)
        using (_aggregationBatch)
        {
            var modifiedStore = entry.Value;
            var storageType = modifiedStore.Type;

            if (storageType == MapResultsStorageType.Tree)
            {
                using (treeStats.Start())
                {
                    HandleTreeReduction(indexContext, treeStats, modifiedStore, lowLevelTx, indexWriter, reduceKeyHash, reduceResultsTable, token);
                }
            }
            else if (storageType == MapResultsStorageType.Nested)
            {
                using (nestedStats.Start())
                {
                    HandleNestedValuesReduction(indexContext, nestedStats, modifiedStore, indexWriter, reduceKeyHash, token);
                }
            }
            else
            {
                throw new ArgumentOutOfRangeException(modifiedStore.Type.ToString());
            }
        }
    }

    // Record process memory once, after all stores have been reduced.
    using (MemoryUsageGuard.GetProcessMemoryUsage(out var memoryUsage, out _))
    {
        stats.RecordReduceMemoryStats(memoryUsage.WorkingSet, memoryUsage.PrivateMemory);
    }

    WriteLastEtags(indexContext);

    return false;
}
/// <summary>
/// Rebinds the cached nested-values reduction scopes when a different stats instance is supplied.
/// </summary>
/// <param name="stats">Stats scope the child scopes are created under (created stopped).</param>
private void EnsureValidNestedValuesReductionStats(IndexingStatsScope stats)
{
    if (_nestedValuesReductionStatsInstance != stats)
    {
        // A new parent scope was handed in: remember it and recreate the child scopes.
        _nestedValuesReductionStatsInstance = stats;

        _nestedValuesReductionStats.NestedValuesRead = stats.For(IndexingOperation.Reduce.NestedValuesRead, start: false);
        _nestedValuesReductionStats.NestedValuesAggregation = stats.For(IndexingOperation.Reduce.NestedValuesAggregation, start: false);
    }
}
/// <summary>
/// Rebinds the cached blittable-JSON aggregation scopes when a different stats instance is supplied.
/// </summary>
/// <param name="stats">Stats scope the child scopes are created under (created stopped).</param>
private void EnsureValidStats(IndexingStatsScope stats)
{
    if (_statsInstance != stats)
    {
        // A new parent scope was handed in: remember it and recreate the child scopes.
        _statsInstance = stats;

        _stats.BlittableJsonAggregation = stats.For(IndexingOperation.Reduce.BlittableJsonAggregation, start: false);
        _stats.CreateBlittableJson = stats.For(IndexingOperation.Reduce.CreateBlittableJson, start: false);
    }
}
/// <summary>
/// Rebinds the cached Lucene delete / add / convert scopes when a different stats instance is supplied.
/// </summary>
/// <param name="stats">Stats scope the child scopes are created under (created stopped).</param>
private void EnsureValidStats(IndexingStatsScope stats)
{
    if (_statsInstance != stats)
    {
        // A new parent scope was handed in: remember it and recreate the child scopes.
        _statsInstance = stats;

        _stats.DeleteStats = stats.For(IndexingOperation.Lucene.Delete, start: false);
        _stats.AddStats = stats.For(IndexingOperation.Lucene.AddDocument, start: false);
        _stats.ConvertStats = stats.For(IndexingOperation.Lucene.Convert, start: false);
    }
}
/// <summary>
/// Rebinds the cached tree-reduction scopes when a different stats instance is supplied.
/// </summary>
/// <param name="stats">Stats scope the child scopes are created under (created stopped).</param>
private void EnsureValidTreeReductionStats(IndexingStatsScope stats)
{
    if (_treeReductionStatsInstance != stats)
    {
        // A new parent scope was handed in: remember it and recreate the child scopes.
        _treeReductionStatsInstance = stats;

        _treeReductionStats.LeafAggregation = stats.For(IndexingOperation.Reduce.LeafAggregation, start: false);
        _treeReductionStats.BranchAggregation = stats.For(IndexingOperation.Reduce.BranchAggregation, start: false);
        _treeReductionStats.StoringReduceResult = stats.For(IndexingOperation.Reduce.StoringReduceResult, start: false);
    }
}
/// <summary>
/// Rebinds the cached map-entries scopes when a different stats instance is supplied.
/// </summary>
/// <param name="stats">Stats scope the child scopes are created under (created stopped).</param>
private void EnsureValidStats(IndexingStatsScope stats)
{
    if (_statsInstance != stats)
    {
        // A new parent scope was handed in: remember it and recreate the child scopes.
        _statsInstance = stats;

        _stats.GetMapEntriesTree = stats.For(IndexingOperation.Reduce.GetMapEntriesTree, start: false);
        _stats.GetMapEntries = stats.For(IndexingOperation.Reduce.GetMapEntries, start: false);
        _stats.RemoveResult = stats.For(IndexingOperation.Reduce.RemoveMapResult, start: false);
        _stats.PutResult = stats.For(IndexingOperation.Reduce.PutMapResult, start: false);
    }
}
/// <summary>
/// Commits the index under an acquired transaction and then waits for the output reduce documents to be saved.
/// </summary>
/// <param name="stats">Stats scope; the wait is measured under <c>Reduce.SaveOutputDocuments</c>.</param>
/// <exception cref="OperationCanceledException">
/// The save was cancelled, or it hit an <see cref="ObjectDisposedException"/> while the database shutdown token was signalled.
/// </exception>
/// <exception cref="IndexWriteException">
/// Any other failure, except out-of-memory and disk-full errors which propagate unchanged.
/// </exception>
public override void Commit(IndexingStatsScope stats)
{
    // Obtain the awaitable first; it is only waited on after the base commit has finished.
    var saveOutputDocuments = CommitOutputReduceToCollection();

    using (_txHolder.AcquireTransaction(out _))
    {
        base.Commit(stats);
    }

    try
    {
        using (stats.For(IndexingOperation.Reduce.SaveOutputDocuments))
        {
            saveOutputDocuments.GetAwaiter().GetResult();
        }
    }
    catch (OperationCanceledException)
    {
        // Cancellation is not a write failure; let it through untouched.
        throw;
    }
    catch (ObjectDisposedException disposed) when (DocumentDatabase.DatabaseShutdown.IsCancellationRequested)
    {
        throw new OperationCanceledException("The operation of writing output reduce documents was cancelled because of database shutdown", disposed);
    }
    catch (Exception critical) when (critical.IsOutOfMemory() || critical is DiskFullException)
    {
        // Resource exhaustion is surfaced as-is rather than wrapped.
        throw;
    }
    catch (Exception failure)
    {
        throw new IndexWriteException("Failed to save output reduce documents to disk", failure);
    }
}
/// <summary>
/// Delegates to the base implementation while timing the call under <c>Lucene.ApplyDeletes</c>.
/// </summary>
/// <param name="state">State forwarded to the base implementation.</param>
/// <returns>Whatever the base implementation returns.</returns>
protected override bool ApplyDeletes(IState state)
{
    // _commitStats may be null, in which case nothing is timed.
    using (_commitStats?.For(IndexingOperation.Lucene.ApplyDeletes))
    {
        var applied = base.ApplyDeletes(state);
        return applied;
    }
}
/// <summary>
/// Issues one delete command (up to 1024 documents) for every prefix stored in the
/// <c>PrefixesOfReduceOutputDocumentsToDeleteTree</c> tree and waits for each of them.
/// </summary>
/// <param name="stats">Stats scope; the whole operation is measured under <c>Reduce.DeleteOutputDocuments</c>.</param>
/// <param name="indexContext">Index context whose inner transaction holds the prefixes tree.</param>
/// <returns><c>true</c> when at least one command reported a positive delete count.</returns>
/// <exception cref="IndexWriteException">Waiting for a delete command failed.</exception>
public bool DeleteDocuments(IndexingStatsScope stats, TransactionOperationContext indexContext)
{
    const int deleteBatchSize = 1024;

    var database = _index.DocumentDatabase;
    var exhaustedPrefixes = new List<string>();
    var anyDeleted = false;

    using (stats.For(IndexingOperation.Reduce.DeleteOutputDocuments))
    {
        var prefixesTree = indexContext.Transaction.InnerTransaction.CreateTree(PrefixesOfReduceOutputDocumentsToDeleteTree);

        using (var iterator = prefixesTree.Iterate(false))
        {
            for (var hasEntry = iterator.Seek(Slices.BeforeAllKeys); hasEntry; hasEntry = iterator.MoveNext())
            {
                var toDelete = GetPrefixToDeleteAndOriginalPatternFromCurrent(iterator);

                var command = new DeleteReduceOutputDocumentsCommand(database, toDelete.Prefix, toDelete.OriginalPattern, deleteBatchSize);
                var pendingDelete = database.TxMerger.Enqueue(command);

                try
                {
                    pendingDelete.GetAwaiter().GetResult();
                }
                catch (Exception e)
                {
                    throw new IndexWriteException("Failed to delete output reduce documents", e);
                }

                // A batch that came back short of the limit means the prefix can be dropped from the tree.
                if (command.DeleteCount < deleteBatchSize)
                {
                    exhaustedPrefixes.Add(toDelete.Prefix);
                }

                if (command.DeleteCount > 0)
                {
                    anyDeleted = true;
                }
            }
        }

        // Removal is deferred until the iterator is disposed.
        foreach (var exhaustedPrefix in exhaustedPrefixes)
        {
            DeletePrefixOfReduceOutputDocumentsToDelete(exhaustedPrefix, indexContext);
        }
    }

    return anyDeleted;
}
/// <summary>
/// Loads the named document attachments of <paramref name="documentId"/>, skipping names that are not found.
/// </summary>
/// <param name="documentId">Id of the document owning the attachments.</param>
/// <param name="attachmentNames">Names of the attachments to look up.</param>
/// <returns>One <see cref="DynamicAttachment"/> per attachment that was found, in enumeration order.</returns>
public unsafe List<DynamicAttachment> LoadAttachments(string documentId, IEnumerable<string> attachmentNames)
{
    // Restart the cached scope when there is one; otherwise create it (it may stay null when _stats is null).
    using (_loadAttachmentStats?.Start() ?? (_loadAttachmentStats = _stats?.For(IndexingOperation.LoadAttachment)))
    {
        var loaded = new List<DynamicAttachment>();

        foreach (var name in attachmentNames)
        {
            var attachment = _documentsStorage.AttachmentsStorage.GetAttachment(QueryContext.Documents, documentId, name, AttachmentType.Document, null);

            if (attachment != null)
            {
                loaded.Add(new DynamicAttachment(attachment));
            }
        }

        return loaded;
    }
}
/// <summary>
/// Creates an enumerator over <paramref name="docs"/> that feeds the current document into <paramref name="func"/>.
/// </summary>
/// <param name="docs">Documents to enumerate.</param>
/// <param name="func">Indexing function applied to the current-document wrapper.</param>
/// <param name="collection">Source collection registered on the current indexing scope (Index enumeration only).</param>
/// <param name="stats">Optional stats scope; may be null.</param>
/// <param name="enumerationType">Selects whether the results are time-counted (Index) or used as-is (Transformer).</param>
public StaticIndexDocsEnumerator(IEnumerable<Document> docs, IndexingFunc func, string collection, IndexingStatsScope stats, EnumerationType enumerationType)
{
    _documentReadStats = stats?.For(IndexingOperation.Map.DocumentRead, start: false);
    _enumerationType = enumerationType;
    _docsEnumerator = docs.GetEnumerator();

    if (enumerationType == EnumerationType.Index)
    {
        var linqStats = stats?.For(IndexingOperation.Map.Linq, start: false);

        _resultsOfCurrentDocument = new TimeCountingEnumerable(func(new DynamicIteratonOfCurrentDocumentWrapper(this)), linqStats);
        CurrentIndexingScope.Current.SetSourceCollection(collection, linqStats);
    }
    else if (enumerationType == EnumerationType.Transformer)
    {
        _resultsOfCurrentDocument = func(new DynamicIteratonOfCurrentDocumentWrapper(this));
    }

    // Any other enumeration type leaves _resultsOfCurrentDocument unassigned, as before.
}
/// <summary>
/// Stores the items and context for the next enumeration and refreshes the cached stats scope when it changed.
/// </summary>
/// <param name="items">Items that will be enumerated.</param>
/// <param name="indexContext">Index context to use during enumeration.</param>
/// <param name="stats">Stats scope the <c>Reduce.CreateBlittableJson</c> child scope is created under.</param>
public void InitializeForEnumeration(IEnumerable items, TransactionOperationContext indexContext, IndexingStatsScope stats)
{
    _items = items;
    _indexContext = indexContext;

    if (_stats != stats)
    {
        // Only recreate the child scope when a different parent scope is supplied.
        _stats = stats;
        _createBlittableResultStats = _stats.For(IndexingOperation.Reduce.CreateBlittableJson, start: false);
    }
}
/// <summary>
/// Creates an enumerator over <paramref name="docs"/> whose map results come from one or several indexing functions.
/// </summary>
/// <param name="docs">Documents to enumerate; forwarded to the single-argument constructor.</param>
/// <param name="funcs">Indexing functions; a single function is invoked directly, several are combined.</param>
/// <param name="collection">Source collection registered on the current indexing scope.</param>
/// <param name="stats">Optional stats scope; may be null.</param>
public StaticIndexDocsEnumerator(IEnumerable<Document> docs, List<IndexingFunc> funcs, string collection, IndexingStatsScope stats)
    : this(docs)
{
    _documentReadStats = stats?.For(IndexingOperation.Map.DocumentRead, start: false);

    var linqStats = stats?.For(IndexingOperation.Map.Linq, start: false);

    if (funcs.Count != 1)
    {
        // Several functions: combine them behind one enumerator and time that.
        _multipleIndexingFunctionsEnumerator = new MultipleIndexingFunctionsEnumerator(funcs, new DynamicIteratonOfCurrentDocumentWrapper(this));
        ResultsOfCurrentDocument = new TimeCountingEnumerable(_multipleIndexingFunctionsEnumerator, linqStats);
    }
    else
    {
        // Exactly one function: time its output directly.
        ResultsOfCurrentDocument = new TimeCountingEnumerable(funcs[0](new DynamicIteratonOfCurrentDocumentWrapper(this)), linqStats);
    }

    CurrentIndexingScope.Current.SetSourceCollection(collection, linqStats);
}
/// <summary>
/// Creates an enumerator over <paramref name="docs"/> whose map results come from one or several indexing functions,
/// timed under the Jint or Linq operation depending on <paramref name="type"/>.
/// </summary>
/// <param name="docs">Documents to enumerate; forwarded to the single-argument constructor.</param>
/// <param name="funcs">Indexing functions; a single function is invoked directly, several are combined.</param>
/// <param name="collection">Source collection registered on the current indexing scope.</param>
/// <param name="stats">Optional stats scope; may be null.</param>
/// <param name="type">Index type; <c>IsJavaScript()</c> selects the Jint operation name, otherwise Linq.</param>
public StaticIndexDocsEnumerator(IEnumerable<Document> docs, List<IndexingFunc> funcs, string collection, IndexingStatsScope stats, IndexType type)
    : this(docs)
{
    _documentReadStats = stats?.For(IndexingOperation.Map.DocumentRead, start: false);

    var mapOperation = type.IsJavaScript() ? IndexingOperation.Map.Jint : IndexingOperation.Map.Linq;
    var mapFuncStats = stats?.For(mapOperation, start: false);

    if (funcs.Count != 1)
    {
        // Several functions: combine them behind one enumerator and time that.
        _multipleIndexingFunctionsEnumerator = new MultipleIndexingFunctionsEnumerator(funcs, new DynamicIteratonOfCurrentDocumentWrapper(this));
        _resultsOfCurrentDocument = new TimeCountingEnumerable(_multipleIndexingFunctionsEnumerator, mapFuncStats);
    }
    else
    {
        // Exactly one function: time its output directly.
        _resultsOfCurrentDocument = new TimeCountingEnumerable(funcs[0](new DynamicIteratonOfCurrentDocumentWrapper(this)), mapFuncStats);
    }

    CurrentIndexingScope.Current.SetSourceCollection(collection, mapFuncStats);
}
/// <summary>
/// Loads the document identified by <paramref name="keyLazy"/> or <paramref name="keyString"/> and records it
/// as a reference of the item currently being indexed.
/// </summary>
/// <param name="keyLazy">Key as a lazy string; may be null when <paramref name="keyString"/> is given.</param>
/// <param name="keyString">Key as a string; may be null when <paramref name="keyLazy"/> is given.</param>
/// <param name="collectionName">Not referenced by this method body.</param>
/// <returns>
/// <c>DynamicNullObject.Null</c> when both keys are null, no key slice could be produced, or the document is missing;
/// the current source when it is a <c>DynamicBlittableJson</c> whose id equals the key; otherwise a new
/// <c>DynamicBlittableJson</c> wrapping the loaded document.
/// </returns>
public unsafe dynamic LoadDocument(LazyStringValue keyLazy, string keyString, string collectionName)
{
    // Restart the cached scope when there is one; otherwise create it (it may stay null when _stats is null).
    using (_loadDocumentStats?.Start() ?? (_loadDocumentStats = _stats?.For(IndexingOperation.LoadDocument)))
    {
        if (keyLazy == null && keyString == null)
        {
            return DynamicNullObject.Null;
        }

        var currentSource = Source;
        var sourceId = GetSourceId(currentSource);

        if (currentSource is DynamicBlittableJson)
        {
            // Asking for the document that is being indexed: hand back the source itself.
            if (keyLazy != null && sourceId.Equals(keyLazy))
            {
                return currentSource;
            }

            if (keyString != null && sourceId.Equals(keyString))
            {
                return currentSource;
            }
        }

        if (TryGetKeySlice(keyLazy, keyString, out var keySlice) == false)
        {
            return DynamicNullObject.Null;
        }

        // The scope returned by Slice.From is intentionally not disposed here; it is tracked by the
        // references and will be disposed there.
        Slice.From(QueryContext.Documents.Allocator, sourceId, out var sourceIdSlice);

        var referencesOfSource = GetReferencesForItem(sourceIdSlice);
        referencesOfSource.Add(keySlice);

        // A conflicted document is treated the same way as a document that does not exist.
        var loaded = _documentsStorage.Get(QueryContext.Documents, keySlice, throwOnConflict: false);
        if (loaded == null)
        {
            return DynamicNullObject.Null;
        }

        // One DynamicBlittableJson instance cannot be shared among all documents because several
        // LoadDocument calls may happen within a single scope.
        return new DynamicBlittableJson(loaded);
    }
}
/// <summary>
/// Enqueues the output-reduce-to-collection command, commits the index, and then waits for the command to finish.
/// </summary>
/// <param name="stats">Stats scope; the wait is measured under <c>Reduce.SaveOutputDocuments</c>.</param>
/// <exception cref="OperationCanceledException">The enqueued command was cancelled.</exception>
/// <exception cref="IndexWriteException">The enqueued command failed for any other reason.</exception>
public override void Commit(IndexingStatsScope stats)
{
    // Enqueue before committing so the command can make progress while the index commit runs.
    var enqueue = DocumentDatabase.TxMerger.Enqueue(_outputReduceToCollectionCommand);

    base.Commit(stats);

    try
    {
        using (stats.For(IndexingOperation.Reduce.SaveOutputDocuments))
        {
            enqueue.GetAwaiter().GetResult();
        }
    }
    catch (OperationCanceledException)
    {
        // Cancellation is not a write failure: wrapping it in IndexWriteException would report
        // a cancelled wait as an index write error. Let it propagate unchanged.
        throw;
    }
    catch (Exception e)
    {
        throw new IndexWriteException("Failed to save output reduce documents to disk", e);
    }
}
/// <summary>
/// Commits the index writer and, when suggestions are enabled, every suggestions writer.
/// Does nothing when no writer has been created.
/// </summary>
/// <param name="stats">Stats scope; the commit is measured under <c>Lucene.Commit</c>.</param>
public virtual void Commit(IndexingStatsScope stats)
{
    if (_writer == null)
    {
        return;
    }

    using (stats.For(IndexingOperation.Lucene.Commit))
    {
        // Just make sure changes are flushed to disk.
        _writer.Commit(_state);

        if (_hasSuggestions == false)
        {
            return;
        }

        foreach (var suggestionsWriter in _suggestionsWriters)
        {
            suggestionsWriter.Value.Commit(_state);
        }
    }
}
/// <summary>
/// Commits the index writer and, when suggestions are enabled, every suggestions writer.
/// Does nothing when no writer has been created.
/// </summary>
/// <param name="stats">Stats scope; the commit is measured under <c>Lucene.FlushToDisk</c>.</param>
public virtual void Commit(IndexingStatsScope stats)
{
    // TODO (carried over from the original condition): && _persistance._indexWriter.RamSizeInBytes() >= long.MaxValue
    if (_writer == null)
    {
        return;
    }

    using (stats.For(IndexingOperation.Lucene.FlushToDisk))
    {
        // Just make sure changes are flushed to disk.
        _writer.Commit(_state);

        if (_hasSuggestions == false)
        {
            return;
        }

        foreach (var suggestionsWriter in _suggestionsWriters)
        {
            suggestionsWriter.Value.Commit(_state);
        }
    }
}
/// <summary>
/// Runs pending merges of <paramref name="writer"/> one by one until none are left or <c>_maxMergeTime</c> has elapsed.
/// </summary>
/// <param name="writer">Index writer that supplies and executes the merges.</param>
/// <param name="state">State passed to each merge.</param>
/// <remarks>
/// When the time budget runs out while merges are still pending, a new indexing run is scheduled.
/// NOTE(review): the method locks on <c>this</c>; kept as-is because a dedicated lock object would need a new field.
/// </remarks>
public override void Merge(IndexWriter writer, IState state)
{
    using (var mergeStats = _commitStats?.For(IndexingOperation.Lucene.Merge))
    {
        var elapsed = Stopwatch.StartNew();

        lock (this)
        {
            var knownMergesCount = writer.PendingMergesCount;
            mergeStats?.RecordPendingMergesCount(knownMergesCount);

            var completedMerges = 0;

            while (true)
            {
                if (elapsed.Elapsed > _maxMergeTime)
                {
                    if (writer.PendingMergesCount > 0)
                    {
                        // Stopping before all merges are done: force a new batch.
                        _index.ScheduleIndexingRun();
                    }

                    break;
                }

                MergePolicy.OneMerge nextMerge = writer.GetNextMerge();
                if (nextMerge == null)
                {
                    break;
                }

                completedMerges++;
                mergeStats?.RecordMergeStats(nextMerge.Stats);
                writer.Merge(nextMerge, state);

                // A successful merge can create further merges; account for the ones not seen yet.
                var newlyPending = writer.PendingMergesCount - knownMergesCount + completedMerges;
                if (newlyPending > 0)
                {
                    mergeStats?.RecordPendingMergesCount(newlyPending);
                    knownMergesCount += newlyPending;
                }
            }
        }
    }
}
/// <summary>
/// Commits the index under an acquired transaction and then waits for the output reduce documents to be saved.
/// </summary>
/// <param name="stats">Stats scope; the wait is measured under <c>Reduce.SaveOutputDocuments</c>.</param>
/// <exception cref="OperationCanceledException">Saving the output documents was cancelled.</exception>
/// <exception cref="IndexWriteException">Saving the output documents failed for any other reason.</exception>
public override void Commit(IndexingStatsScope stats)
{
    // Obtain the awaitable first; it is only waited on after the base commit has finished.
    var enqueue = CommitOutputReduceToCollection();

    using (_txHolder.AcquireTransaction(out _))
    {
        base.Commit(stats);
    }

    try
    {
        using (stats.For(IndexingOperation.Reduce.SaveOutputDocuments))
        {
            enqueue.GetAwaiter().GetResult();
        }
    }
    catch (OperationCanceledException)
    {
        // Cancellation is not a write failure: wrapping it in IndexWriteException would report
        // a cancelled wait as an index write error. Let it propagate unchanged.
        throw;
    }
    catch (Exception e)
    {
        throw new IndexWriteException("Failed to save output reduce documents to disk", e);
    }
}
/// <summary>
/// Runs the reducing function over <paramref name="aggregationBatch"/> and collects its outputs.
/// </summary>
/// <param name="aggregationBatch">Map results to aggregate.</param>
/// <param name="indexContext">Index context handed to the aggregated result.</param>
/// <param name="stats">Optional stats scope; may be null.</param>
/// <param name="token">Checked once per produced output.</param>
/// <returns>The collected outputs wrapped in an <c>AggregatedAnonymousObjects</c>.</returns>
protected override AggregationResult AggregateOn(List<BlittableJsonReaderObject> aggregationBatch, TransactionOperationContext indexContext, IndexingStatsScope stats, CancellationToken token)
{
    _blittableToDynamicWrapper.InitializeForEnumeration(aggregationBatch);

    var outputs = new List<object>();

    var reduceOperation = _indexType.IsJavaScript() ? IndexingOperation.Map.Jint : IndexingOperation.Map.Linq;
    var reduceFuncStats = stats?.For(reduceOperation, start: false);

    var timedOutputs = new TimeCountingEnumerable(_reducingFunc(_blittableToDynamicWrapper), reduceFuncStats);

    foreach (var output in timedOutputs)
    {
        token.ThrowIfCancellationRequested();

        // The property accessor is created once, from the runtime type of the first output seen.
        if (_propertyAccessor == null)
        {
            _propertyAccessor = PropertyAccessor.Create(output.GetType());
        }

        outputs.Add(output);
    }

    return new AggregatedAnonymousObjects(outputs, _propertyAccessor, indexContext);
}
/// <summary>
/// Creates an enumerator over <paramref name="items"/> with one time-counted result sequence per key of <paramref name="funcs"/>.
/// </summary>
/// <param name="items">Items to enumerate.</param>
/// <param name="filter">Filter behavior stored for later use.</param>
/// <param name="funcs">Indexing functions grouped by key; a single function is invoked directly, several are combined.</param>
/// <param name="collection">Source collection registered on the current indexing scope.</param>
/// <param name="stats">Optional stats scope; may be null.</param>
/// <param name="type">Index type; <c>IsJavaScript()</c> selects the Jint operation name, otherwise Linq.</param>
public StaticIndexItemEnumerator(IEnumerable<IndexItem> items, IIndexItemFilterBehavior filter, Dictionary<string, List<IndexingFunc>> funcs, string collection, IndexingStatsScope stats, IndexType type)
{
    _itemsEnumerator = items.GetEnumerator();
    _documentReadStats = stats?.For(IndexingOperation.Map.DocumentRead, start: false);

    var mapOperation = type.IsJavaScript() ? IndexingOperation.Map.Jint : IndexingOperation.Map.Linq;
    var mapFuncStats = stats?.For(mapOperation, start: false);

    _resultsOfCurrentDocument = new Dictionary<string, IEnumerable>(StringComparer.OrdinalIgnoreCase);
    _singleKey = funcs.Count == 1;

    foreach (var entry in funcs)
    {
        var key = entry.Key;
        var keyFuncs = entry.Value;

        if (_singleKey)
        {
            _firstKey = key;
        }

        // Remember the first "all items" key that shows up; later ones are ignored.
        if (_allItems == false)
        {
            switch (key)
            {
                case Constants.Documents.Collections.AllDocumentsCollection:
                    _allItems = true;
                    _allItemsKey = Constants.Documents.Collections.AllDocumentsCollection;
                    break;

                case Constants.Counters.All:
                    _allItems = true;
                    _allItemsKey = Constants.Counters.All;
                    break;

                case Constants.TimeSeries.All:
                    _allItems = true;
                    _allItemsKey = Constants.TimeSeries.All;
                    break;
            }
        }

        if (keyFuncs.Count == 1)
        {
            _resultsOfCurrentDocument[key] = new TimeCountingEnumerable(keyFuncs[0](new DynamicIteratorOfCurrentItemWrapper<TType>(this)), mapFuncStats);
            continue;
        }

        // Several functions for this key: the lookup of combined enumerators is created on first use.
        if (_multipleIndexingFunctionsEnumerator == null)
        {
            _multipleIndexingFunctionsEnumerator = new Dictionary<string, MultipleIndexingFunctionsEnumerator<TType>>(StringComparer.OrdinalIgnoreCase);
        }

        var combined = new MultipleIndexingFunctionsEnumerator<TType>(keyFuncs, new DynamicIteratorOfCurrentItemWrapper<TType>(this));
        _multipleIndexingFunctionsEnumerator[key] = combined;
        _resultsOfCurrentDocument[key] = new TimeCountingEnumerable(combined, mapFuncStats);
    }

    CurrentIndexingScope.Current.SetSourceCollection(collection, mapFuncStats);
    _filter = filter;
}
/// <summary>
/// Re-maps documents that reference changed documents (or tombstones) in referenced collections.
/// </summary>
/// <remarks>
/// For every index collection that has an entry in <c>_referencedCollections</c>, and whose last indexed etag is not 0,
/// each referenced collection is scanned starting after its last processed reference etag: documents for
/// <c>ActionType.Document</c>, tombstones for <c>ActionType.Tombstone</c>. For each reference found, the documents returned by
/// <c>GetDocumentFromCollectionThatReference</c> are mapped again through <c>_index.HandleMap</c>.
/// The <c>Select</c> projections reuse the single <c>_reference</c> instance, so a yielded reference is only valid until
/// the next element is pulled.
/// Exceptions matching <c>IsIndexError()</c> are recorded on the collection stats and do not stop the loop by themselves.
/// The inner loop stops when <c>CanContinueBatch</c> returns false or the processed count reaches <paramref name="pageSize"/>;
/// it restarts with a fresh read transaction when <c>MaybeRenewTransaction</c> returns true.
/// When the reference etag advanced, it is written back through <c>_indexStorage</c>.
/// </remarks>
/// <param name="actionType">Selects documents or tombstones; any other value throws <see cref="NotSupportedException"/>.</param>
/// <param name="databaseContext">Documents context used to open read transactions.</param>
/// <param name="indexContext">Index context whose transaction stores the etags.</param>
/// <param name="writeOperation">Lazily created index writer passed to the map handler.</param>
/// <param name="stats">Parent stats scope; one child scope per referenced collection is created.</param>
/// <param name="pageSize">Page size for storage reads and upper bound on processed documents per collection.</param>
/// <param name="maxTimeForDocumentTransactionToRemainOpen">Passed by reference to <c>MaybeRenewTransaction</c>.</param>
/// <param name="token">Checked for every mapped document.</param>
/// <returns><c>true</c> when the last reference etag of at least one referenced collection changed.</returns>
private bool HandleDocuments(ActionType actionType, DocumentsOperationContext databaseContext, TransactionOperationContext indexContext, Lazy <IndexWriteOperation> writeOperation, IndexingStatsScope stats, int pageSize, TimeSpan maxTimeForDocumentTransactionToRemainOpen, CancellationToken token) { var moreWorkFound = false; Dictionary <string, long> lastIndexedEtagsByCollection = null; foreach (var collection in _index.Collections) { if (_referencedCollections.TryGetValue(collection, out HashSet <CollectionName> referencedCollections) == false) { continue; } if (lastIndexedEtagsByCollection == null) { lastIndexedEtagsByCollection = new Dictionary <string, long>(StringComparer.OrdinalIgnoreCase); } if (lastIndexedEtagsByCollection.TryGetValue(collection, out long lastIndexedEtag) == false) { lastIndexedEtagsByCollection[collection] = lastIndexedEtag = _indexStorage.ReadLastIndexedEtag(indexContext.Transaction, collection); } if (lastIndexedEtag == 0) // we haven't indexed yet, so we are skipping references for now { continue; } var totalProcessedCount = 0; foreach (var referencedCollection in referencedCollections) { var inMemoryStats = _index.GetReferencesStats(referencedCollection.Name); using (var collectionStats = stats.For("Collection_" + referencedCollection.Name)) { long lastReferenceEtag; switch (actionType) { case ActionType.Document: lastReferenceEtag = IndexStorage.ReadLastProcessedReferenceEtag(indexContext.Transaction.InnerTransaction, collection, referencedCollection); break; case ActionType.Tombstone: lastReferenceEtag = IndexStorage.ReadLastProcessedReferenceTombstoneEtag(indexContext.Transaction.InnerTransaction, collection, referencedCollection); break; default: throw new NotSupportedException(); } var lastEtag = lastReferenceEtag; var resultsCount = 0; var sw = new Stopwatch(); var keepRunning = true; var lastCollectionEtag = -1L; while (keepRunning) { var hasChanges = false; using (databaseContext.OpenReadTransaction()) { sw.Restart(); 
IEnumerable <Reference> references; switch (actionType) { case ActionType.Document: if (lastCollectionEtag == -1) { lastCollectionEtag = _index.GetLastDocumentEtagInCollection(databaseContext, collection); } references = _documentsStorage .GetDocumentsFrom(databaseContext, referencedCollection.Name, lastEtag + 1, 0, pageSize, DocumentFields.Id | DocumentFields.Etag) .Select(document => { _reference.Key = document.Id; _reference.Etag = document.Etag; return(_reference); }); break; case ActionType.Tombstone: if (lastCollectionEtag == -1) { lastCollectionEtag = _index.GetLastTombstoneEtagInCollection(databaseContext, collection); } references = _documentsStorage .GetTombstonesFrom(databaseContext, referencedCollection.Name, lastEtag + 1, 0, pageSize) .Select(tombstone => { _reference.Key = tombstone.LowerId; _reference.Etag = tombstone.Etag; return(_reference); }); break; default: throw new NotSupportedException(); } var isTombstone = actionType == ActionType.Tombstone; foreach (var referencedDocument in references) { lastEtag = referencedDocument.Etag; hasChanges = true; inMemoryStats.UpdateLastEtag(lastEtag, isTombstone); var documents = GetDocumentFromCollectionThatReference(databaseContext, indexContext, collection, referencedDocument, lastIndexedEtag); using (var docsEnumerator = _index.GetMapEnumerator(documents, collection, indexContext, collectionStats, _index.Type)) { while (docsEnumerator.MoveNext(out IEnumerable mapResults)) { token.ThrowIfCancellationRequested(); totalProcessedCount++; collectionStats.RecordMapReferenceAttempt(); var current = docsEnumerator.Current; stats.RecordDocumentSize(current.Data.Size); try { var numberOfResults = _index.HandleMap(current.LowerId, current.Id, mapResults, writeOperation, indexContext, collectionStats); resultsCount += numberOfResults; collectionStats.RecordMapReferenceSuccess(); _index.MapsPerSec.MarkSingleThreaded(numberOfResults); } catch (Exception e) when(e.IsIndexError()) { docsEnumerator.OnError(); 
_index.ErrorIndexIfCriticalException(e); collectionStats.RecordMapReferenceError(); if (_logger.IsInfoEnabled) { _logger.Info($"Failed to execute mapping function on '{current.Id}' for '{_index.Name}'.", e); } collectionStats.AddMapReferenceError(current.Id, $"Failed to execute mapping function on {current.Id}. Exception: {e}"); } _index.UpdateThreadAllocations(indexContext, writeOperation, stats, updateReduceStats: false); } } if (CanContinueBatch(databaseContext, indexContext, collectionStats, writeOperation, lastEtag, lastCollectionEtag, totalProcessedCount) == false) { keepRunning = false; break; } if (totalProcessedCount >= pageSize) { keepRunning = false; break; } if (MapDocuments.MaybeRenewTransaction(databaseContext, sw, _configuration, ref maxTimeForDocumentTransactionToRemainOpen)) { break; } } if (hasChanges == false) { break; } } } if (lastReferenceEtag == lastEtag) { // the last referenced etag hasn't changed continue; } moreWorkFound = true; if (_logger.IsInfoEnabled) { _logger.Info($"Executed handle references for '{_index.Name}' index and '{referencedCollection.Name}' collection. " + $"Got {resultsCount:#,#;;0} map results in {collectionStats.Duration.TotalMilliseconds:#,#;;0} ms."); } switch (actionType) { case ActionType.Document: _indexStorage.WriteLastReferenceEtag(indexContext.Transaction, collection, referencedCollection, lastEtag); break; case ActionType.Tombstone: _indexStorage.WriteLastReferenceTombstoneEtag(indexContext.Transaction, collection, referencedCollection, lastEtag); break; default: throw new NotSupportedException(); } } } } return(moreWorkFound); }
/// <summary>
/// Maps new items of every collection of the index, starting after the last indexed etag stored for that collection.
/// </summary>
/// <remarks>
/// Items are read inside a read transaction opened on <paramref name="queryContext"/>; the inner loop restarts with a
/// fresh transaction when <c>MaybeRenewTransaction</c> returns true and stops when the enumerator is exhausted,
/// <c>CanContinueBatch</c> returns false, or the processed count reaches the page size (<c>int.MaxValue</c> here).
/// <c>totalProcessedCount</c> is shared across all collections of one call.
/// Exceptions matching <c>IsIndexError()</c> are recorded as map errors on the collection stats and do not stop the loop
/// by themselves.
/// When the etag advanced: for map indexes (<c>_index.Type.IsMap()</c>) the last state is saved and the last indexed etag
/// is written to index storage; otherwise the etag is stored in <c>_mapReduceContext.ProcessedDocEtags</c>.
/// </remarks>
/// <param name="queryContext">Query context used to open read transactions and read items.</param>
/// <param name="indexContext">Index context whose transaction stores the etags.</param>
/// <param name="writeOperation">Lazily created index writer; its value is requested on the first item.</param>
/// <param name="stats">Parent stats scope; one child scope per collection is created.</param>
/// <param name="token">Checked for every item.</param>
/// <returns><c>true</c> when the last mapped etag of at least one collection changed.</returns>
public bool Execute(QueryOperationContext queryContext, TransactionOperationContext indexContext, Lazy <IndexWriteOperation> writeOperation, IndexingStatsScope stats, CancellationToken token) { var maxTimeForDocumentTransactionToRemainOpen = Debugger.IsAttached == false ? _configuration.MaxTimeForDocumentTransactionToRemainOpen.AsTimeSpan : TimeSpan.FromMinutes(15); var moreWorkFound = false; var totalProcessedCount = 0; foreach (var collection in _index.Collections) { using (var collectionStats = stats.For("Collection_" + collection)) { var lastMappedEtag = _indexStorage.ReadLastIndexedEtag(indexContext.Transaction, collection); if (_logger.IsInfoEnabled) { _logger.Info($"Executing map for '{_index.Name}'. Collection: {collection} LastMappedEtag: {lastMappedEtag:#,#;;0}."); } var inMemoryStats = _index.GetStats(collection); var lastEtag = lastMappedEtag; var resultsCount = 0; var pageSize = int.MaxValue; var sw = new Stopwatch(); IndexWriteOperation indexWriter = null; var keepRunning = true; var lastCollectionEtag = -1L; while (keepRunning) { using (queryContext.OpenReadTransaction()) { sw.Restart(); if (lastCollectionEtag == -1) { lastCollectionEtag = _index.GetLastItemEtagInCollection(queryContext, collection); } var items = GetItemsEnumerator(queryContext, collection, lastEtag, pageSize); using (var itemEnumerator = _index.GetMapEnumerator(items, collection, indexContext, collectionStats, _index.Type)) { while (true) { if (itemEnumerator.MoveNext(queryContext.Documents, out IEnumerable mapResults, out var etag) == false) { if (etag > lastEtag) { lastEtag = etag.Value; } collectionStats.RecordMapCompletedReason("No more documents to index"); keepRunning = false; break; } token.ThrowIfCancellationRequested(); if (indexWriter == null) { indexWriter = writeOperation.Value; } var current = itemEnumerator.Current; totalProcessedCount++; collectionStats.RecordMapAttempt(); stats.RecordDocumentSize(current.Size); if (_logger.IsInfoEnabled && totalProcessedCount % 8192 
== 0) { _logger.Info($"Executing map for '{_index.Name}'. Processed count: {totalProcessedCount:#,#;;0} etag: {lastEtag:#,#;;0}."); } lastEtag = current.Etag; inMemoryStats.UpdateLastEtag(lastEtag, isTombstone: false); try { var numberOfResults = _index.HandleMap(current, mapResults, indexWriter, indexContext, collectionStats); resultsCount += numberOfResults; collectionStats.RecordMapSuccess(); _index.MapsPerSec?.MarkSingleThreaded(numberOfResults); } catch (Exception e) when(e.IsIndexError()) { itemEnumerator.OnError(); _index.ErrorIndexIfCriticalException(e); collectionStats.RecordMapError(); if (_logger.IsInfoEnabled) { _logger.Info($"Failed to execute mapping function on '{current.Id}' for '{_index.Name}'.", e); } collectionStats.AddMapError(current.Id, $"Failed to execute mapping function on {current.Id}. " + $"Exception: {e}"); } if (CanContinueBatch(queryContext, indexContext, collectionStats, indexWriter, lastEtag, lastCollectionEtag, totalProcessedCount) == false) { keepRunning = false; break; } if (totalProcessedCount >= pageSize) { keepRunning = false; break; } if (MaybeRenewTransaction(queryContext, sw, _configuration, ref maxTimeForDocumentTransactionToRemainOpen)) { break; } } } } } if (lastMappedEtag == lastEtag) { // the last mapped etag hasn't changed continue; } moreWorkFound = true; if (_logger.IsInfoEnabled) { _logger.Info($"Executed map for '{_index.Name}' index and '{collection}' collection. Got {resultsCount:#,#;;0} map results in {collectionStats.Duration.TotalMilliseconds:#,#;;0} ms."); } if (_index.Type.IsMap()) { _index.SaveLastState(); _indexStorage.WriteLastIndexedEtag(indexContext.Transaction, collection, lastEtag); } else { _mapReduceContext.ProcessedDocEtags[collection] = lastEtag; } } } return(moreWorkFound); }
/// <summary>
/// Creates an enumerator over <paramref name="documents"/> with a stopped <c>Map.DocumentRead</c> stats scope.
/// </summary>
/// <param name="documents">Documents to enumerate.</param>
/// <param name="stats">Stats scope the document-read scope is created under; dereferenced unconditionally.</param>
public AutoIndexDocsEnumerator(IEnumerable<Document> documents, IndexingStatsScope stats)
{
    var documentReadStats = stats.For(IndexingOperation.Map.DocumentRead, start: false);
    _documentReadStats = documentReadStats;

    var enumerator = documents.GetEnumerator();
    _docsEnumerator = enumerator;
}
/// <summary>
/// Processes tombstones of every collection of the index, starting after the last processed tombstone etag.
/// </summary>
/// <remarks>
/// Tombstones are read inside a read transaction opened on <paramref name="databaseContext"/>; for the all-documents
/// collection the non-collection overload of <c>GetTombstonesFrom</c> is used. Tombstones whose type is not
/// <c>Document</c> advance the etag and counters but are not passed to <c>_index.HandleDelete</c>, and the
/// batch / transaction-renewal checks are skipped for them.
/// The outer loop ends when a pass processed nothing, when <c>CanContinueBatch</c> returns false, or when the batch count
/// reaches the page size (<c>int.MaxValue</c> here).
/// When at least one tombstone was seen: for map indexes (<c>_index.Type.IsMap()</c>) the last tombstone etag is written to
/// index storage; otherwise it is stored in <c>_mapReduceContext.ProcessedTombstoneEtags</c>.
/// </remarks>
/// <param name="databaseContext">Documents context used to open read transactions and read tombstones.</param>
/// <param name="indexContext">Index context whose transaction stores the etags.</param>
/// <param name="writeOperation">Lazily created index writer; its value is requested on the first tombstone.</param>
/// <param name="stats">Parent stats scope; one child scope per collection is created.</param>
/// <param name="token">Checked for every tombstone.</param>
/// <returns><c>true</c> when at least one tombstone was processed in any collection.</returns>
public virtual bool Execute(DocumentsOperationContext databaseContext, TransactionOperationContext indexContext, Lazy <IndexWriteOperation> writeOperation, IndexingStatsScope stats, CancellationToken token) { const int pageSize = int.MaxValue; var maxTimeForDocumentTransactionToRemainOpen = Debugger.IsAttached == false ? _configuration.MaxTimeForDocumentTransactionToRemainOpen.AsTimeSpan : TimeSpan.FromMinutes(15); var moreWorkFound = false; foreach (var collection in _index.Collections) { using (var collectionStats = stats.For("Collection_" + collection)) { var lastMappedEtag = _indexStorage.ReadLastIndexedEtag(indexContext.Transaction, collection); var lastTombstoneEtag = _indexStorage.ReadLastProcessedTombstoneEtag(indexContext.Transaction, collection); if (_logger.IsInfoEnabled) { _logger.Info($"Executing cleanup for '{_index} ({_index.Name})'. Collection: {collection}. LastMappedEtag: {lastMappedEtag:#,#;;0}. LastTombstoneEtag: {lastTombstoneEtag:#,#;;0}."); } var inMemoryStats = _index.GetStats(collection); var lastEtag = lastTombstoneEtag; var count = 0; var sw = new Stopwatch(); IndexWriteOperation indexWriter = null; var keepRunning = true; var lastCollectionEtag = -1L; while (keepRunning) { var batchCount = 0; using (databaseContext.OpenReadTransaction()) { sw.Restart(); if (lastCollectionEtag == -1) { lastCollectionEtag = _index.GetLastTombstoneEtagInCollection(databaseContext, collection); } var tombstones = collection == Constants.Documents.Collections.AllDocumentsCollection ? 
_documentsStorage.GetTombstonesFrom(databaseContext, lastEtag + 1, 0, pageSize) : _documentsStorage.GetTombstonesFrom(databaseContext, collection, lastEtag + 1, 0, pageSize); foreach (var tombstone in tombstones) { token.ThrowIfCancellationRequested(); if (indexWriter == null) { indexWriter = writeOperation.Value; } count++; batchCount++; lastEtag = tombstone.Etag; inMemoryStats.UpdateLastEtag(lastEtag, isTombstone: true); if (_logger.IsInfoEnabled && count % 2048 == 0) { _logger.Info($"Executing cleanup for '{_index.Name}'. Processed count: {count:#,#;;0} etag: {lastEtag}."); } if (tombstone.Type != Tombstone.TombstoneType.Document) { continue; // this can happen when we have '@all_docs' } _index.HandleDelete(tombstone, collection, indexWriter, indexContext, collectionStats); if (CanContinueBatch(databaseContext, indexContext, collectionStats, indexWriter, lastEtag, lastCollectionEtag, batchCount) == false) { keepRunning = false; break; } if (MapDocuments.MaybeRenewTransaction(databaseContext, sw, _configuration, ref maxTimeForDocumentTransactionToRemainOpen)) { break; } } if (batchCount == 0 || batchCount >= pageSize) { break; } } } if (count == 0) { continue; } if (_logger.IsInfoEnabled) { _logger.Info($"Executing cleanup for '{_index} ({_index.Name})'. Processed {count} tombstones in '{collection}' collection in {collectionStats.Duration.TotalMilliseconds:#,#;;0} ms."); } if (_index.Type.IsMap()) { _indexStorage.WriteLastTombstoneEtag(indexContext.Transaction, collection, lastEtag); } else { _mapReduceContext.ProcessedTombstoneEtags[collection] = lastEtag; } moreWorkFound = true; } } return(moreWorkFound); }
/// <summary>
/// Maps new documents of every collection of the index, starting after the last indexed etag stored for that collection.
/// </summary>
/// <remarks>
/// Documents are read inside a read transaction opened on <paramref name="databaseContext"/>; the inner loop restarts with
/// a fresh transaction when <c>MaybeRenewTransaction</c> returns true and stops when the enumerator is exhausted,
/// <c>CanContinueBatch</c> returns false, or the per-collection count reaches the page size (<c>int.MaxValue</c> here).
/// Every exception thrown by <c>_index.HandleMap</c> is passed to <c>_index.HandleError</c> and recorded as a map error;
/// the loop then continues unless <c>HandleError</c> itself throws (not visible here).
/// An info-level log entry is written for every processed document when info logging is enabled.
/// When at least one document was processed: for map indexes (<c>_index.Type.IsMap()</c>) the last indexed etag is written
/// to index storage; otherwise it is stored in <c>_mapReduceContext.ProcessedDocEtags</c>.
/// </remarks>
/// <param name="databaseContext">Documents context used to open read transactions and read documents.</param>
/// <param name="indexContext">Index context whose transaction stores the etags.</param>
/// <param name="writeOperation">Lazily created index writer; its value is requested on the first document.</param>
/// <param name="stats">Parent stats scope; one child scope per collection is created.</param>
/// <param name="token">Checked for every document.</param>
/// <returns><c>true</c> when at least one document was processed in any collection.</returns>
public bool Execute(DocumentsOperationContext databaseContext, TransactionOperationContext indexContext, Lazy <IndexWriteOperation> writeOperation, IndexingStatsScope stats, CancellationToken token) { var maxTimeForDocumentTransactionToRemainOpen = Debugger.IsAttached == false ? _configuration.MaxTimeForDocumentTransactionToRemainOpen.AsTimeSpan : TimeSpan.FromMinutes(15); var moreWorkFound = false; foreach (var collection in _index.Collections) { using (var collectionStats = stats.For("Collection_" + collection)) { if (_logger.IsInfoEnabled) { _logger.Info($"Executing map for '{_index.Name} ({_index.IndexId})'. Collection: {collection}."); } var lastMappedEtag = _indexStorage.ReadLastIndexedEtag(indexContext.Transaction, collection); if (_logger.IsInfoEnabled) { _logger.Info($"Executing map for '{_index.Name} ({_index.IndexId})'. LastMappedEtag: {lastMappedEtag}."); } var lastEtag = lastMappedEtag; var count = 0; var resultsCount = 0; var pageSize = int.MaxValue; var sw = new Stopwatch(); IndexWriteOperation indexWriter = null; var keepRunning = true; var lastCollectionEtag = -1L; while (keepRunning) { using (databaseContext.OpenReadTransaction()) { sw.Restart(); if (lastCollectionEtag == -1) { lastCollectionEtag = _index.GetLastDocumentEtagInCollection(databaseContext, collection); } var documents = GetDocumentsEnumerator(databaseContext, collection, lastEtag, pageSize); using (var docsEnumerator = _index.GetMapEnumerator(documents, collection, indexContext, collectionStats)) { while (true) { IEnumerable mapResults; if (docsEnumerator.MoveNext(out mapResults) == false) { collectionStats.RecordMapCompletedReason("No more documents to index"); keepRunning = false; break; } token.ThrowIfCancellationRequested(); if (indexWriter == null) { indexWriter = writeOperation.Value; } var current = docsEnumerator.Current; if (_logger.IsInfoEnabled) { _logger.Info( $"Executing map for '{_index.Name} ({_index.IndexId})'. 
Processing document: {current.Key}."); } collectionStats.RecordMapAttempt(); count++; lastEtag = current.Etag; try { var numberOfResults = _index.HandleMap(current.LoweredKey, mapResults, indexWriter, indexContext, collectionStats); _index.MapsPerSec.Mark(numberOfResults); resultsCount += numberOfResults; collectionStats.RecordMapSuccess(); } catch (Exception e) { _index.HandleError(e); collectionStats.RecordMapError(); if (_logger.IsInfoEnabled) { _logger.Info( $"Failed to execute mapping function on '{current.Key}' for '{_index.Name} ({_index.IndexId})'.", e); } collectionStats.AddMapError(current.Key, $"Failed to execute mapping function on {current.Key}. Exception: {e}"); } if (CanContinueBatch(collectionStats, lastEtag, lastCollectionEtag) == false) { keepRunning = false; break; } if (count >= pageSize) { keepRunning = false; break; } if (MaybeRenewTransaction(databaseContext, sw, _configuration, ref maxTimeForDocumentTransactionToRemainOpen)) { break; } } } } } if (count == 0) { continue; } if (_logger.IsInfoEnabled) { _logger.Info($"Executing map for '{_index.Name} ({_index.IndexId})'. Processed {count:#,#;;0} documents and {resultsCount:#,#;;0} map results in '{collection}' collection in {collectionStats.Duration.TotalMilliseconds:#,#;;0} ms."); } if (_index.Type.IsMap()) { _indexStorage.WriteLastIndexedEtag(indexContext.Transaction, collection, lastEtag); } else { _mapReduceContext.ProcessedDocEtags[collection] = lastEtag; } moreWorkFound = true; } } return(moreWorkFound); }
/// <summary>
/// Loads the document identified by <paramref name="keyLazy"/> or <paramref name="keyString"/> and
/// registers the lower-cased key in the reference list of the current <c>Source</c> document.
/// </summary>
/// <param name="keyLazy">Key of the document to load; takes precedence over <paramref name="keyString"/> when not null.</param>
/// <param name="keyString">Key of the document to load; used when <paramref name="keyLazy"/> is null.</param>
/// <param name="collectionName">Collection name passed on to <c>MaybeUpdateReferenceEtags</c>.</param>
/// <returns>
/// <c>DynamicNullObject.Null</c> when both keys are null, the chosen key is empty, or no document is found;
/// the current <c>Source</c> when the key equals its id; otherwise a new <c>DynamicBlittableJson</c> over the loaded document.
/// </returns>
/// <exception cref="ArgumentException">Thrown when <c>Source</c> is not set or its id is not a <c>LazyStringValue</c>.</exception>
public unsafe dynamic LoadDocument(LazyStringValue keyLazy, string keyString, string collectionName)
{
    using (_loadDocumentStats?.Start() ?? (_loadDocumentStats = _stats?.For(IndexingOperation.LoadDocument)))
    {
        if (keyLazy == null && keyString == null)
        {
            return DynamicNullObject.Null;
        }

        var currentSource = Source;
        if (currentSource == null)
        {
            throw new ArgumentException("Cannot execute LoadDocument. Source is not set.");
        }

        var sourceId = currentSource.GetId() as LazyStringValue;
        if (sourceId == null)
        {
            throw new ArgumentException("Cannot execute LoadDocument. Source does not have a key.");
        }

        // A document asking for itself gets the source back without touching storage.
        if ((keyLazy != null && sourceId.Equals(keyLazy)) || (keyString != null && sourceId.Equals(keyString)))
        {
            return currentSource;
        }

        // The scope returned by the Slice factory calls below is intentionally not disposed here:
        // it is tracked by the references and will be disposed there.
        Slice loweredKey;
        if (keyLazy != null)
        {
            if (keyLazy.Length == 0)
            {
                return DynamicNullObject.Null;
            }

            Slice.External(_documentsContext.Allocator, keyLazy.Buffer, keyLazy.Size, out loweredKey);
        }
        else
        {
            if (keyString.Length == 0)
            {
                return DynamicNullObject.Null;
            }

            Slice.From(_documentsContext.Allocator, keyString, out loweredKey);
        }

        // Normalize the case of the key so the lookup is case insensitive.
        _documentsContext.Allocator.ToLowerCase(ref loweredKey.Content);

        var sourceReferences = GetReferencesForDocument(sourceId);
        var etagsByReference = GetReferenceEtags();

        sourceReferences.Add(loweredKey);

        // A conflicted document is treated the same way as a missing one.
        var loaded = _documentsStorage.Get(_documentsContext, loweredKey, throwOnConflict: false);

        if (loaded != null)
        {
            MaybeUpdateReferenceEtags(etagsByReference, collectionName, loaded.Etag);

            // One DynamicBlittableJson instance cannot be shared among all documents,
            // because a single scope can contain multiple LoadDocument calls.
            return new DynamicBlittableJson(loaded);
        }

        MaybeUpdateReferenceEtags(etagsByReference, collectionName, 0);
        return DynamicNullObject.Null;
    }
}
/// <summary>
/// Re-maps documents whose referenced documents (or tombstones of referenced documents) changed.
/// For every index collection that has referenced collections, the references newer than the last
/// processed reference etag are read, the documents that reference each of them are looked up, and
/// those already covered by the last indexed etag are passed to <c>_index.HandleMap</c> again.
/// </summary>
/// <param name="actionType">Selects whether documents or tombstones of the referenced collection are processed.</param>
/// <param name="databaseContext">Context used to open read transactions and load documents.</param>
/// <param name="indexContext">Context whose transaction is used to read and write the index etags.</param>
/// <param name="writeOperation">Lazy index writer; its value is requested only once a document is about to be mapped.</param>
/// <param name="stats">Parent stats scope; a child scope named "Collection_" + referenced collection name is opened per referenced collection.</param>
/// <param name="pageSize">Passed to the storage read calls; a pass that yields no references or at least this many ends the reading loop.</param>
/// <param name="maxTimeForDocumentTransactionToRemainOpen">Passed by reference to <c>MapDocuments.MaybeRenewTransaction</c>.</param>
/// <param name="token">Cancellation token, checked before each document is re-mapped.</param>
/// <returns><c>true</c> when at least one reference was processed; otherwise <c>false</c>.</returns>
/// <exception cref="NotSupportedException">Thrown when <paramref name="actionType"/> is neither Document nor Tombstone.</exception>
private unsafe bool HandleDocuments(ActionType actionType, DocumentsOperationContext databaseContext, TransactionOperationContext indexContext, Lazy<IndexWriteOperation> writeOperation, IndexingStatsScope stats, int pageSize, TimeSpan maxTimeForDocumentTransactionToRemainOpen, CancellationToken token)
{
    var moreWorkFound = false;
    Dictionary<string, long> lastIndexedEtagsByCollection = null;

    foreach (var collection in _index.Collections)
    {
        if (_referencedCollections.TryGetValue(collection, out HashSet<CollectionName> referencedCollections) == false)
        {
            continue;
        }

        if (lastIndexedEtagsByCollection == null)
        {
            lastIndexedEtagsByCollection = new Dictionary<string, long>(StringComparer.OrdinalIgnoreCase);
        }

        if (lastIndexedEtagsByCollection.TryGetValue(collection, out long lastIndexedEtag) == false)
        {
            lastIndexedEtagsByCollection[collection] = lastIndexedEtag = _indexStorage.ReadLastIndexedEtag(indexContext.Transaction, collection);
        }

        // we haven't indexed yet, so we are skipping references for now
        if (lastIndexedEtag == 0)
        {
            continue;
        }

        foreach (var referencedCollection in referencedCollections)
        {
            using (var collectionStats = stats.For("Collection_" + referencedCollection.Name))
            {
                if (_logger.IsInfoEnabled)
                {
                    _logger.Info($"Executing handle references for '{_index.Name}'. Collection: {referencedCollection.Name}. Type: {actionType}.");
                }

                long lastReferenceEtag;

                switch (actionType)
                {
                    case ActionType.Document:
                        lastReferenceEtag = _indexStorage.ReadLastProcessedReferenceEtag(indexContext.Transaction, collection, referencedCollection);
                        break;
                    case ActionType.Tombstone:
                        lastReferenceEtag = _indexStorage.ReadLastProcessedReferenceTombstoneEtag(indexContext.Transaction, collection, referencedCollection);
                        break;
                    default:
                        throw new NotSupportedException();
                }

                if (_logger.IsInfoEnabled)
                {
                    _logger.Info($"Executing handle references for '{_index.Name}'. LastReferenceEtag: {lastReferenceEtag}.");
                }

                var lastEtag = lastReferenceEtag;
                var count = 0;

                var sw = new Stopwatch();
                IndexWriteOperation indexWriter = null;

                var keepRunning = true;
                var lastCollectionEtag = -1L;

                // Each pass of this loop opens a fresh read transaction and continues after lastEtag.
                while (keepRunning)
                {
                    var batchCount = 0;

                    using (databaseContext.OpenReadTransaction())
                    {
                        sw.Restart();

                        // Both projections reuse the single _reference instance, so an item
                        // is only valid until the enumeration advances to the next element.
                        IEnumerable<Reference> references;
                        switch (actionType)
                        {
                            case ActionType.Document:
                                if (lastCollectionEtag == -1)
                                {
                                    lastCollectionEtag = _index.GetLastDocumentEtagInCollection(databaseContext, collection);
                                }

                                references = _documentsStorage
                                    .GetDocumentsFrom(databaseContext, referencedCollection.Name, lastEtag + 1, 0, pageSize)
                                    .Select(document =>
                                    {
                                        _reference.Key = document.Id;
                                        _reference.Etag = document.Etag;

                                        return _reference;
                                    });
                                break;
                            case ActionType.Tombstone:
                                if (lastCollectionEtag == -1)
                                {
                                    lastCollectionEtag = _index.GetLastTombstoneEtagInCollection(databaseContext, collection);
                                }

                                references = _documentsStorage
                                    .GetTombstonesFrom(databaseContext, referencedCollection.Name, lastEtag + 1, 0, pageSize)
                                    .Select(tombstone =>
                                    {
                                        _reference.Key = tombstone.LowerId;
                                        _reference.Etag = tombstone.Etag;

                                        return _reference;
                                    });
                                break;
                            default:
                                throw new NotSupportedException();
                        }

                        foreach (var referencedDocument in references)
                        {
                            if (_logger.IsInfoEnabled)
                            {
                                _logger.Info($"Executing handle references for '{_index.Name}'. Processing reference: {referencedDocument.Key}.");
                            }

                            lastEtag = referencedDocument.Etag;
                            count++;
                            batchCount++;

                            // Collect the documents of this collection that reference the changed document.
                            var documents = new List<Document>();
                            foreach (var key in _indexStorage
                                .GetDocumentKeysFromCollectionThatReference(collection, referencedDocument.Key, indexContext.Transaction))
                            {
                                using (DocumentIdWorker.GetLower(databaseContext.Allocator, key.Content.Ptr, key.Size, out var loweredKey))
                                {
                                    // when there is conflict, we need to apply same behavior as if the document would not exist
                                    var doc = _documentsStorage.Get(databaseContext, loweredKey, throwOnConflict: false);

                                    // Only documents at or below the last indexed etag are re-mapped here.
                                    if (doc != null && doc.Etag <= lastIndexedEtag)
                                    {
                                        documents.Add(doc);
                                    }
                                }
                            }

                            using (var docsEnumerator = _index.GetMapEnumerator(documents, collection, indexContext, collectionStats))
                            {
                                while (docsEnumerator.MoveNext(out IEnumerable mapResults))
                                {
                                    token.ThrowIfCancellationRequested();

                                    var current = docsEnumerator.Current;

                                    // The writer is materialized lazily, on the first document that needs it.
                                    if (indexWriter == null)
                                    {
                                        indexWriter = writeOperation.Value;
                                    }

                                    if (_logger.IsInfoEnabled)
                                    {
                                        _logger.Info($"Executing handle references for '{_index.Name}'. Processing document: {current.Id}.");
                                    }

                                    try
                                    {
                                        _index.HandleMap(current.LowerId, mapResults, indexWriter, indexContext, collectionStats);
                                    }
                                    catch (Exception e)
                                    {
                                        // Best effort: a failing document is only logged and the batch continues.
                                        if (_logger.IsInfoEnabled)
                                        {
                                            _logger.Info($"Failed to execute mapping function on '{current.Id}' for '{_index.Name}'.", e);
                                        }
                                    }
                                }
                            }

                            if (CanContinueBatch(databaseContext, indexContext, collectionStats, indexWriter, lastEtag, lastCollectionEtag, batchCount) == false)
                            {
                                keepRunning = false;
                                break;
                            }

                            // Leaves only the foreach, so the while loop can open a fresh read transaction.
                            if (MapDocuments.MaybeRenewTransaction(databaseContext, sw, _configuration, ref maxTimeForDocumentTransactionToRemainOpen))
                            {
                                break;
                            }
                        }

                        // Nothing was read, or a full page was consumed: stop reading this referenced collection.
                        if (batchCount == 0 || batchCount >= pageSize)
                        {
                            break;
                        }
                    }
                }

                if (count == 0)
                {
                    continue;
                }

                if (_logger.IsInfoEnabled)
                {
                    // Uses the index name, like every other message in this method
                    // (previously the index object itself was interpolated).
                    _logger.Info($"Executing handle references for '{_index.Name}'. Processed {count} references in '{referencedCollection.Name}' collection in {collectionStats.Duration.TotalMilliseconds:#,#;;0} ms.");
                }

                switch (actionType)
                {
                    case ActionType.Document:
                        _indexStorage.WriteLastReferenceEtag(indexContext.Transaction, collection, referencedCollection, lastEtag);
                        break;
                    case ActionType.Tombstone:
                        _indexStorage.WriteLastReferenceTombstoneEtag(indexContext.Transaction, collection, referencedCollection, lastEtag);
                        break;
                    default:
                        throw new NotSupportedException();
                }

                moreWorkFound = true;
            }
        }
    }

    return moreWorkFound;
}
/// <summary>
/// Runs the reduce step over every modified map-results store held by the map-reduce context,
/// dispatching to tree or nested-values reduction according to the store type, then removes the
/// table entries of freed pages and writes the last etags.
/// </summary>
/// <param name="databaseContext">Not used by this method.</param>
/// <param name="indexContext">Context providing the transaction, the allocator and the lazy-string factory used during reduction.</param>
/// <param name="writeOperation">Lazy index writer; its value is requested as soon as there is at least one store to reduce.</param>
/// <param name="stats">Parent stats scope; tree and nested-values child scopes are created from it.</param>
/// <param name="token">Cancellation token, checked before each store is reduced.</param>
/// <returns>Always <c>false</c>.</returns>
/// <exception cref="ArgumentOutOfRangeException">Thrown when a store has a type other than Tree or Nested.</exception>
public bool Execute(DocumentsOperationContext databaseContext, TransactionOperationContext indexContext, Lazy<IndexWriteOperation> writeOperation, IndexingStatsScope stats, CancellationToken token)
{
    if (_mapReduceContext.StoreByReduceKeyHash.Count == 0)
    {
        // Etags still have to be written here: if everything was filtered out during map we would
        // lose the last indexed etag information, which would cause an endless indexing loop.
        WriteLastEtags(indexContext);
        return false;
    }

    var innerTransaction = indexContext.Transaction.InnerTransaction;

    ReduceResultsSchema.Create(innerTransaction, PageNumberToReduceResultTableName, 32);
    var reduceResultsTable = innerTransaction.OpenTable(ReduceResultsSchema, PageNumberToReduceResultTableName);
    var lowLevelTransaction = innerTransaction.LowLevelTransaction;

    var indexWriter = writeOperation.Value;

    // Both scopes are created stopped and started only around the matching kind of reduction.
    var treeScopeStats = stats.For(IndexingOperation.Reduce.TreeScope, start: false);
    var nestedValuesScopeStats = stats.For(IndexingOperation.Reduce.NestedValuesScope, start: false);

    foreach (var entry in _mapReduceContext.StoreByReduceKeyHash)
    {
        token.ThrowIfCancellationRequested();

        using (var reduceKeyHash = indexContext.GetLazyString(entry.Key.ToString(CultureInfo.InvariantCulture)))
        using (entry.Value)
        using (_aggregationBatch)
        {
            var modifiedStore = entry.Value;
            var storageType = modifiedStore.Type;

            if (storageType == MapResultsStorageType.Tree)
            {
                using (treeScopeStats.Start())
                {
                    HandleTreeReduction(indexContext, treeScopeStats, modifiedStore, lowLevelTransaction, indexWriter, reduceKeyHash, reduceResultsTable, token);
                }
            }
            else if (storageType == MapResultsStorageType.Nested)
            {
                using (nestedValuesScopeStats.Start())
                {
                    HandleNestedValuesReduction(indexContext, nestedValuesScopeStats, modifiedStore, indexWriter, reduceKeyHash, token);
                }
            }
            else
            {
                throw new ArgumentOutOfRangeException(storageType.ToString());
            }
        }

        if (_mapReduceContext.FreedPages.Count == 0)
        {
            continue;
        }

        // pageNumberSlice is created over the address of this local, so assigning the local is what
        // selects the key to delete (assumes Slice.External does not copy the bytes - TODO confirm).
        long swappedPageNumber = 0;
        using (treeScopeStats.Start())
        using (Slice.External(indexContext.Allocator, (byte*)&swappedPageNumber, sizeof(long), out Slice pageNumberSlice))
        {
            foreach (var freedPage in _mapReduceContext.FreedPages)
            {
                swappedPageNumber = Bits.SwapBytes(freedPage);
                reduceResultsTable.DeleteByKey(pageNumberSlice);
            }
        }
    }

    // Process memory usage is sampled only when the reduce ran for at least the configured minimum duration.
    if (stats.Duration >= MinReduceDurationToCalculateProcessMemoryUsage)
    {
        var workingSet = MemoryInformation.GetWorkingSetInBytes();
        var managedMemory = MemoryInformation.GetManagedMemoryInBytes();
        var unmanagedMemory = MemoryInformation.GetUnManagedAllocationsInBytes();

        stats.RecordReduceMemoryStats(workingSet, managedMemory + unmanagedMemory);
    }

    WriteLastEtags(indexContext);
    _mapReduceContext.StoreNextMapResultId();

    return false;
}