public void IndexPrecomputedBatch(PrecomputedIndexingBatch precomputedBatch)
{
    context.MetricsCounters.IndexedPerSecond.Mark(precomputedBatch.Documents.Count);

    var indexToWorkOn = new IndexToWorkOn
    {
        Index = precomputedBatch.Index,
        IndexId = precomputedBatch.Index.indexId,
        LastIndexedEtag = Etag.Empty
    };

    var indexingBatchForIndex = FilterIndexes(new List<IndexToWorkOn> { indexToWorkOn },
        precomputedBatch.Documents, precomputedBatch.LastIndexed).FirstOrDefault();
    if (indexingBatchForIndex == null)
        return;

    if (Log.IsDebugEnabled)
    {
        Log.Debug("Going to index precomputed documents for a new index {0}. Count of precomputed docs {1}",
            precomputedBatch.Index.PublicName, precomputedBatch.Documents.Count);
    }

    HandleIndexingFor(indexingBatchForIndex, precomputedBatch.LastIndexed, precomputedBatch.LastModified);
}
public void IndexPrecomputedBatch(PrecomputedIndexingBatch precomputedBatch, CancellationToken token)
{
    token.ThrowIfCancellationRequested();

    context.MetricsCounters.IndexedPerSecond.Mark(precomputedBatch.Documents.Count);

    var indexToWorkOn = new IndexToWorkOn
    {
        Index = precomputedBatch.Index,
        IndexId = precomputedBatch.Index.indexId,
        LastIndexedEtag = Etag.Empty
    };

    using (LogContext.WithDatabase(context.DatabaseName))
    using (MapIndexingInProgress(new List<IndexToWorkOn> { indexToWorkOn }))
    {
        var indexingBatchForIndex = FilterIndexes(new List<IndexToWorkOn> { indexToWorkOn },
            precomputedBatch.Documents, precomputedBatch.LastIndexed).FirstOrDefault();
        if (indexingBatchForIndex == null)
            return;

        IndexingBatchInfo batchInfo = null;
        IndexingPerformanceStats performance = null;
        try
        {
            batchInfo = context.ReportIndexingBatchStarted(precomputedBatch.Documents.Count, -1,
                new List<string> { indexToWorkOn.Index.PublicName });

            batchInfo.BatchType = BatchType.Precomputed;

            if (Log.IsDebugEnabled)
            {
                Log.Debug("Going to index precomputed documents for a new index {0}. Count of precomputed docs {1}",
                    precomputedBatch.Index.PublicName, precomputedBatch.Documents.Count);
            }

            performance = HandleIndexingFor(indexingBatchForIndex, precomputedBatch.LastIndexed, precomputedBatch.LastModified, token);
        }
        finally
        {
            if (batchInfo != null)
            {
                if (performance != null)
                    batchInfo.PerformanceStats.TryAdd(indexingBatchForIndex.Index.PublicName, performance);

                context.ReportIndexingBatchCompleted(batchInfo);
            }
        }
    }

    indexReplacer.ReplaceIndexes(new[] { indexToWorkOn.IndexId });
}
private void ApplyPrecomputedBatchForNewIndex(Index index, AbstractViewGenerator generator)
{
    const string DocumentsByEntityNameIndex = "Raven/DocumentsByEntityName";

    PrecomputedIndexingBatch result = null;

    var docsToIndex = new List<JsonDocument>();
    TransactionalStorage.Batch(actions =>
    {
        var tags = generator.ForEntityNames.Select(entityName => "Tag:[[" + entityName + "]]").ToList();
        var query = string.Join(" OR ", tags);

        JsonDocument highestByEtag = null;

        var cts = new CancellationTokenSource();
        using (var linked = CancellationTokenSource.CreateLinkedTokenSource(cts.Token, WorkContext.CancellationToken))
        using (var op = new QueryActions.DatabaseQueryOperation(Database, DocumentsByEntityNameIndex, new IndexQuery
        {
            Query = query,
            PageSize = Database.Configuration.MaxNumberOfItemsToProcessInSingleBatch
        }, actions, linked)
        {
            ShouldSkipDuplicateChecking = true
        })
        {
            op.Init();
            if (op.Header.TotalResults == 0 ||
                op.Header.TotalResults > Database.Configuration.MaxNumberOfItemsToProcessInSingleBatch)
            {
                // we don't apply this optimization if the total number of results to index
                // is more than the maximum number of items to process in a single batch.
                // The idea here is that we need to keep the amount of memory we use to a
                // manageable level even when introducing a new index to a BIG database
                try
                {
                    cts.Cancel();
                    // we have to run just a little bit of the query to properly set up the disposal
                    op.Execute(o => { });
                }
                catch (OperationCanceledException)
                {
                }
                return;
            }

            Log.Debug("For new index {0}, using precomputed indexing batch optimization for {1} docs",
                index, op.Header.TotalResults);

            op.Execute(document =>
            {
                var metadata = document.Value<RavenJObject>(Constants.Metadata);
                var key = metadata.Value<string>("@id");
                var etag = Etag.Parse(metadata.Value<string>("@etag"));
                var lastModified = DateTime.Parse(metadata.Value<string>(Constants.LastModified));
                document.Remove(Constants.Metadata);

                var doc = new JsonDocument
                {
                    DataAsJson = document,
                    Etag = etag,
                    Key = key,
                    LastModified = lastModified,
                    SkipDeleteFromIndex = true,
                    Metadata = metadata
                };

                docsToIndex.Add(doc);

                if (highestByEtag == null || doc.Etag.CompareTo(highestByEtag.Etag) > 0)
                    highestByEtag = doc;
            });
        }

        result = new PrecomputedIndexingBatch
        {
            LastIndexed = highestByEtag.Etag,
            LastModified = highestByEtag.LastModified.Value,
            Documents = docsToIndex,
            Index = index
        };
    });

    if (result != null && result.Documents != null && result.Documents.Count > 0)
        Database.IndexingExecuter.IndexPrecomputedBatch(result);
}
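// A minimal standalone sketch of the entity-name query built above; the
// collection names are hypothetical examples and System.Linq is assumed.
private static string BuildTagQuerySketch()
{
    var entityNames = new[] { "Users", "Orders" }; // hypothetical collections the index covers
    var tags = entityNames.Select(e => "Tag:[[" + e + "]]");
    return string.Join(" OR ", tags); // "Tag:[[Users]] OR Tag:[[Orders]]"
}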
public void IndexPrecomputedBatch(PrecomputedIndexingBatch precomputedBatch)
{
    context.MetricsCounters.IndexedPerSecond.Mark(precomputedBatch.Documents.Count);

    var indexToWorkOn = new IndexToWorkOn
    {
        Index = precomputedBatch.Index,
        IndexId = precomputedBatch.Index.indexId,
        LastIndexedEtag = Etag.Empty
    };

    using (MapIndexingInProgress(new List<IndexToWorkOn> { indexToWorkOn }))
    {
        var indexingBatchForIndex = FilterIndexes(new List<IndexToWorkOn> { indexToWorkOn },
            precomputedBatch.Documents, precomputedBatch.LastIndexed).FirstOrDefault();
        if (indexingBatchForIndex == null)
            return;

        IndexingBatchInfo batchInfo = null;
        try
        {
            context.ReportIndexingBatchStarted(precomputedBatch.Documents.Count,
                precomputedBatch.Documents.Sum(x => x.SerializedSizeOnDisk),
                new List<string> { indexToWorkOn.Index.PublicName }, out batchInfo);

            batchInfo.BatchType = BatchType.Precomputed;

            if (Log.IsDebugEnabled)
            {
                Log.Debug("Going to index precomputed documents for a new index {0}. Count of precomputed docs {1}",
                    precomputedBatch.Index.PublicName, precomputedBatch.Documents.Count);
            }

            HandleIndexingFor(indexingBatchForIndex, precomputedBatch.LastIndexed, precomputedBatch.LastModified);
        }
        finally
        {
            var performance = indexingBatchForIndex.Batch.GetIndexingPerformance();

            if (batchInfo != null)
            {
                if (performance != null)
                    batchInfo.PerformanceStats.TryAdd(indexingBatchForIndex.Index.PublicName, performance);

                batchInfo.BatchCompleted();
            }
        }
    }
}
public void IndexPrecomputedBatch(PrecomputedIndexingBatch precomputedBatch, CancellationToken token)
{
    token.ThrowIfCancellationRequested();

    context.MetricsCounters.IndexedPerSecond.Mark(precomputedBatch.Documents.Count);

    var indexToWorkOn = new IndexToWorkOn
    {
        Index = precomputedBatch.Index,
        IndexId = precomputedBatch.Index.indexId,
        LastIndexedEtag = Etag.Empty
    };

    using (LogContext.WithDatabase(context.DatabaseName))
    using (MapIndexingInProgress(new List<IndexToWorkOn> { indexToWorkOn }))
    {
        IndexingBatchForIndex indexingBatchForIndex;
        if (precomputedBatch.Documents.Count > 0)
        {
            indexingBatchForIndex = FilterIndexes(new List<IndexToWorkOn> { indexToWorkOn },
                precomputedBatch.Documents, precomputedBatch.LastIndexed).FirstOrDefault();
        }
        else
        {
            // no documents to index, but apply an empty batch anyway so the new
            // index still records the last-indexed etag
            indexingBatchForIndex = new IndexingBatchForIndex
            {
                Batch = new IndexingBatch(precomputedBatch.LastIndexed),
                Index = precomputedBatch.Index,
                IndexId = precomputedBatch.Index.indexId,
                LastIndexedEtag = precomputedBatch.LastIndexed
            };
        }

        if (indexingBatchForIndex == null)
            return;

        IndexingBatchInfo batchInfo = null;
        IndexingPerformanceStats performance = null;
        try
        {
            batchInfo = context.ReportIndexingBatchStarted(precomputedBatch.Documents.Count, -1,
                new List<string> { indexToWorkOn.Index.PublicName });

            batchInfo.BatchType = BatchType.Precomputed;

            if (Log.IsDebugEnabled)
            {
                Log.Debug("Going to index precomputed documents for a new index {0}. Count of precomputed docs {1}",
                    precomputedBatch.Index.PublicName, precomputedBatch.Documents.Count);
            }

            performance = HandleIndexingFor(indexingBatchForIndex, precomputedBatch.LastIndexed, precomputedBatch.LastModified, token);
        }
        finally
        {
            if (batchInfo != null)
            {
                if (performance != null)
                    batchInfo.PerformanceStats.TryAdd(indexingBatchForIndex.Index.PublicName, performance);

                context.ReportIndexingBatchCompleted(batchInfo);
            }
        }
    }

    indexReplacer.ReplaceIndexes(new[] { indexToWorkOn.IndexId });
}
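// A minimal caller sketch for the overload above (the method name is
// hypothetical; Database, WorkContext and cts are assumed to come from the
// surrounding class, as in ApplyPrecomputedBatchForNewIndex below): linking
// the operation's own token source with the work context token lets either
// side abort the precomputed batch.
private void RunPrecomputedBatchSketch(PrecomputedIndexingBatch batch, CancellationTokenSource cts)
{
    using (var linked = CancellationTokenSource.CreateLinkedTokenSource(cts.Token, WorkContext.CancellationToken))
    {
        Database.IndexingExecuter.IndexPrecomputedBatch(batch, linked.Token);
    }
}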
private void ApplyPrecomputedBatchForNewIndex(Index index, AbstractViewGenerator generator, int pageSize, CancellationTokenSource cts)
{
    PrecomputedIndexingBatch result = null;

    var docsToIndex = new List<JsonDocument>();
    TransactionalStorage.Batch(actions =>
    {
        var query = GetQueryForAllMatchingDocumentsForIndex(generator);

        using (DocumentCacher.SkipSetDocumentsInDocumentCache())
        using (var linked = CancellationTokenSource.CreateLinkedTokenSource(cts.Token, WorkContext.CancellationToken))
        using (var op = new QueryActions.DatabaseQueryOperation(Database, Constants.DocumentsByEntityNameIndex, new IndexQuery
        {
            Query = query,
            PageSize = pageSize
        }, actions, linked)
        {
            ShouldSkipDuplicateChecking = true
        })
        {
            op.Init();

            // if we are working on a test index, apply the optimization anyway,
            // as the index is capped by a small number of results
            if (index.IsTestIndex == false && op.Header.TotalResults > pageSize)
            {
                // we don't apply this optimization if the total number of results to index
                // is more than the maximum number of items to process in a single batch.
                // The idea here is that we need to keep the amount of memory we use to a
                // manageable level even when introducing a new index to a BIG database
                try
                {
                    cts.Cancel();
                    // we have to run just a little bit of the query to properly set up the disposal
                    op.Execute(o => { });
                }
                catch (OperationCanceledException)
                {
                }
                return;
            }

            if (Log.IsDebugEnabled)
            {
                Log.Debug("For new index {0}, using precomputed indexing batch optimization for {1} docs",
                    index, op.Header.TotalResults);
            }

            var totalLoadedDocumentSize = 0;
            const int totalSizeToCheck = 16 * 1024 * 1024; // re-check available memory every 16MB of loaded documents
            var localLoadedDocumentSize = 0;
            op.Execute(document =>
            {
                var metadata = document.Value<RavenJObject>(Constants.Metadata);
                var key = metadata.Value<string>("@id");
                var etag = Etag.Parse(metadata.Value<string>("@etag"));
                var lastModified = DateTime.Parse(metadata.Value<string>(Constants.LastModified));
                document.Remove(Constants.Metadata);
                var serializedSizeOnDisk = metadata.Value<int>(Constants.SerializedSizeOnDisk);
                metadata.Remove(Constants.SerializedSizeOnDisk);

                var doc = new JsonDocument
                {
                    DataAsJson = document,
                    Etag = etag,
                    Key = key,
                    SerializedSizeOnDisk = serializedSizeOnDisk,
                    LastModified = lastModified,
                    SkipDeleteFromIndex = true,
                    Metadata = metadata
                };

                docsToIndex.Add(doc);
                totalLoadedDocumentSize += serializedSizeOnDisk;
                localLoadedDocumentSize += serializedSizeOnDisk;

                if (totalLoadedDocumentSize > Database.Configuration.MaxPrecomputedBatchTotalDocumentSizeInBytes)
                {
                    var error = string.Format(
                        "Aborting applying precomputed batch for index id: {0}, name: {1} because we have {2}mb of documents that were fetched and the configured max data to fetch is {3}mb",
                        index.indexId, index.PublicName,
                        totalLoadedDocumentSize / 1024 / 1024,
                        Database.Configuration.MaxPrecomputedBatchTotalDocumentSizeInBytes / 1024 / 1024);

                    // we are aborting the operation, so don't keep the references
                    docsToIndex.Clear();
                    throw new TotalDataSizeExceededException(error);
                }

                if (localLoadedDocumentSize <= totalSizeToCheck)
                    return;

                localLoadedDocumentSize = 0;

                if (Database.Configuration.MemoryLimitForProcessingInMb > MemoryStatistics.AvailableMemoryInMb)
                {
                    var error = string.Format(
                        "Aborting applying precomputed batch for index id: {0}, name: {1} because we have only {2}mb of available memory and the configured memory limit for processing is {3}mb",
                        index.indexId, index.PublicName,
                        MemoryStatistics.AvailableMemoryInMb,
                        Database.Configuration.MemoryLimitForProcessingInMb);

                    // we are aborting the operation, so don't keep the references
                    docsToIndex.Clear();
                    throw new TotalDataSizeExceededException(error);
                }
            });

            result = new PrecomputedIndexingBatch
            {
                LastIndexed = op.Header.IndexEtag,
                LastModified = op.Header.IndexTimestamp,
                Documents = docsToIndex,
                Index = index
            };
        }
    });

    // note: Count >= 0 is intentional; an empty batch is still applied so the new
    // index records its last-indexed etag (see the zero-document branch in
    // IndexPrecomputedBatch above)
    if (result != null && result.Documents != null && result.Documents.Count >= 0)
    {
        using (var linked = CancellationTokenSource.CreateLinkedTokenSource(cts.Token, WorkContext.CancellationToken))
        {
            Database.IndexingExecuter.IndexPrecomputedBatch(result, linked.Token);

            if (index.IsTestIndex)
                TransactionalStorage.Batch(accessor => accessor.Indexing.TouchIndexEtag(index.IndexId));
        }
    }
}
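// A minimal sketch of the incremental memory guard used in the Execute
// callback above (class and parameter names are hypothetical): OS-level
// memory statistics are consulted only after every 16MB of newly loaded
// documents, not on every document.
internal class MemoryGuardSketch
{
    private const int CheckEveryBytes = 16 * 1024 * 1024; // 16MB window

    private int localLoadedBytes;

    public bool ShouldAbort(int newlyLoadedBytes, int availableMemoryMb, int memoryLimitMb)
    {
        localLoadedBytes += newlyLoadedBytes;
        if (localLoadedBytes <= CheckEveryBytes)
            return false; // not enough new data since the last check

        localLoadedBytes = 0; // reset the window before re-checking
        return memoryLimitMb > availableMemoryMb; // abort when below the configured limit
    }
}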