private void ApplyPrecomputedBatchForNewIndex(Index index, AbstractViewGenerator generator)
{
    const string DocumentsByEntityNameIndex = "Raven/DocumentsByEntityName";
    PrecomputedIndexingBatch result = null;

    var docsToIndex = new List<JsonDocument>();
    TransactionalStorage.Batch(actions =>
    {
        var tags = generator.ForEntityNames.Select(entityName => "Tag:[[" + entityName + "]]").ToList();
        var query = string.Join(" OR ", tags);

        JsonDocument highestByEtag = null;

        var cts = new CancellationTokenSource();
        using (var linked = CancellationTokenSource.CreateLinkedTokenSource(cts.Token, WorkContext.CancellationToken))
        using (var op = new QueryActions.DatabaseQueryOperation(Database, DocumentsByEntityNameIndex, new IndexQuery
        {
            Query = query,
            PageSize = Database.Configuration.MaxNumberOfItemsToProcessInSingleBatch
        }, actions, linked)
        {
            ShouldSkipDuplicateChecking = true
        })
        {
            op.Init();
            if (op.Header.TotalResults == 0 ||
                (op.Header.TotalResults > Database.Configuration.MaxNumberOfItemsToProcessInSingleBatch))
            {
                // we don't apply this optimization if the total number of results
                // to index is more than the max numbers to index in a single batch.
                // The idea here is that we need to keep the amount
                // of memory we use to a manageable level even when introducing a new index to a BIG
                // database
                try
                {
                    cts.Cancel();
                    // we have to run just a little bit of the query to properly setup the disposal
                    op.Execute(o => { });
                }
                catch (OperationCanceledException)
                {
                }
                return;
            }

            Log.Debug("For new index {0}, using precomputed indexing batch optimization for {1} docs", index, op.Header.TotalResults);
            op.Execute(document =>
            {
                var metadata = document.Value<RavenJObject>(Constants.Metadata);
                var key = metadata.Value<string>("@id");
                var etag = Etag.Parse(metadata.Value<string>("@etag"));
                var lastModified = DateTime.Parse(metadata.Value<string>(Constants.LastModified));
                document.Remove(Constants.Metadata);

                var doc = new JsonDocument
                {
                    DataAsJson = document,
                    Etag = etag,
                    Key = key,
                    LastModified = lastModified,
                    SkipDeleteFromIndex = true,
                    Metadata = metadata
                };

                docsToIndex.Add(doc);

                if (highestByEtag == null || doc.Etag.CompareTo(highestByEtag.Etag) > 0)
                    highestByEtag = doc;
            });
        }

        result = new PrecomputedIndexingBatch
        {
            LastIndexed = highestByEtag.Etag,
            LastModified = highestByEtag.LastModified.Value,
            Documents = docsToIndex,
            Index = index
        };
    });

    if (result != null && result.Documents != null && result.Documents.Count > 0)
        Database.IndexingExecuter.IndexPrecomputedBatch(result);
}
private void ApplyPrecomputedBatchForNewIndex(Index index, AbstractViewGenerator generator, int pageSize, CancellationTokenSource cts)
{
    PrecomputedIndexingBatch result = null;

    var docsToIndex = new List<JsonDocument>();
    TransactionalStorage.Batch(actions =>
    {
        var query = GetQueryForAllMatchingDocumentsForIndex(generator);

        JsonDocument highestByEtag = null;

        using (var linked = CancellationTokenSource.CreateLinkedTokenSource(cts.Token, WorkContext.CancellationToken))
        using (var op = new QueryActions.DatabaseQueryOperation(Database, Constants.DocumentsByEntityNameIndex, new IndexQuery
        {
            Query = query,
            PageSize = pageSize
        }, actions, linked)
        {
            ShouldSkipDuplicateChecking = true
        })
        {
            op.Init();
            if (op.Header.TotalResults == 0 ||
                (op.Header.TotalResults > Database.Configuration.MaxNumberOfItemsToProcessInSingleBatch))
            {
                // we don't apply this optimization if the total number of results
                // to index is more than the max numbers to index in a single batch.
                // The idea here is that we need to keep the amount
                // of memory we use to a manageable level even when introducing a new index to a BIG
                // database
                try
                {
                    cts.Cancel();
                    // we have to run just a little bit of the query to properly setup the disposal
                    op.Execute(o => { });
                }
                catch (OperationCanceledException)
                {
                }
                return;
            }

            Log.Debug("For new index {0}, using precomputed indexing batch optimization for {1} docs", index, op.Header.TotalResults);
            op.Execute(document =>
            {
                var metadata = document.Value<RavenJObject>(Constants.Metadata);
                var key = metadata.Value<string>("@id");
                var etag = Etag.Parse(metadata.Value<string>("@etag"));
                var lastModified = DateTime.Parse(metadata.Value<string>(Constants.LastModified));
                document.Remove(Constants.Metadata);

                var doc = new JsonDocument
                {
                    DataAsJson = document,
                    Etag = etag,
                    Key = key,
                    LastModified = lastModified,
                    SkipDeleteFromIndex = true,
                    Metadata = metadata
                };

                docsToIndex.Add(doc);

                if (highestByEtag == null || doc.Etag.CompareTo(highestByEtag.Etag) > 0)
                {
                    highestByEtag = doc;
                }
            });
        }

        result = new PrecomputedIndexingBatch
        {
            LastIndexed = highestByEtag.Etag,
            LastModified = highestByEtag.LastModified.Value,
            Documents = docsToIndex,
            Index = index
        };
    });

    if (result != null && result.Documents != null && result.Documents.Count > 0)
    {
        using (var linked = CancellationTokenSource.CreateLinkedTokenSource(cts.Token, WorkContext.CancellationToken))
        {
            Database.IndexingExecuter.IndexPrecomputedBatch(result, linked.Token);

            if (index.IsTestIndex)
            {
                TransactionalStorage.Batch(accessor => accessor.Indexing.TouchIndexEtag(index.IndexId));
            }
        }
    }
}
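The revisions from this point on call a GetQueryForAllMatchingDocumentsForIndex helper instead of building the tag query inline. The helper itself is not part of this listing; a minimal sketch, assuming it simply extracts the inline logic from the first version above (the signature and accessibility are assumptions):

// Hypothetical sketch of the extracted helper; the real implementation may differ.
private static string GetQueryForAllMatchingDocumentsForIndex(AbstractViewGenerator generator)
{
    // One Tag:[[...]] clause per entity name the index covers, OR-ed together,
    // exactly as the earlier inline version did.
    var tags = generator.ForEntityNames.Select(entityName => "Tag:[[" + entityName + "]]");
    return string.Join(" OR ", tags);
}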
private void ApplyPrecomputedBatchForNewIndex(Index index, AbstractViewGenerator generator, int pageSize, CancellationTokenSource cts)
{
    PrecomputedIndexingBatch result = null;

    var docsToIndex = new List<JsonDocument>();
    TransactionalStorage.Batch(actions =>
    {
        var query = QueryBuilder.GetQueryForAllMatchingDocumentsForIndex(Database, generator.ForEntityNames);

        using (DocumentCacher.SkipSetDocumentsInDocumentCache())
        using (var linked = CancellationTokenSource.CreateLinkedTokenSource(cts.Token, WorkContext.CancellationToken))
        using (var op = new QueryActions.DatabaseQueryOperation(Database, Constants.DocumentsByEntityNameIndex, new IndexQuery
        {
            Query = query,
            PageSize = pageSize
        }, actions, linked)
        {
            ShouldSkipDuplicateChecking = true
        })
        {
            op.Init();

            //if we are working on a test index, apply the optimization anyway, as the index is capped by small number of results
            if (op.Header.TotalResults > pageSize && index.IsTestIndex == false)
            {
                // we don't apply this optimization if the total number of results
                // to index is more than the max numbers to index in a single batch.
                // The idea here is that we need to keep the amount
                // of memory we use to a manageable level even when introducing a new index to a BIG
                // database
                try
                {
                    cts.Cancel();
                    // we have to run just a little bit of the query to properly setup the disposal
                    op.Execute(o => { });
                }
                catch (OperationCanceledException)
                {
                }
                return;
            }

            if (Log.IsDebugEnabled)
            {
                Log.Debug("For new index {0}, using precomputed indexing batch optimization for {1} docs", index, op.Header.TotalResults);
            }

            var totalLoadedDocumentSize = 0;
            const int totalSizeToCheck = 16 * 1024 * 1024; //16MB
            var localLoadedDocumentSize = 0;
            op.Execute(document =>
            {
                var metadata = document.Value<RavenJObject>(Constants.Metadata);
                var key = metadata.Value<string>("@id");
                var etag = Etag.Parse(metadata.Value<string>("@etag"));
                var lastModified = DateTime.Parse(metadata.Value<string>(Constants.LastModified));
                document.Remove(Constants.Metadata);
                var serializedSizeOnDisk = metadata.Value<int>(Constants.SerializedSizeOnDisk);
                metadata.Remove(Constants.SerializedSizeOnDisk);

                var doc = new JsonDocument
                {
                    DataAsJson = document,
                    Etag = etag,
                    Key = key,
                    SerializedSizeOnDisk = serializedSizeOnDisk,
                    LastModified = lastModified,
                    SkipDeleteFromIndex = true,
                    Metadata = metadata
                };

                docsToIndex.Add(doc);

                totalLoadedDocumentSize += serializedSizeOnDisk;
                localLoadedDocumentSize += serializedSizeOnDisk;

                if (totalLoadedDocumentSize > Database.Configuration.MaxPrecomputedBatchTotalDocumentSizeInBytes)
                {
                    var error = $"Aborting applying precomputed batch for index id: {index.indexId}, " +
                                $"name: {index.PublicName} because we have {totalLoadedDocumentSize / 1024 / 1024}MB of documents that were fetched " +
                                $"and the configured max data to fetch is " +
                                $"{Database.Configuration.MaxPrecomputedBatchTotalDocumentSizeInBytes / 1024 / 1024}MB";

                    //we are aborting operation, so don't keep the references
                    docsToIndex.Clear();
                    throw new TotalDataSizeExceededException(error);
                }

                if (localLoadedDocumentSize <= totalSizeToCheck)
                {
                    return;
                }

                localLoadedDocumentSize = 0;
                if (Database.Configuration.MemoryLimitForProcessingInMb > MemoryStatistics.AvailableMemoryInMb)
                {
                    var error = $"Aborting applying precomputed batch for index id: {index.indexId}, " +
                                $"name: {index.PublicName} because we have {MemoryStatistics.AvailableMemoryInMb}MB " +
                                $"of available memory and the available memory for processing is: " +
                                $"{Database.Configuration.MemoryLimitForProcessingInMb}MB";

                    //we are aborting operation, so don't keep the references
                    docsToIndex.Clear();
                    throw new TotalDataSizeExceededException(error);
                }
            });

            result = new PrecomputedIndexingBatch
            {
                LastIndexed = op.Header.IndexEtag,
                LastModified = op.Header.IndexTimestamp,
                Documents = docsToIndex,
                Index = index
            };
        }
    });

    if (result != null && result.Documents != null && result.Documents.Count >= 0)
    {
        using (var linked = CancellationTokenSource.CreateLinkedTokenSource(cts.Token, WorkContext.CancellationToken))
        {
            Database.IndexingExecuter.IndexPrecomputedBatch(result, linked.Token);

            if (index.IsTestIndex)
            {
                TransactionalStorage.Batch(accessor => accessor.Indexing.TouchIndexEtag(index.IndexId));
            }
        }
    }
}
private void ApplyPrecomputedBatchForNewIndex(Index index, AbstractViewGenerator generator) { const string DocumentsByEntityNameIndex = "Raven/DocumentsByEntityName"; PrecomputedIndexingBatch result = null; var docsToIndex = new List <JsonDocument>(); TransactionalStorage.Batch(actions => { var countOfDocuments = actions.Documents.GetDocumentsCount(); var tags = generator.ForEntityNames.Select(entityName => "Tag:[[" + entityName + "]]").ToList(); var query = string.Join(" OR ", tags); var stats = actions.Indexing.GetIndexStats( IndexDefinitionStorage.GetIndexDefinition(DocumentsByEntityNameIndex).IndexId); var lastIndexedEtagByRavenDocumentsByEntityName = stats.LastIndexedEtag; var lastModifiedByRavenDocumentsByEntityName = stats.LastIndexedTimestamp; var cts = new CancellationTokenSource(); using (var linked = CancellationTokenSource.CreateLinkedTokenSource(cts.Token, WorkContext.CancellationToken)) using (var op = new QueryActions.DatabaseQueryOperation(Database, DocumentsByEntityNameIndex, new IndexQuery { Query = query }, actions, linked) { ShouldSkipDuplicateChecking = true }) { op.Init(); if (op.Header.TotalResults == 0 || op.Header.TotalResults > (countOfDocuments * 0.25) || (op.Header.TotalResults > Database.Configuration.MaxNumberOfItemsToIndexInSingleBatch * 4)) { // we don't apply this optimization if the total number of results is more than // 25% of the count of documents (would be easier to just run it regardless). // or if the number of docs to index is significantly more than the max numbers // to index in a single batch. The idea here is that we need to keep the amount // of memory we use to a manageable level even when introducing a new index to a BIG // database try { cts.Cancel(); // we have to run just a little bit of the query to properly setup the disposal op.Execute(o => { }); } catch (OperationCanceledException) { } return; } Log.Debug("For new index {0}, using precomputed indexing batch optimization for {1} docs", index, op.Header.TotalResults); op.Execute(document => { var metadata = document.Value <RavenJObject>(Constants.Metadata); var key = metadata.Value <string>("@id"); var etag = Etag.Parse(metadata.Value <string>("@etag")); var lastModified = DateTime.Parse(metadata.Value <string>(Constants.LastModified)); document.Remove(Constants.Metadata); docsToIndex.Add(new JsonDocument { DataAsJson = document, Etag = etag, Key = key, LastModified = lastModified, SkipDeleteFromIndex = true, Metadata = metadata }); }); } result = new PrecomputedIndexingBatch { LastIndexed = lastIndexedEtagByRavenDocumentsByEntityName, LastModified = lastModifiedByRavenDocumentsByEntityName, Documents = docsToIndex, Index = index }; }); if (result != null && result.Documents != null && result.Documents.Count > 0) { Database.IndexingExecuter.IndexPrecomputedBatch(result); } }
private void ApplyPrecomputedBatchForNewIndex(Index index, AbstractViewGenerator generator, int pageSize, CancellationTokenSource cts)
{
    PrecomputedIndexingBatch result = null;

    var docsToIndex = new List<JsonDocument>();
    TransactionalStorage.Batch(actions =>
    {
        var query = GetQueryForAllMatchingDocumentsForIndex(generator);

        using (DocumentCacher.SkipSetDocumentsInDocumentCache())
        using (var linked = CancellationTokenSource.CreateLinkedTokenSource(cts.Token, WorkContext.CancellationToken))
        using (var op = new QueryActions.DatabaseQueryOperation(Database, Constants.DocumentsByEntityNameIndex, new IndexQuery
        {
            Query = query,
            PageSize = pageSize
        }, actions, linked)
        {
            ShouldSkipDuplicateChecking = true
        })
        {
            op.Init();

            //if we are working on a test index, apply the optimization anyway, as the index is capped by small number of results
            if (index.IsTestIndex == false && op.Header.TotalResults > pageSize)
            {
                // we don't apply this optimization if the total number of results
                // to index is more than the max numbers to index in a single batch.
                // The idea here is that we need to keep the amount
                // of memory we use to a manageable level even when introducing a new index to a BIG
                // database
                try
                {
                    cts.Cancel();
                    // we have to run just a little bit of the query to properly setup the disposal
                    op.Execute(o => { });
                }
                catch (OperationCanceledException)
                {
                }
                return;
            }

            if (Log.IsDebugEnabled)
            {
                Log.Debug("For new index {0}, using precomputed indexing batch optimization for {1} docs", index, op.Header.TotalResults);
            }

            var totalLoadedDocumentSize = 0;
            const int totalSizeToCheck = 16 * 1024 * 1024; //16MB
            var localLoadedDocumentSize = 0;
            op.Execute(document =>
            {
                var metadata = document.Value<RavenJObject>(Constants.Metadata);
                var key = metadata.Value<string>("@id");
                var etag = Etag.Parse(metadata.Value<string>("@etag"));
                var lastModified = DateTime.Parse(metadata.Value<string>(Constants.LastModified));
                document.Remove(Constants.Metadata);
                var serializedSizeOnDisk = metadata.Value<int>(Constants.SerializedSizeOnDisk);
                metadata.Remove(Constants.SerializedSizeOnDisk);

                var doc = new JsonDocument
                {
                    DataAsJson = document,
                    Etag = etag,
                    Key = key,
                    SerializedSizeOnDisk = serializedSizeOnDisk,
                    LastModified = lastModified,
                    SkipDeleteFromIndex = true,
                    Metadata = metadata
                };

                docsToIndex.Add(doc);

                totalLoadedDocumentSize += serializedSizeOnDisk;
                localLoadedDocumentSize += serializedSizeOnDisk;

                if (totalLoadedDocumentSize > Database.Configuration.MaxPrecomputedBatchTotalDocumentSizeInBytes)
                {
                    var error = string.Format(
                        @"Aborting applying precomputed batch for index id: {0}, name: {1} because we have {2}mb of documents that were fetched and the configured max data to fetch is {3}mb",
                        index.indexId, index.PublicName,
                        totalLoadedDocumentSize / 1024 / 1024,
                        Database.Configuration.MaxPrecomputedBatchTotalDocumentSizeInBytes / 1024 / 1024);

                    //we are aborting operation, so don't keep the references
                    docsToIndex.Clear();
                    throw new TotalDataSizeExceededException(error);
                }

                if (localLoadedDocumentSize <= totalSizeToCheck)
                    return;

                localLoadedDocumentSize = 0;
                if (Database.Configuration.MemoryLimitForProcessingInMb > MemoryStatistics.AvailableMemoryInMb)
                {
                    var error = string.Format(
                        @"Aborting applying precomputed batch for index id: {0}, name: {1} because we have {2}mb of available memory and the available memory for processing is: {3}mb",
                        index.indexId, index.PublicName,
                        MemoryStatistics.AvailableMemoryInMb,
                        Database.Configuration.MemoryLimitForProcessingInMb);

                    //we are aborting operation, so don't keep the references
                    docsToIndex.Clear();
                    throw new TotalDataSizeExceededException(error);
                }
            });

            result = new PrecomputedIndexingBatch
            {
                LastIndexed = op.Header.IndexEtag,
                LastModified = op.Header.IndexTimestamp,
                Documents = docsToIndex,
                Index = index
            };
        }
    });

    if (result != null && result.Documents != null && result.Documents.Count >= 0)
    {
        using (var linked = CancellationTokenSource.CreateLinkedTokenSource(cts.Token, WorkContext.CancellationToken))
        {
            Database.IndexingExecuter.IndexPrecomputedBatch(result, linked.Token);

            if (index.IsTestIndex)
                TransactionalStorage.Batch(accessor => accessor.Indexing.TouchIndexEtag(index.IndexId));
        }
    }
}
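TotalDataSizeExceededException is thrown when the accumulated document size exceeds the configured limit, but its definition is not part of this listing. A minimal sketch, assuming it is a plain exception type that only carries the message built above:

// Hypothetical shape; the actual class in the codebase may live in another namespace
// and define additional constructors.
public class TotalDataSizeExceededException : System.Exception
{
    public TotalDataSizeExceededException(string message)
        : base(message)
    {
    }
}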