Example #1
0
        /// <summary>
        /// Attempts to seed a newly created index from a single precomputed batch. The documents are
        /// obtained by querying the "Raven/DocumentsByEntityName" index for every entity name in
        /// <c>generator.ForEntityNames</c>.
        /// </summary>
        /// <remarks>
        /// No batch is handed to the indexing executer when the query reports zero results, when it
        /// reports more results than <c>Database.Configuration.MaxNumberOfItemsToProcessInSingleBatch</c>,
        /// or when the query ends up streaming no documents.
        /// </remarks>
        /// <param name="index">The new index the batch is built for.</param>
        /// <param name="generator">View generator whose <c>ForEntityNames</c> build the tag query.</param>
        private void ApplyPrecomputedBatchForNewIndex(Index index, AbstractViewGenerator generator)
        {
            const string DocumentsByEntityNameIndex = "Raven/DocumentsByEntityName";

            PrecomputedIndexingBatch result = null;

            var docsToIndex = new List<JsonDocument>();
            TransactionalStorage.Batch(actions =>
            {
                // One "Tag:[[name]]" clause per entity name, OR-ed together.
                var tags = generator.ForEntityNames.Select(entityName => "Tag:[[" + entityName + "]]").ToList();

                var query = string.Join(" OR ", tags);

                JsonDocument highestByEtag = null;

                // The cancellation source is created here, so it is disposed here as well.
                using (var cts = new CancellationTokenSource())
                using (var linked = CancellationTokenSource.CreateLinkedTokenSource(cts.Token, WorkContext.CancellationToken))
                using (var op = new QueryActions.DatabaseQueryOperation(Database, DocumentsByEntityNameIndex, new IndexQuery
                {
                    Query = query,
                    PageSize = Database.Configuration.MaxNumberOfItemsToProcessInSingleBatch
                }, actions, linked)
                {
                    ShouldSkipDuplicateChecking = true
                })
                {
                    op.Init();
                    if (op.Header.TotalResults == 0 ||
                        (op.Header.TotalResults > Database.Configuration.MaxNumberOfItemsToProcessInSingleBatch))
                    {
                        // we don't apply this optimization if the total number of results
                        // to index is more than the max numbers to index in a single batch.
                        // The idea here is that we need to keep the amount
                        // of memory we use to a manageable level even when introducing a new index to a BIG
                        // database
                        try
                        {
                            cts.Cancel();
                            // we have to run just a little bit of the query to properly setup the disposal
                            op.Execute(o => { });
                        }
                        catch (OperationCanceledException)
                        {
                            // expected: the query was cancelled on purpose just above
                        }
                        return;
                    }

                    Log.Debug("For new index {0}, using precomputed indexing batch optimization for {1} docs", index,
                              op.Header.TotalResults);
                    op.Execute(document =>
                    {
                        // Split the streamed JSON into its metadata and its data part.
                        var metadata = document.Value<RavenJObject>(Constants.Metadata);
                        var key = metadata.Value<string>("@id");
                        var etag = Etag.Parse(metadata.Value<string>("@etag"));
                        var lastModified = DateTime.Parse(metadata.Value<string>(Constants.LastModified));
                        document.Remove(Constants.Metadata);

                        var doc = new JsonDocument
                        {
                            DataAsJson = document,
                            Etag = etag,
                            Key = key,
                            LastModified = lastModified,
                            SkipDeleteFromIndex = true,
                            Metadata = metadata
                        };

                        docsToIndex.Add(doc);

                        // Remember the document carrying the highest etag seen so far.
                        if (highestByEtag == null || doc.Etag.CompareTo(highestByEtag.Etag) > 0)
                            highestByEtag = doc;
                    });
                }

                // The callback may never have run even though TotalResults was positive;
                // without a document there is no etag to report, so leave result null.
                if (highestByEtag == null)
                    return;

                result = new PrecomputedIndexingBatch
                {
                    LastIndexed = highestByEtag.Etag,
                    LastModified = highestByEtag.LastModified.Value,
                    Documents = docsToIndex,
                    Index = index
                };
            });

            if (result != null && result.Documents != null && result.Documents.Count > 0)
                Database.IndexingExecuter.IndexPrecomputedBatch(result);
        }
Example #2
0
        /// <summary>
        /// Attempts to seed a newly created index from a single precomputed batch. The documents are
        /// obtained by running the query returned by <c>GetQueryForAllMatchingDocumentsForIndex</c>
        /// against <c>Constants.DocumentsByEntityNameIndex</c>.
        /// </summary>
        /// <remarks>
        /// No batch is indexed when the query reports zero results, when it reports more results than
        /// <c>Database.Configuration.MaxNumberOfItemsToProcessInSingleBatch</c>, or when the query
        /// ends up streaming no documents.
        /// </remarks>
        /// <param name="index">The new index the batch is built for.</param>
        /// <param name="generator">View generator used to build the document query.</param>
        /// <param name="pageSize">Page size set on the query.</param>
        /// <param name="cts">
        /// Caller-supplied cancellation source. It is linked with <c>WorkContext.CancellationToken</c>
        /// and is cancelled by this method when the optimization is skipped.
        /// </param>
        private void ApplyPrecomputedBatchForNewIndex(Index index, AbstractViewGenerator generator, int pageSize, CancellationTokenSource cts)
        {
            PrecomputedIndexingBatch result = null;

            var docsToIndex = new List<JsonDocument>();

            TransactionalStorage.Batch(actions =>
            {
                var query = GetQueryForAllMatchingDocumentsForIndex(generator);

                JsonDocument highestByEtag = null;

                using (var linked = CancellationTokenSource.CreateLinkedTokenSource(cts.Token, WorkContext.CancellationToken))
                using (var op = new QueryActions.DatabaseQueryOperation(Database, Constants.DocumentsByEntityNameIndex, new IndexQuery
                {
                    Query = query,
                    PageSize = pageSize
                }, actions, linked)
                {
                    ShouldSkipDuplicateChecking = true
                })
                {
                    op.Init();
                    if (op.Header.TotalResults == 0 ||
                        (op.Header.TotalResults > Database.Configuration.MaxNumberOfItemsToProcessInSingleBatch))
                    {
                        // we don't apply this optimization if the total number of results
                        // to index is more than the max numbers to index in a single batch.
                        // The idea here is that we need to keep the amount
                        // of memory we use to a manageable level even when introducing a new index to a BIG
                        // database
                        try
                        {
                            cts.Cancel();
                            // we have to run just a little bit of the query to properly setup the disposal
                            op.Execute(o => { });
                        }
                        catch (OperationCanceledException)
                        {
                            // expected: the query was cancelled on purpose just above
                        }
                        return;
                    }

                    Log.Debug("For new index {0}, using precomputed indexing batch optimization for {1} docs", index,
                              op.Header.TotalResults);
                    op.Execute(document =>
                    {
                        // Split the streamed JSON into its metadata and its data part.
                        var metadata = document.Value<RavenJObject>(Constants.Metadata);
                        var key = metadata.Value<string>("@id");
                        var etag = Etag.Parse(metadata.Value<string>("@etag"));
                        var lastModified = DateTime.Parse(metadata.Value<string>(Constants.LastModified));
                        document.Remove(Constants.Metadata);

                        var doc = new JsonDocument
                        {
                            DataAsJson = document,
                            Etag = etag,
                            Key = key,
                            LastModified = lastModified,
                            SkipDeleteFromIndex = true,
                            Metadata = metadata
                        };

                        docsToIndex.Add(doc);

                        // Remember the document carrying the highest etag seen so far.
                        if (highestByEtag == null || doc.Etag.CompareTo(highestByEtag.Etag) > 0)
                        {
                            highestByEtag = doc;
                        }
                    });
                }

                // The callback may never have run even though TotalResults was positive;
                // without a document there is no etag to report, so leave result null.
                if (highestByEtag == null)
                {
                    return;
                }

                result = new PrecomputedIndexingBatch
                {
                    LastIndexed = highestByEtag.Etag,
                    LastModified = highestByEtag.LastModified.Value,
                    Documents = docsToIndex,
                    Index = index
                };
            });

            if (result != null && result.Documents != null && result.Documents.Count > 0)
            {
                using (var linked = CancellationTokenSource.CreateLinkedTokenSource(cts.Token, WorkContext.CancellationToken))
                {
                    Database.IndexingExecuter.IndexPrecomputedBatch(result, linked.Token);

                    if (index.IsTestIndex)
                    {
                        TransactionalStorage.Batch(accessor => accessor.Indexing.TouchIndexEtag(index.IndexId));
                    }
                }
            }
        }
Example #3
0
        /// <summary>
        /// Attempts to seed a newly created index from a single precomputed batch. The documents are
        /// obtained by running the query built by <c>QueryBuilder.GetQueryForAllMatchingDocumentsForIndex</c>
        /// against <c>Constants.DocumentsByEntityNameIndex</c>.
        /// </summary>
        /// <remarks>
        /// For non-test indexes the optimization is skipped when the query reports more results than
        /// <paramref name="pageSize"/>. While documents are streamed, the callback aborts by throwing
        /// <c>TotalDataSizeExceededException</c> when the accumulated serialized size exceeds
        /// <c>Database.Configuration.MaxPrecomputedBatchTotalDocumentSizeInBytes</c>, or when, at a
        /// periodic check (after more than 16MB has been loaded since the previous check), available
        /// memory is below <c>Database.Configuration.MemoryLimitForProcessingInMb</c>.
        /// </remarks>
        /// <param name="index">The new index the batch is built for.</param>
        /// <param name="generator">View generator whose <c>ForEntityNames</c> build the document query.</param>
        /// <param name="pageSize">Page size set on the query; also the cut-off for non-test indexes.</param>
        /// <param name="cts">
        /// Caller-supplied cancellation source. It is linked with <c>WorkContext.CancellationToken</c>
        /// and is cancelled by this method when the optimization is skipped.
        /// </param>
        private void ApplyPrecomputedBatchForNewIndex(Index index, AbstractViewGenerator generator, int pageSize, CancellationTokenSource cts)
        {
            PrecomputedIndexingBatch result = null;

            var docsToIndex = new List<JsonDocument>();

            TransactionalStorage.Batch(actions =>
            {
                var query = QueryBuilder.GetQueryForAllMatchingDocumentsForIndex(Database, generator.ForEntityNames);

                using (DocumentCacher.SkipSetDocumentsInDocumentCache())
                using (var linked = CancellationTokenSource.CreateLinkedTokenSource(cts.Token, WorkContext.CancellationToken))
                using (var op = new QueryActions.DatabaseQueryOperation(Database, Constants.DocumentsByEntityNameIndex, new IndexQuery
                {
                    Query = query,
                    PageSize = pageSize
                }, actions, linked)
                {
                    ShouldSkipDuplicateChecking = true
                })
                {
                    op.Init();

                    //if we are working on a test index, apply the optimization anyway, as the index is capped by small number of results
                    if (op.Header.TotalResults > pageSize && index.IsTestIndex == false)
                    {
                        // we don't apply this optimization if the total number of results
                        // to index is more than the max numbers to index in a single batch.
                        // The idea here is that we need to keep the amount
                        // of memory we use to a manageable level even when introducing a new index to a BIG
                        // database
                        try
                        {
                            cts.Cancel();
                            // we have to run just a little bit of the query to properly setup the disposal
                            op.Execute(o => { });
                        }
                        catch (OperationCanceledException)
                        {
                            // expected: the query was cancelled on purpose just above
                        }
                        return;
                    }

                    if (Log.IsDebugEnabled)
                    {
                        Log.Debug("For new index {0}, using precomputed indexing batch optimization for {1} docs",
                                  index, op.Header.TotalResults);
                    }

                    // Accumulated in a long so that summing many int-sized documents cannot overflow
                    // before the limit check below gets a chance to trip.
                    long totalLoadedDocumentSize = 0;
                    const int totalSizeToCheck = 16 * 1024 * 1024; //16MB
                    var localLoadedDocumentSize = 0;
                    op.Execute(document =>
                    {
                        // Split the streamed JSON into its metadata and its data part.
                        var metadata = document.Value<RavenJObject>(Constants.Metadata);
                        var key = metadata.Value<string>("@id");
                        var etag = Etag.Parse(metadata.Value<string>("@etag"));
                        var lastModified = DateTime.Parse(metadata.Value<string>(Constants.LastModified));
                        document.Remove(Constants.Metadata);
                        var serializedSizeOnDisk = metadata.Value<int>(Constants.SerializedSizeOnDisk);
                        metadata.Remove(Constants.SerializedSizeOnDisk);

                        var doc = new JsonDocument
                        {
                            DataAsJson = document,
                            Etag = etag,
                            Key = key,
                            SerializedSizeOnDisk = serializedSizeOnDisk,
                            LastModified = lastModified,
                            SkipDeleteFromIndex = true,
                            Metadata = metadata
                        };

                        docsToIndex.Add(doc);
                        totalLoadedDocumentSize += serializedSizeOnDisk;
                        localLoadedDocumentSize += serializedSizeOnDisk;

                        if (totalLoadedDocumentSize > Database.Configuration.MaxPrecomputedBatchTotalDocumentSizeInBytes)
                        {
                            // Both figures are byte counts, so both are converted before being labelled MB.
                            var error = $"Aborting applying precomputed batch for index id: {index.indexId}, " +
                                        $"name: {index.PublicName} because we have {totalLoadedDocumentSize / 1024 / 1024}MB of documents that were fetched " +
                                        $"and the configured max data to fetch is " +
                                        $"{Database.Configuration.MaxPrecomputedBatchTotalDocumentSizeInBytes / 1024 / 1024}MB";

                            //we are aborting operation, so don't keep the references
                            docsToIndex.Clear();
                            throw new TotalDataSizeExceededException(error);
                        }

                        // Only look at available memory once more than 16MB has been loaded since the last look.
                        if (localLoadedDocumentSize <= totalSizeToCheck)
                        {
                            return;
                        }

                        localLoadedDocumentSize = 0;

                        if (Database.Configuration.MemoryLimitForProcessingInMb > MemoryStatistics.AvailableMemoryInMb)
                        {
                            var error = $"Aborting applying precomputed batch for index id: {index.indexId}, " +
                                        $"name: {index.PublicName} because we have {MemoryStatistics.AvailableMemoryInMb}MB " +
                                        $"of available memory and the available memory for processing is: " +
                                        $"{Database.Configuration.MemoryLimitForProcessingInMb}MB";

                            //we are aborting operation, so don't keep the references
                            docsToIndex.Clear();
                            throw new TotalDataSizeExceededException(error);
                        }
                    });

                    result = new PrecomputedIndexingBatch
                    {
                        LastIndexed = op.Header.IndexEtag,
                        LastModified = op.Header.IndexTimestamp,
                        Documents = docsToIndex,
                        Index = index
                    };
                }
            });

            // An empty batch is still applied: the previous `Count >= 0` test was always true.
            if (result == null || result.Documents == null)
            {
                return;
            }

            using (var linked = CancellationTokenSource.CreateLinkedTokenSource(cts.Token, WorkContext.CancellationToken))
            {
                Database.IndexingExecuter.IndexPrecomputedBatch(result, linked.Token);

                if (index.IsTestIndex)
                {
                    TransactionalStorage.Batch(accessor => accessor.Indexing.TouchIndexEtag(index.IndexId));
                }
            }
        }
Example #4
0
        /// <summary>
        /// Attempts to seed a newly created index from a single precomputed batch. The documents are
        /// obtained by querying the "Raven/DocumentsByEntityName" index for every entity name in
        /// <c>generator.ForEntityNames</c>; the batch is stamped with that index's last indexed
        /// etag and timestamp.
        /// </summary>
        /// <remarks>
        /// The optimization is skipped when the query reports zero results, more than 25% of the
        /// total document count, or more than four times
        /// <c>Database.Configuration.MaxNumberOfItemsToIndexInSingleBatch</c>.
        /// </remarks>
        /// <param name="index">The new index the batch is built for.</param>
        /// <param name="generator">View generator whose <c>ForEntityNames</c> build the tag query.</param>
        private void ApplyPrecomputedBatchForNewIndex(Index index, AbstractViewGenerator generator)
        {
            const string DocumentsByEntityNameIndex = "Raven/DocumentsByEntityName";

            PrecomputedIndexingBatch result = null;

            var docsToIndex = new List<JsonDocument>();

            TransactionalStorage.Batch(actions =>
            {
                var countOfDocuments = actions.Documents.GetDocumentsCount();

                // One "Tag:[[name]]" clause per entity name, OR-ed together.
                var tags = generator.ForEntityNames.Select(entityName => "Tag:[[" + entityName + "]]").ToList();

                var query = string.Join(" OR ", tags);
                var stats =
                    actions.Indexing.GetIndexStats(
                        IndexDefinitionStorage.GetIndexDefinition(DocumentsByEntityNameIndex).IndexId);

                var lastIndexedEtagByRavenDocumentsByEntityName = stats.LastIndexedEtag;
                var lastModifiedByRavenDocumentsByEntityName = stats.LastIndexedTimestamp;

                // Upper bound on how many documents this optimization is willing to load.
                var maxDocsForOptimization = Database.Configuration.MaxNumberOfItemsToIndexInSingleBatch * 4;

                // The cancellation source is created here, so it is disposed here as well.
                using (var cts = new CancellationTokenSource())
                using (var linked = CancellationTokenSource.CreateLinkedTokenSource(cts.Token, WorkContext.CancellationToken))
                using (var op = new QueryActions.DatabaseQueryOperation(Database, DocumentsByEntityNameIndex, new IndexQuery
                {
                    Query = query,
                    // Ask for everything up to the cut-off explicitly. The batch below is stamped with
                    // the source index's last indexed etag, so a truncated page would leave documents
                    // unindexed. NOTE(review): IndexQuery's default page size is not visible here - confirm.
                    PageSize = maxDocsForOptimization
                }, actions, linked)
                {
                    ShouldSkipDuplicateChecking = true
                })
                {
                    op.Init();
                    if (op.Header.TotalResults == 0 ||
                        op.Header.TotalResults > (countOfDocuments * 0.25) ||
                        (op.Header.TotalResults > maxDocsForOptimization))
                    {
                        // we don't apply this optimization if the total number of results is more than
                        // 25% of the count of documents (would be easier to just run it regardless).
                        // or if the number of docs to index is significantly more than the max numbers
                        // to index in a single batch. The idea here is that we need to keep the amount
                        // of memory we use to a manageable level even when introducing a new index to a BIG
                        // database
                        try
                        {
                            cts.Cancel();
                            // we have to run just a little bit of the query to properly setup the disposal
                            op.Execute(o => { });
                        }
                        catch (OperationCanceledException)
                        {
                            // expected: the query was cancelled on purpose just above
                        }
                        return;
                    }

                    Log.Debug("For new index {0}, using precomputed indexing batch optimization for {1} docs", index,
                              op.Header.TotalResults);
                    op.Execute(document =>
                    {
                        // Split the streamed JSON into its metadata and its data part.
                        var metadata = document.Value<RavenJObject>(Constants.Metadata);
                        var key = metadata.Value<string>("@id");
                        var etag = Etag.Parse(metadata.Value<string>("@etag"));
                        var lastModified = DateTime.Parse(metadata.Value<string>(Constants.LastModified));
                        document.Remove(Constants.Metadata);

                        docsToIndex.Add(new JsonDocument
                        {
                            DataAsJson = document,
                            Etag = etag,
                            Key = key,
                            LastModified = lastModified,
                            SkipDeleteFromIndex = true,
                            Metadata = metadata
                        });
                    });
                }

                result = new PrecomputedIndexingBatch
                {
                    LastIndexed = lastIndexedEtagByRavenDocumentsByEntityName,
                    LastModified = lastModifiedByRavenDocumentsByEntityName,
                    Documents = docsToIndex,
                    Index = index
                };
            });

            if (result != null && result.Documents != null && result.Documents.Count > 0)
            {
                Database.IndexingExecuter.IndexPrecomputedBatch(result);
            }
        }
Example #5
0
        /// <summary>
        /// Attempts to seed a newly created index from a single precomputed batch. The documents are
        /// obtained by running the query returned by <c>GetQueryForAllMatchingDocumentsForIndex</c>
        /// against <c>Constants.DocumentsByEntityNameIndex</c>.
        /// </summary>
        /// <remarks>
        /// For non-test indexes the optimization is skipped when the query reports more results than
        /// <paramref name="pageSize"/>. While documents are streamed, the callback aborts by throwing
        /// <c>TotalDataSizeExceededException</c> when the accumulated serialized size exceeds
        /// <c>Database.Configuration.MaxPrecomputedBatchTotalDocumentSizeInBytes</c>, or when, at a
        /// periodic check (after more than 16MB has been loaded since the previous check), available
        /// memory is below <c>Database.Configuration.MemoryLimitForProcessingInMb</c>.
        /// </remarks>
        /// <param name="index">The new index the batch is built for.</param>
        /// <param name="generator">View generator used to build the document query.</param>
        /// <param name="pageSize">Page size set on the query; also the cut-off for non-test indexes.</param>
        /// <param name="cts">
        /// Caller-supplied cancellation source. It is linked with <c>WorkContext.CancellationToken</c>
        /// and is cancelled by this method when the optimization is skipped.
        /// </param>
        private void ApplyPrecomputedBatchForNewIndex(Index index, AbstractViewGenerator generator, int pageSize, CancellationTokenSource cts)
        {
            PrecomputedIndexingBatch result = null;

            var docsToIndex = new List<JsonDocument>();
            TransactionalStorage.Batch(actions =>
            {
                var query = GetQueryForAllMatchingDocumentsForIndex(generator);

                using (DocumentCacher.SkipSetDocumentsInDocumentCache())
                using (var linked = CancellationTokenSource.CreateLinkedTokenSource(cts.Token, WorkContext.CancellationToken))
                using (var op = new QueryActions.DatabaseQueryOperation(Database, Constants.DocumentsByEntityNameIndex, new IndexQuery
                {
                    Query = query,
                    PageSize = pageSize
                }, actions, linked)
                {
                    ShouldSkipDuplicateChecking = true
                })
                {
                    op.Init();

                    //if we are working on a test index, apply the optimization anyway, as the index is capped by small number of results
                    if (index.IsTestIndex == false && op.Header.TotalResults > pageSize)
                    {
                        // we don't apply this optimization if the total number of results
                        // to index is more than the max numbers to index in a single batch.
                        // The idea here is that we need to keep the amount
                        // of memory we use to a manageable level even when introducing a new index to a BIG
                        // database
                        try
                        {
                            cts.Cancel();
                            // we have to run just a little bit of the query to properly setup the disposal
                            op.Execute(o => { });
                        }
                        catch (OperationCanceledException)
                        {
                            // expected: the query was cancelled on purpose just above
                        }
                        return;
                    }

                    if (Log.IsDebugEnabled)
                    {
                        Log.Debug("For new index {0}, using precomputed indexing batch optimization for {1} docs",
                            index, op.Header.TotalResults);
                    }

                    // Accumulated in a long so that summing many int-sized documents cannot overflow
                    // before the limit check below gets a chance to trip.
                    long totalLoadedDocumentSize = 0;
                    const int totalSizeToCheck = 16 * 1024 * 1024; //16MB
                    var localLoadedDocumentSize = 0;
                    op.Execute(document =>
                    {
                        // Split the streamed JSON into its metadata and its data part.
                        var metadata = document.Value<RavenJObject>(Constants.Metadata);
                        var key = metadata.Value<string>("@id");
                        var etag = Etag.Parse(metadata.Value<string>("@etag"));
                        var lastModified = DateTime.Parse(metadata.Value<string>(Constants.LastModified));
                        document.Remove(Constants.Metadata);
                        var serializedSizeOnDisk = metadata.Value<int>(Constants.SerializedSizeOnDisk);
                        metadata.Remove(Constants.SerializedSizeOnDisk);

                        var doc = new JsonDocument
                        {
                            DataAsJson = document,
                            Etag = etag,
                            Key = key,
                            SerializedSizeOnDisk = serializedSizeOnDisk,
                            LastModified = lastModified,
                            SkipDeleteFromIndex = true,
                            Metadata = metadata
                        };

                        docsToIndex.Add(doc);
                        totalLoadedDocumentSize += serializedSizeOnDisk;
                        localLoadedDocumentSize += serializedSizeOnDisk;

                        if (totalLoadedDocumentSize > Database.Configuration.MaxPrecomputedBatchTotalDocumentSizeInBytes)
                        {
                            // Both figures are byte counts, so both are converted before filling the "mb" placeholders.
                            var error = string.Format(
                                @"Aborting applying precomputed batch for index id: {0}, name: {1}
                                    because we have {2}mb of documents that were fetched
                                    and the configured max data to fetch is {3}mb",
                                index.indexId, index.PublicName, totalLoadedDocumentSize / 1024 / 1024,
                                Database.Configuration.MaxPrecomputedBatchTotalDocumentSizeInBytes / 1024 / 1024);

                            //we are aborting operation, so don't keep the references
                            docsToIndex.Clear();
                            throw new TotalDataSizeExceededException(error);
                        }

                        // Only look at available memory once more than 16MB has been loaded since the last look.
                        if (localLoadedDocumentSize <= totalSizeToCheck)
                        {
                            return;
                        }

                        localLoadedDocumentSize = 0;

                        if (Database.Configuration.MemoryLimitForProcessingInMb > MemoryStatistics.AvailableMemoryInMb)
                        {
                            var error = string.Format(
                                @"Aborting applying precomputed batch for index id: {0}, name: {1}
                                    because we have {2}mb of available memory and the available memory for processing is: {3}mb",
                                index.indexId, index.PublicName,
                                MemoryStatistics.AvailableMemoryInMb, Database.Configuration.MemoryLimitForProcessingInMb);

                            //we are aborting operation, so don't keep the references
                            docsToIndex.Clear();
                            throw new TotalDataSizeExceededException(error);
                        }
                    });

                    result = new PrecomputedIndexingBatch
                    {
                        LastIndexed = op.Header.IndexEtag,
                        LastModified = op.Header.IndexTimestamp,
                        Documents = docsToIndex,
                        Index = index
                    };
                }
            });

            // Note: `Count >= 0` is always true, so an empty batch is applied as well.
            if (result != null && result.Documents != null && result.Documents.Count >= 0)
            {
                using (var linked = CancellationTokenSource.CreateLinkedTokenSource(cts.Token, WorkContext.CancellationToken))
                {
                    Database.IndexingExecuter.IndexPrecomputedBatch(result, linked.Token);

                    if (index.IsTestIndex)
                    {
                        TransactionalStorage.Batch(accessor => accessor.Indexing.TouchIndexEtag(index.IndexId));
                    }
                }
            }
        }