public override void AfterDelete(string key, TransactionInformation transactionInformation)
{
    var versioningConfig = Database.GetDocumentVersioningConfiguration(versionInformer.Value[key]);

    using (Database.DisableAllTriggersForCurrentThread())
    {
        Database.TransactionalStorage.Batch(accessor =>
        {
            using (DocumentCacher.SkipSetDocumentsInDocumentCache())
            {
                foreach (var jsonDocument in accessor.Documents.GetDocumentsWithIdStartingWith(key + "/revisions/", 0, int.MaxValue, null))
                {
                    if (jsonDocument == null)
                        continue;

                    if (versioningConfig != null && versioningConfig.PurgeOnDelete)
                    {
                        // purge the revisions together with the deleted document
                        Database.Documents.Delete(jsonDocument.Key, null, transactionInformation);
                    }
                    else
                    {
                        // keep the revision, but make it writable again
                        jsonDocument.Metadata.Remove(Constants.RavenReadOnly);
                        accessor.Documents.AddDocument(jsonDocument.Key, jsonDocument.Etag, jsonDocument.DataAsJson, jsonDocument.Metadata);
                    }
                }
            }
        });
    }
}
private HttpResponseMessage OnBulkOperation(Func<string, IndexQuery, BulkOperationOptions, Action<BulkOperationProgress>, RavenJArray> batchOperation, string index, CancellationTimeout timeout)
{
    if (string.IsNullOrEmpty(index))
        return GetEmptyMessage(HttpStatusCode.BadRequest);

    var option = new BulkOperationOptions
    {
        AllowStale = GetAllowStale(),
        MaxOpsPerSec = GetMaxOpsPerSec(),
        StaleTimeout = GetStaleTimeout(),
        RetrieveDetails = GetRetrieveDetails()
    };

    var indexQuery = GetIndexQuery(maxPageSize: int.MaxValue);
    var status = new BulkOperationStatus();
    long id;

    var task = Task.Factory.StartNew(() =>
    {
        using (DocumentCacher.SkipSetDocumentsInDocumentCache())
        {
            status.State["Batch"] = batchOperation(index, indexQuery, option, x =>
            {
                status.MarkProgress(x);
            });
        }
    }).ContinueWith(t =>
    {
        if (timeout != null)
            timeout.Dispose();

        if (t.IsFaulted == false)
        {
            status.MarkCompleted($"Processed {status.OperationProgress.ProcessedEntries} items");
            return;
        }

        var exception = t.Exception.ExtractSingleInnerException();
        status.MarkFaulted(exception.Message);
    });

    Database.Tasks.AddTask(task, status, new TaskActions.PendingTaskDescription
    {
        StartTime = SystemTime.UtcNow,
        TaskType = TaskActions.PendingTaskType.IndexBulkOperation,
        Description = index
    }, out id, timeout?.CancellationTokenSource); // timeout may be null (the continuation guards for it), so guard this dereference too

    return GetMessageWithObject(new { OperationId = id }, HttpStatusCode.Accepted);
}
public RavenJArray GetDocumentsWithIdStartingWith(string idPrefix, string matches, string exclude, int start, int pageSize, CancellationToken token, ref int nextStart, string transformer = null, Dictionary<string, RavenJToken> transformerParameters = null, string skipAfter = null)
{
    using (DocumentCacher.SkipSetDocumentsInDocumentCache())
    {
        var list = new RavenJArray();
        GetDocumentsWithIdStartingWith(idPrefix, matches, exclude, start, pageSize, token, ref nextStart, doc =>
        {
            if (doc != null)
                list.Add(doc.ToJson());
        }, transformer, transformerParameters, skipAfter);
        return list;
    }
}
private void WriteDocuments(JsonTextWriter jsonWriter)
{
    long totalDocsCount = 0;
    storage.Batch(accessor => totalDocsCount = accessor.Documents.GetDocumentsCount());

    using (DocumentCacher.SkipSetDocumentsInDocumentCache())
    {
        if (DocumentsStartEtag == Etag.Empty)
            ExtractDocuments(jsonWriter, totalDocsCount);
        else
            ExtractDocumentsFromEtag(jsonWriter, totalDocsCount);
    }
}
public Etag GetDocuments(int start, int pageSize, Etag etag, CancellationToken token, Func<JsonDocument, bool> addDocument, string transformer = null, Dictionary<string, RavenJToken> transformerParameters = null, long? maxSize = null, TimeSpan? timeout = null)
{
    Etag lastDocumentReadEtag = null;

    using (DocumentCacher.SkipSetDocumentsInDocumentCache())
    {
        TransactionalStorage.Batch(actions =>
        {
            AbstractTransformer storedTransformer = null;
            if (transformer != null)
            {
                storedTransformer = IndexDefinitionStorage.GetTransformer(transformer);
                if (storedTransformer == null)
                    throw new InvalidOperationException("No transformer with the name: " + transformer);
            }

            var returnedDocs = false;
            while (true)
            {
                var documents = etag == null
                    ? actions.Documents.GetDocumentsByReverseUpdateOrder(start, pageSize)
                    : actions.Documents.GetDocumentsAfter(etag, pageSize, token, maxSize: maxSize, timeout: timeout);

                var documentRetriever = new DocumentRetriever(Database.Configuration, actions, Database.ReadTriggers, transformerParameters);
                var docCount = 0;
                var docCountOnLastAdd = 0;

                foreach (var doc in documents)
                {
                    docCount++;
                    token.ThrowIfCancellationRequested();

                    if (docCount - docCountOnLastAdd > 1000)
                        addDocument(null); // heartbeat

                    if (etag != null)
                        etag = doc.Etag;

                    JsonDocument.EnsureIdInMetadata(doc);

                    var nonAuthoritativeInformationBehavior = actions.InFlightStateSnapshot.GetNonAuthoritativeInformationBehavior<JsonDocument>(null, doc.Key);
                    var document = nonAuthoritativeInformationBehavior == null ? doc : nonAuthoritativeInformationBehavior(doc);

                    document = documentRetriever.ExecuteReadTriggers(document, null, ReadOperation.Load);
                    if (document == null)
                        continue;

                    returnedDocs = true;
                    Database.WorkContext.UpdateFoundWork();

                    document = TransformDocumentIfNeeded(document, storedTransformer, documentRetriever);

                    var canContinue = addDocument(document);
                    if (!canContinue)
                        break;

                    lastDocumentReadEtag = etag;
                    docCountOnLastAdd = docCount;
                }

                if (returnedDocs || docCount == 0)
                    break;

                // No document matched the requested criteria.
                // If a failure happened, advance the etag so we don't process those documents again (there are no matches there anyway).
                if (lastDocumentReadEtag != null)
                    etag = lastDocumentReadEtag;

                start += docCount;
            }
        });
    }

    return lastDocumentReadEtag;
}
private void ApplyPrecomputedBatchForNewIndex(Index index, AbstractViewGenerator generator, int pageSize, CancellationTokenSource cts)
{
    PrecomputedIndexingBatch result = null;
    var docsToIndex = new List<JsonDocument>();

    TransactionalStorage.Batch(actions =>
    {
        var query = QueryBuilder.GetQueryForAllMatchingDocumentsForIndex(Database, generator.ForEntityNames);

        using (DocumentCacher.SkipSetDocumentsInDocumentCache())
        using (var linked = CancellationTokenSource.CreateLinkedTokenSource(cts.Token, WorkContext.CancellationToken))
        using (var op = new QueryActions.DatabaseQueryOperation(Database, Constants.DocumentsByEntityNameIndex, new IndexQuery
        {
            Query = query,
            PageSize = pageSize
        }, actions, linked)
        {
            ShouldSkipDuplicateChecking = true
        })
        {
            op.Init();

            // if we are working on a test index, apply the optimization anyway, as the index is capped by a small number of results
            if (op.Header.TotalResults > pageSize && index.IsTestIndex == false)
            {
                // we don't apply this optimization if the total number of results
                // to index is more than the max number to index in a single batch.
                // The idea here is that we need to keep the amount
                // of memory we use to a manageable level even when introducing a new index to a BIG
                // database
                try
                {
                    cts.Cancel();
                    // we have to run just a little bit of the query to properly set up the disposal
                    op.Execute(o => { });
                }
                catch (OperationCanceledException)
                {
                }
                return;
            }

            if (Log.IsDebugEnabled)
                Log.Debug("For new index {0}, using precomputed indexing batch optimization for {1} docs", index, op.Header.TotalResults);

            var totalLoadedDocumentSize = 0;
            const int totalSizeToCheck = 16 * 1024 * 1024; // 16MB
            var localLoadedDocumentSize = 0;

            op.Execute(document =>
            {
                var metadata = document.Value<RavenJObject>(Constants.Metadata);
                var key = metadata.Value<string>("@id");
                var etag = Etag.Parse(metadata.Value<string>("@etag"));
                var lastModified = DateTime.Parse(metadata.Value<string>(Constants.LastModified));
                document.Remove(Constants.Metadata);
                var serializedSizeOnDisk = metadata.Value<int>(Constants.SerializedSizeOnDisk);
                metadata.Remove(Constants.SerializedSizeOnDisk);

                var doc = new JsonDocument
                {
                    DataAsJson = document,
                    Etag = etag,
                    Key = key,
                    SerializedSizeOnDisk = serializedSizeOnDisk,
                    LastModified = lastModified,
                    SkipDeleteFromIndex = true,
                    Metadata = metadata
                };

                docsToIndex.Add(doc);
                totalLoadedDocumentSize += serializedSizeOnDisk;
                localLoadedDocumentSize += serializedSizeOnDisk;

                if (totalLoadedDocumentSize > Database.Configuration.MaxPrecomputedBatchTotalDocumentSizeInBytes)
                {
                    var error = $"Aborting applying precomputed batch for index id: {index.IndexId}, " +
                                $"name: {index.PublicName} because we have {totalLoadedDocumentSize/1024/1024}MB of documents that were fetched " +
                                $"and the configured max data to fetch is " +
                                $"{Database.Configuration.MaxPrecomputedBatchTotalDocumentSizeInBytes/1024/1024}MB";

                    // we are aborting the operation, so don't keep the references
                    docsToIndex.Clear();
                    throw new TotalDataSizeExceededException(error);
                }

                // only check available memory once per 16MB of loaded documents
                if (localLoadedDocumentSize <= totalSizeToCheck)
                    return;

                localLoadedDocumentSize = 0;

                if (Database.Configuration.MemoryLimitForProcessingInMb > MemoryStatistics.AvailableMemoryInMb)
                {
                    var error = $"Aborting applying precomputed batch for index id: {index.IndexId}, " +
                                $"name: {index.PublicName} because we have {MemoryStatistics.AvailableMemoryInMb}MB " +
                                $"of available memory and the configured memory limit for processing is: " +
                                $"{Database.Configuration.MemoryLimitForProcessingInMb}MB";

                    // we are aborting the operation, so don't keep the references
                    docsToIndex.Clear();
                    throw new TotalDataSizeExceededException(error);
                }
            });

            result = new PrecomputedIndexingBatch
            {
                LastIndexed = op.Header.IndexEtag,
                LastModified = op.Header.IndexTimestamp,
                Documents = docsToIndex,
                Index = index
            };
        }
    });

    // Count >= 0 is always true: the batch is applied even when empty
    if (result != null && result.Documents != null && result.Documents.Count >= 0)
    {
        using (var linked = CancellationTokenSource.CreateLinkedTokenSource(cts.Token, WorkContext.CancellationToken))
        {
            Database.IndexingExecuter.IndexPrecomputedBatch(result, linked.Token);

            if (index.IsTestIndex)
                TransactionalStorage.Batch(accessor => accessor.Indexing.TouchIndexEtag(index.IndexId));
        }
    }
}
private List<JsonDocument> GetJsonDocsFromDisk(Etag etag, Etag untilEtag, Reference<bool> earlyExit = null)
{
    List<JsonDocument> jsonDocs = null;

    // We take a snapshot because the implementation of accessing Values from a ConcurrentDictionary involves a lock.
    // Taking the snapshot should be safe enough.
    var currentlyUsedBatchSizesInBytes = Size.Sum(autoTuner.CurrentlyUsedBatchSizesInBytes.Values);

    using (DocumentCacher.SkipSetDocumentsInDocumentCache())
    {
        context.TransactionalStorage.Batch(actions =>
        {
            // limit how much data we load from disk --> better adhere to memory limits
            var totalSizeAllowedToLoadInBytes = (context.Configuration.Memory.DynamicLimitForProcessing) -
                                                (prefetchingQueue.LoadedSize + currentlyUsedBatchSizesInBytes);

            // at any rate, we will load a min of 512Kb docs
            long maxSize = Size.Max(Size.Min(totalSizeAllowedToLoadInBytes, autoTuner.MaximumSizeAllowedToFetchFromStorage), minSizeToLoadDocs).GetValue(SizeUnit.Bytes);

            var sp = Stopwatch.StartNew();
            var totalSize = 0L;
            var largestDocSize = 0L;
            string largestDocKey = null;

            jsonDocs = actions.Documents
                .GetDocumentsAfter(
                    etag,
                    autoTuner.NumberOfItemsToProcessInSingleBatch,
                    context.CancellationToken,
                    maxSize,
                    untilEtag,
                    autoTuner.FetchingDocumentsFromDiskTimeout,
                    earlyExit: earlyExit
                )
                .Where(x => x != null)
                .Select(doc =>
                {
                    // track the largest document in the batch for the fetch statistics
                    if (largestDocSize < doc.SerializedSizeOnDisk)
                    {
                        largestDocSize = doc.SerializedSizeOnDisk;
                        largestDocKey = doc.Key;
                    }

                    totalSize += doc.SerializedSizeOnDisk;
                    JsonDocument.EnsureIdInMetadata(doc);
                    return doc;
                })
                .ToList();

            loadTimes.Enqueue(new DiskFetchPerformanceStats
            {
                LoadingTimeInMillseconds = sp.ElapsedMilliseconds,
                NumberOfDocuments = jsonDocs.Count,
                TotalSize = totalSize,
                LargestDocSize = largestDocSize,
                LargestDocKey = largestDocKey
            });

            // keep only the most recent eight fetch samples
            while (loadTimes.Count > 8)
            {
                DiskFetchPerformanceStats _;
                loadTimes.TryDequeue(out _);
            }
        });
    }

    return jsonDocs;
}
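Every call site above wraps a large, one-off storage scan in DocumentCacher.SkipSetDocumentsInDocumentCache(), presumably so that bulk reads (exports, bulk operations, precomputed index batches, prefetching) do not flood the document cache and evict hot entries. The real implementation is not part of this excerpt; the following is a minimal sketch of the pattern the call sites imply — a disposable that raises a per-thread "skip caching" flag which the cache-write path would consult. The field name, the SkipSettingDocumentsInDocumentCache accessor, and the nested DisposableAction helper are illustrative assumptions, not RavenDB's actual code.

using System;

public static class DocumentCacher
{
    // assumption: a per-thread flag that the cache-write path checks before
    // adding a freshly read document to the document cache
    [ThreadStatic]
    private static bool skipSettingDocumentsInDocumentCache;

    public static bool SkipSettingDocumentsInDocumentCache
    {
        get { return skipSettingDocumentsInDocumentCache; }
    }

    // returns an IDisposable so call sites can scope the flag with a using
    // block, restoring the previous value even if the scan throws
    public static IDisposable SkipSetDocumentsInDocumentCache()
    {
        var old = skipSettingDocumentsInDocumentCache;
        skipSettingDocumentsInDocumentCache = true;
        return new DisposableAction(() => skipSettingDocumentsInDocumentCache = old);
    }

    private sealed class DisposableAction : IDisposable
    {
        private readonly Action onDispose;
        public DisposableAction(Action onDispose) { this.onDispose = onDispose; }
        public void Dispose() { onDispose(); }
    }
}

Under this sketch, the cache's set path would bail out early whenever SkipSettingDocumentsInDocumentCache is true, so these bulk scans leave the cache untouched while normal load paths keep populating it.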