private void MaybeAddFutureBatch(List<JsonDocument> past)
{
    if (context.Configuration.DisableDocumentPreFetchingForIndexing || context.RunIndexing == false)
        return;
    if (context.Configuration.MaxNumberOfParallelIndexTasks == 1)
        return;
    if (past.Count == 0)
        return;
    if (prefetchingQueue.LoadedSize > autoTuner.MaximumSizeAllowedToFetchFromStorage)
        return; // already have too much in memory
    // don't keep _too_ much in memory
    if (prefetchingQueue.Count > context.Configuration.MaxNumberOfItemsToIndexInSingleBatch * 2)
        return;

    // estimate the average number of documents per recent indexing batch (defaults to 1024)
    var size = 1024;
    var count = context.LastActualIndexingBatchInfo.Count;
    if (count > 0)
    {
        size = context.LastActualIndexingBatchInfo.Aggregate(0, (o, c) => o + c.TotalDocumentCount) / count;
    }

    // estimate how much the pending future batches already hold
    var alreadyLoadedSize = futureIndexBatches.Values.Sum(x =>
    {
        if (x.Task.IsCompleted)
            return x.Task.Result.Sum(doc => doc.SerializedSizeOnDisk);

        return size;
    });

    if (alreadyLoadedSize > context.Configuration.AvailableMemoryForRaisingIndexBatchSizeLimit)
        return;

    if (futureIndexBatches.Count > 5) // we limit the number of future calls we do
    {
        int alreadyLoaded = futureIndexBatches.Values.Sum(x =>
        {
            if (x.Task.IsCompleted)
                return x.Task.Result.Count;

            return autoTuner.NumberOfItemsToIndexInSingleBatch / 4 * 3;
        });

        if (alreadyLoaded > autoTuner.NumberOfItemsToIndexInSingleBatch)
            return;
    }

    // ensure we don't do TOO much future caching
    if (MemoryStatistics.AvailableMemory < context.Configuration.AvailableMemoryForRaisingIndexBatchSizeLimit)
        return;

    // we loaded the maximum amount, there are probably more items to read now.
    Etag highestLoadedEtag = GetHighestEtag(past);
    Etag nextEtag = GetNextDocumentEtagFromDisk(highestLoadedEtag);

    if (nextEtag == highestLoadedEtag)
        return; // there is nothing newer to do

    if (futureIndexBatches.ContainsKey(nextEtag)) // already loading this
        return;

    var futureBatchStat = new FutureBatchStats
    {
        Timestamp = SystemTime.UtcNow
    };
    Stopwatch sp = Stopwatch.StartNew();
    context.AddFutureBatch(futureBatchStat);

    futureIndexBatches.TryAdd(nextEtag, new FutureIndexBatch
    {
        StartingEtag = nextEtag,
        Age = Interlocked.Increment(ref currentIndexingAge),
        Task = Task.Factory.StartNew(() =>
        {
            List<JsonDocument> jsonDocuments = null;
            int localWork = 0;
            // keep reading from disk until documents show up or indexing is stopped
            while (context.RunIndexing)
            {
                jsonDocuments = GetJsonDocsFromDisk(Abstractions.Util.EtagUtil.Increment(nextEtag, -1), null);
                if (jsonDocuments.Count > 0)
                    break;

                futureBatchStat.Retries++;
                context.WaitForWork(TimeSpan.FromMinutes(10), ref localWork, "PreFetching");
            }

            futureBatchStat.Duration = sp.Elapsed;
            futureBatchStat.Size = jsonDocuments == null ? 0 : jsonDocuments.Count;

            if (jsonDocuments != null)
            {
                MaybeAddFutureBatch(jsonDocuments);
            }
            return jsonDocuments;
        })
    });
}
private void AddFutureBatch(Etag nextEtag, Etag untilEtag)
{
    var futureBatchStat = new FutureBatchStats
    {
        Timestamp = SystemTime.UtcNow,
        PrefetchingUser = PrefetchingUser
    };
    Stopwatch sp = Stopwatch.StartNew();
    context.AddFutureBatch(futureBatchStat);

    var futureIndexBatch = new FutureIndexBatch
    {
        StartingEtag = nextEtag,
        Age = Interlocked.Increment(ref currentIndexingAge),
        Task = Task.Factory.StartNew(() =>
        {
            List<JsonDocument> jsonDocuments = null;
            int localWork = 0;
            var earlyExit = new Reference<bool>();
            // keep reading from disk until documents show up or indexing is stopped
            while (context.RunIndexing)
            {
                jsonDocuments = GetJsonDocsFromDisk(Abstractions.Util.EtagUtil.Increment(nextEtag, -1), untilEtag, earlyExit);
                if (jsonDocuments.Count > 0)
                    break;

                futureBatchStat.Retries++;
                context.WaitForWork(TimeSpan.FromMinutes(10), ref localWork, "PreFetching");
            }

            if (log.IsDebugEnabled && jsonDocuments != null)
            {
                var size = jsonDocuments.Sum(x => x.SerializedSizeOnDisk) / 1024;
                log.Debug("Got {0} documents ({3:#,#;;0} kb) in a future batch, starting from etag {1}, took {2:#,#;;0}ms",
                    jsonDocuments.Count, nextEtag, sp.ElapsedMilliseconds, size);

                if (size > jsonDocuments.Count * 8 || sp.ElapsedMilliseconds > 3000)
                {
                    var topSizes = jsonDocuments
                        .OrderByDescending(x => x.SerializedSizeOnDisk)
                        .Take(10)
                        .Select(x => string.Format("{0} - {1:#,#;;0}kb", x.Key, x.SerializedSizeOnDisk / 1024));
                    log.Debug("Slow load of documents in batch, maybe large docs? Top 10 largest docs are: ({0})",
                        string.Join(", ", topSizes));
                }
            }

            futureBatchStat.Duration = sp.Elapsed;
            futureBatchStat.Size = jsonDocuments == null ? 0 : jsonDocuments.Count;

            if (jsonDocuments == null)
                return null;

            if (untilEtag != null && earlyExit.Value)
            {
                // the read stopped early; schedule another future batch to cover the
                // remaining range up to untilEtag
                var lastEtag = GetHighestEtag(jsonDocuments);
                context.TransactionalStorage.Batch(accessor =>
                {
                    lastEtag = accessor.Documents.GetBestNextDocumentEtag(lastEtag);
                });

                if (log.IsDebugEnabled)
                {
                    log.Debug("Early exit from last future split batch, need to fetch documents from etag: {0} to etag: {1}",
                        lastEtag, untilEtag);
                }

                AddFutureBatch(lastEtag, untilEtag);
            }
            else
            {
                MaybeAddFutureBatch(jsonDocuments);
            }
            return jsonDocuments;
        }).ContinueWith(t =>
        {
            t.AssertNotFailed();
            return t.Result;
        })
    };

    futureIndexBatch.Task.ContinueWith(t =>
    {
        FutureBatchCompleted(t.Result.Count);
    });

    futureIndexBatches.TryAdd(nextEtag, futureIndexBatch);
}
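// The types below are a minimal sketch, inferred only from how the two methods above
// use them (StartingEtag/Age/Task on FutureIndexBatch; Timestamp, PrefetchingUser,
// Retries, Duration and Size on FutureBatchStats). The real declarations, namespaces
// and property types (notably PrefetchingUser) may differ; usings for System,
// System.Collections.Generic, System.Threading.Tasks and the Etag/JsonDocument types
// are assumed to come from the surrounding file.
public class FutureIndexBatch
{
    public Etag StartingEtag { get; set; }              // first etag covered by this future batch
    public int Age { get; set; }                        // indexing age at the time the batch was scheduled
    public Task<List<JsonDocument>> Task { get; set; }  // background task that loads the documents from disk
}

public class FutureBatchStats
{
    public DateTime Timestamp { get; set; }     // when the future batch was scheduled
    public object PrefetchingUser { get; set; } // assumed type; set from the prefetcher's user in AddFutureBatch
    public int Retries { get; set; }            // number of empty reads before documents were found
    public TimeSpan Duration { get; set; }      // total time spent loading the batch
    public int Size { get; set; }               // number of documents loaded
}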