/// <summary>
/// Collects documents to index starting after <paramref name="etag"/>, preferring the
/// in-memory prefetching queue and falling back to future batches or disk when the
/// queue's head is not the next document needed. May return duplicates; the caller
/// is expected to de-duplicate.
/// </summary>
/// <param name="etag">Etag of the last document already indexed; iteration resumes after it.</param>
/// <returns>The loaded documents, in etag order; possibly empty, never null.</returns>
// NOTE(review): a second revision of this method with an identical signature appears
// later in this file; both cannot coexist in one class (CS0111) — confirm which is intended.
private List<JsonDocument> GetDocsFromBatchWithPossibleDuplicates(Etag etag)
{
    var result = new List<JsonDocument>();
    bool docsLoaded;

    do
    {
        var nextEtagToIndex = GetNextDocEtag(etag);
        var firstEtagInQueue = prefetchingQueue.NextDocumentETag();

        if (nextEtagToIndex != firstEtagInQueue)
        {
            if (TryLoadDocumentsFromFutureBatches(nextEtagToIndex) == false)
            {
                // here we _intentionally_ use the current etag, not the next one
                LoadDocumentsFromDisk(etag, firstEtagInQueue);
            }
        }

        docsLoaded = TryGetDocumentsFromQueue(nextEtagToIndex, result);
        if (docsLoaded)
        {
            // Advance past the last document we actually pulled.
            etag = result[result.Count - 1].Etag;
        }
    }
    // Keep fetching until the batch is full, the queue runs dry, or memory pressure
    // says stop. BUG FIX: the queue/batch sizes are in bytes (SerializedSizeOnDisk),
    // while MemoryLimitForIndexingInMB is megabytes — convert the limit to bytes
    // before comparing (previously the limit tripped ~a million times too early).
    // 1024L keeps the multiplication from overflowing Int32 for large limits.
    // NOTE(review): assumes CurrentlyUsedBatchSizes values are bytes, matching
    // SerializedSizeOnDisk — confirm against the auto-tuner.
    while (result.Count < autoTuner.NumberOfItemsToIndexInSingleBatch &&
           docsLoaded &&
           (prefetchingQueue.Aggregate(0, (acc, doc) => acc + doc.SerializedSizeOnDisk) +
            autoTuner.CurrentlyUsedBatchSizes.Values.Sum()) <
               context.Configuration.MemoryLimitForIndexingInMB * 1024L * 1024L);

    return result;
}
/// <summary>
/// Collects documents to process starting after <paramref name="etag"/>, preferring the
/// in-memory prefetching queue and falling back to future batches or disk when the
/// queue's head is not the next document needed. May return duplicates; the caller
/// is expected to de-duplicate.
/// </summary>
/// <param name="etag">Etag of the last document already processed; iteration resumes after it.</param>
/// <returns>The loaded documents, in etag order; possibly empty, never null.</returns>
private List<JsonDocument> GetDocsFromBatchWithPossibleDuplicates(Etag etag)
{
    var docs = new List<JsonDocument>();
    bool loadedAny;
    int queueSizeInBytes;
    var durationTimer = Stopwatch.StartNew();

    do
    {
        var etagToIndex = GetNextDocEtag(etag);
        var queueHeadEtag = prefetchingQueue.NextDocumentETag();

        // Queue head is not the document we need next: try a future batch first,
        // and only hit the disk when that fails. The disk load _intentionally_
        // uses the current etag, not the next one.
        if (etagToIndex != queueHeadEtag &&
            TryLoadDocumentsFromFutureBatches(etagToIndex) == false)
        {
            LoadDocumentsFromDisk(etag, queueHeadEtag);
        }

        loadedAny = TryGetDocumentsFromQueue(etagToIndex, docs);
        if (loadedAny)
        {
            // Advance past the last document actually pulled.
            var lastLoaded = docs[docs.Count - 1];
            etag = lastLoaded.Etag;
        }

        queueSizeInBytes = prefetchingQueue.Aggregate(0, (acc, doc) => acc + doc.SerializedSizeOnDisk);
    }
    // Stop when the batch is full, the queue runs dry, the prefetching time
    // budget is exhausted, or memory pressure (queue + in-flight batches, bytes)
    // reaches the configured megabyte limit.
    while (docs.Count < autoTuner.NumberOfItemsToProcessInSingleBatch &&
           loadedAny &&
           durationTimer.ElapsedMilliseconds <= context.Configuration.PrefetchingDurationLimit &&
           (queueSizeInBytes + autoTuner.CurrentlyUsedBatchSizesInBytes.Values.Sum()) <
               (context.Configuration.MemoryLimitForProcessingInMb * 1024 * 1024));

    return docs;
}