Beispiel #1
0
        /// <summary>
        /// Collects documents that follow <paramref name="etag"/> by repeatedly draining the
        /// prefetching queue. As the method name indicates, the returned list may contain
        /// duplicate documents.
        /// </summary>
        /// <param name="etag">
        /// The current etag. The etag to read next is derived from it via GetNextDocEtag, and it
        /// is advanced to the etag of the last collected document after every successful read.
        /// </param>
        /// <returns>The documents collected so far; possibly empty.</returns>
        private List<JsonDocument> GetDocsFromBatchWithPossibleDuplicates(Etag etag)
        {
            var result = new List<JsonDocument>();
            bool docsLoaded;

            do
            {
                var nextEtagToIndex = GetNextDocEtag(etag);
                var firstEtagInQueue = prefetchingQueue.NextDocumentETag();

                // The queue does not start at the etag we need: try the future batches first and
                // fall back to loading from disk only when that fails.
                if (nextEtagToIndex != firstEtagInQueue)
                {
                    if (TryLoadDocumentsFromFutureBatches(nextEtagToIndex) == false)
                    {
                        // here we _intentionally_ use the current etag, not the next one
                        LoadDocumentsFromDisk(etag, firstEtagInQueue);
                    }
                }

                docsLoaded = TryGetDocumentsFromQueue(nextEtagToIndex, result);

                if (docsLoaded)
                {
                    // Continue from the last document that was just appended.
                    etag = result[result.Count - 1].Etag;
                }

                // Keep pulling while: the batch is not full, the last read produced documents,
                // and the queued document sizes plus the batch sizes currently in use stay below
                // the configured memory limit.
                // NOTE(review): the sizes are assumed to be in bytes (confirm against
                // SerializedSizeOnDisk / CurrentlyUsedBatchSizes), so the MB limit is converted to
                // bytes here. Both sides are evaluated as long so neither the sum nor the
                // conversion can wrap in 32-bit arithmetic.
            } while (result.Count < autoTuner.NumberOfItemsToIndexInSingleBatch &&
                     docsLoaded &&
                     ((long)prefetchingQueue.Aggregate(0, (acc, doc) => acc + doc.SerializedSizeOnDisk) +
                      autoTuner.CurrentlyUsedBatchSizes.Values.Sum()) < (context.Configuration.MemoryLimitForIndexingInMB * 1024L * 1024L));

            return result;
        }
Beispiel #2
0
        /// <summary>
        /// Collects documents that follow <paramref name="etag"/> by repeatedly draining the
        /// prefetching queue. As the method name indicates, the returned list may contain
        /// duplicate documents.
        /// </summary>
        /// <param name="etag">
        /// The current etag. The etag to read next is derived from it via GetNextDocEtag, and it
        /// is advanced to the etag of the last collected document after every successful read.
        /// </param>
        /// <returns>The documents collected so far; possibly empty.</returns>
        private List<JsonDocument> GetDocsFromBatchWithPossibleDuplicates(Etag etag)
        {
            var result = new List<JsonDocument>();
            bool docsLoaded;
            int prefetchingQueueSizeInBytes;
            var prefetchingDurationTimer = Stopwatch.StartNew();

            do
            {
                var nextEtagToIndex = GetNextDocEtag(etag);
                var firstEtagInQueue = prefetchingQueue.NextDocumentETag();

                // The queue does not start at the etag we need: try the future batches first and
                // fall back to loading from disk only when that fails.
                if (nextEtagToIndex != firstEtagInQueue)
                {
                    if (TryLoadDocumentsFromFutureBatches(nextEtagToIndex) == false)
                    {
                        // here we _intentionally_ use the current etag, not the next one
                        LoadDocumentsFromDisk(etag, firstEtagInQueue);
                    }
                }

                docsLoaded = TryGetDocumentsFromQueue(nextEtagToIndex, result);

                if (docsLoaded)
                {
                    // Continue from the last document that was just appended.
                    etag = result[result.Count - 1].Etag;
                }

                // Total serialized size of everything still waiting in the prefetching queue.
                prefetchingQueueSizeInBytes = prefetchingQueue.Aggregate(0, (acc, doc) => acc + doc.SerializedSizeOnDisk);

                // Keep pulling while: the batch is not full, the last read produced documents,
                // the prefetching duration limit has not been exceeded, and queued bytes plus
                // the batch bytes currently in use stay below the configured memory limit.
                // The MB -> bytes conversion uses long multipliers: with 32-bit arithmetic a limit
                // of 2048 MB or more would wrap around and could end the loop after a single pass.
            } while (result.Count < autoTuner.NumberOfItemsToProcessInSingleBatch && docsLoaded &&
                     prefetchingDurationTimer.ElapsedMilliseconds <= context.Configuration.PrefetchingDurationLimit &&
                     ((long)prefetchingQueueSizeInBytes + autoTuner.CurrentlyUsedBatchSizesInBytes.Values.Sum()) < (context.Configuration.MemoryLimitForProcessingInMb * 1024L * 1024L));

            return result;
        }