private void MaybeAddFutureBatch(List<JsonDocument> past)
{
    if (context.Configuration.DisableDocumentPreFetchingForIndexing || context.RunIndexing == false)
        return;
    if (context.Configuration.MaxNumberOfParallelIndexTasks == 1)
        return;
    if (past.Count == 0)
        return;
    if (futureIndexBatches.Count > 5) // we limit the number of future calls we do
    {
        int alreadyLoaded = futureIndexBatches.Values.Sum(x =>
        {
            if (x.Task.IsCompleted)
                return x.Task.Result.Count;
            // a still-running prefetch is estimated at 3/4 of a full batch
            return autoTuner.NumberOfItemsToIndexInSingleBatch / 4 * 3;
        });

        if (alreadyLoaded > autoTuner.NumberOfItemsToIndexInSingleBatch)
            return;
    }

    // ensure we don't do TOO much future caching
    if (MemoryStatistics.AvailableMemory < context.Configuration.AvailableMemoryForRaisingIndexBatchSizeLimit)
        return;

    // we loaded the maximum amount, there are probably more items to read now.
    Etag highestLoadedEtag = GetHighestEtag(past);
    Etag nextEtag = GetNextDocumentEtagFromDisk(highestLoadedEtag);

    if (nextEtag == highestLoadedEtag)
        return; // there is nothing newer to do

    if (futureIndexBatches.ContainsKey(nextEtag)) // already loading this
        return;

    var futureBatchStat = new FutureBatchStats
    {
        Timestamp = SystemTime.UtcNow,
    };
    Stopwatch sp = Stopwatch.StartNew();
    context.AddFutureBatch(futureBatchStat);
    futureIndexBatches.TryAdd(nextEtag, new FutureIndexBatch
    {
        StartingEtag = nextEtag,
        Age = Interlocked.Increment(ref currentIndexingAge),
        Task = Task.Factory.StartNew(() =>
        {
            List<JsonDocument> jsonDocuments = null;
            int localWork = 0;
            while (context.RunIndexing)
            {
                jsonDocuments = GetJsonDocsFromDisk(EtagUtil.Increment(nextEtag, -1), null);
                if (jsonDocuments.Count > 0)
                    break;

                futureBatchStat.Retries++;

                context.WaitForWork(TimeSpan.FromMinutes(10), ref localWork, "PreFetching");
            }
            futureBatchStat.Duration = sp.Elapsed;
            futureBatchStat.Size = jsonDocuments == null ? 0 : jsonDocuments.Count;
            if (jsonDocuments != null)
            {
                MaybeAddFutureBatch(jsonDocuments);
            }
            return jsonDocuments;
        })
    });
}
private void MaybeAddFutureBatch(JsonResults past)
{
    if (context.Configuration.MaxNumberOfParallelIndexTasks == 1)
        return;
    if (past.Results.Length == 0 || past.LoadedFromDisk == false)
        return;
    if (futureIndexBatches.Count > 5) // we limit the number of future calls we do
    {
        var alreadyLoaded = futureIndexBatches.Sum(x =>
        {
            if (x.Task.IsCompleted)
                return x.Task.Result.Results.Length;
            return 0;
        });
        if (alreadyLoaded > autoTuner.NumberOfItemsToIndexInSingleBatch)
            return;
    }

    // ensure we don't do TOO much future caching
    if (MemoryStatistics.AvailableMemory < context.Configuration.AvailableMemoryForRaisingIndexBatchSizeLimit)
        return;

    // we loaded the maximum amount, there are probably more items to read now.
    var nextEtag = GetNextHighestEtag(past.Results);
    var nextBatch = futureIndexBatches.FirstOrDefault(x => x.StartingEtag == nextEtag);
    if (nextBatch != null)
        return;

    var futureBatchStat = new FutureBatchStats
    {
        Timestamp = SystemTime.UtcNow,
    };
    var sp = Stopwatch.StartNew();
    context.AddFutureBatch(futureBatchStat);
    futureIndexBatches.Add(new FutureIndexBatch
    {
        StartingEtag = nextEtag,
        Age = currentIndexingAge,
        Task = System.Threading.Tasks.Task.Factory.StartNew(() =>
        {
            var jsonDocuments = GetJsonDocuments(nextEtag);
            int localWork = workCounter;
            while (jsonDocuments.Results.Length == 0 && context.DoWork)
            {
                futureBatchStat.Retries++;

                if (context.WaitForWork(TimeSpan.FromMinutes(10), ref localWork, "PreFetching") == false)
                    continue;

                jsonDocuments = GetJsonDocuments(nextEtag);
            }
            futureBatchStat.Duration = sp.Elapsed;
            futureBatchStat.Size = jsonDocuments.Results.Length;
            MaybeAddFutureBatch(jsonDocuments);
            return jsonDocuments;
        })
    });
}
public void AddFutureBatch(FutureBatchStats futureBatchStat)
{
    futureBatchStats.Add(futureBatchStat);
    if (futureBatchStats.Count <= 30)
        return;

    // keep the stats collection bounded by dropping the five oldest entries
    foreach (var source in futureBatchStats.OrderBy(x => x.Timestamp).Take(5))
    {
        futureBatchStats.TryRemove(source);
    }
}
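// Illustrative only, not part of the original source: a field declaration that would be
// consistent with the calls above (Add, Count, OrderBy, TryRemove). The concrete
// collection type is an assumption; any thread-safe set exposing these members would do.
private readonly ConcurrentSet<FutureBatchStats> futureBatchStats = new ConcurrentSet<FutureBatchStats>();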
private bool AddFutureBatch(Etag nextEtag, Etag untilEtag, bool isSplitted = false, bool isEarlyExitBatch = false)
{
    var futureBatchStat = new FutureBatchStats
    {
        Timestamp = SystemTime.UtcNow,
        PrefetchingUser = PrefetchingUser
    };
    Stopwatch sp = Stopwatch.StartNew();
    context.AddFutureBatch(futureBatchStat);

    var cts = new CancellationTokenSource();
    var linkedToken = CancellationTokenSource.CreateLinkedTokenSource(cts.Token, context.CancellationToken);

    var futureIndexBatch = new FutureIndexBatch
    {
        StartingEtag = nextEtag,
        Age = Interlocked.Increment(ref currentIndexingAge),
        CancellationTokenSource = linkedToken,
        IsSplitted = isSplitted,
        Task = Task.Factory.StartNew(() =>
        {
            List<JsonDocument> jsonDocuments = null;
            int localWork = 0;
            var earlyExit = new Reference<bool>();
            while (context.RunIndexing)
            {
                linkedToken.Token.ThrowIfCancellationRequested();
                jsonDocuments = GetJsonDocsFromDisk(
                    linkedToken.Token,
                    Abstractions.Util.EtagUtil.Increment(nextEtag, -1), untilEtag, earlyExit);
                if (jsonDocuments.Count > 0)
                    break;

                futureBatchStat.Retries++;

                context.WaitForWork(TimeSpan.FromMinutes(10), ref localWork, "PreFetching");
            }

            futureBatchStat.Duration = sp.Elapsed;
            futureBatchStat.Size = jsonDocuments == null ? 0 : jsonDocuments.Count;

            if (jsonDocuments == null)
                return null;

            LogEarlyExit(nextEtag, untilEtag, isEarlyExitBatch, jsonDocuments, sp.ElapsedMilliseconds);

            if (untilEtag != null && earlyExit.Value)
            {
                var lastEtag = GetHighestEtag(jsonDocuments);
                context.TransactionalStorage.Batch(accessor =>
                {
                    lastEtag = accessor.Documents.GetBestNextDocumentEtag(lastEtag);
                });

                if (log.IsDebugEnabled)
                {
                    log.Debug("Early exit from last future splitted batch, need to fetch documents from etag: {0} to etag: {1}",
                        lastEtag, untilEtag);
                }

                linkedToken.Token.ThrowIfCancellationRequested();
                AddFutureBatch(lastEtag, untilEtag, isEarlyExitBatch: true);
            }
            else
            {
                linkedToken.Token.ThrowIfCancellationRequested();
                MaybeAddFutureBatch(jsonDocuments);
            }
            return jsonDocuments;
        }, linkedToken.Token)
        .ContinueWith(t =>
        {
            t.AssertNotFailed();
            linkedToken = null;
            return t.Result;
        }, linkedToken.Token)
    };

    futureIndexBatch.Task.ContinueWith(t =>
    {
        FutureBatchCompleted(t.Result.Count);
    }, linkedToken.Token);

    return futureIndexBatches.TryAdd(nextEtag, futureIndexBatch);
}
private bool AddFutureBatch(Etag nextEtag, Etag untilEtag, FutureBatchType batchType, int? docsCount = null)
{
    var futureBatchStat = new FutureBatchStats
    {
        Timestamp = SystemTime.UtcNow,
        PrefetchingUser = PrefetchingUser
    };
    var sp = Stopwatch.StartNew();
    context.AddFutureBatch(futureBatchStat);

    var docsCountRef = new Reference<int?> { Value = docsCount };
    var cts = new CancellationTokenSource();
    var linkedToken = CancellationTokenSource.CreateLinkedTokenSource(cts.Token, context.CancellationToken);
    var futureIndexBatch = new FutureIndexBatch
    {
        StartingEtag = nextEtag,
        Age = Interlocked.Increment(ref currentIndexingAge),
        CancellationTokenSource = cts,
        Type = batchType,
        DocsCount = docsCountRef,
        Task = Task.Run(() =>
        {
            List<JsonDocument> jsonDocuments = null;
            int localWork = 0;
            var earlyExit = new Reference<bool>();
            while (context.RunIndexing)
            {
                linkedToken.Token.ThrowIfCancellationRequested();
                jsonDocuments = GetJsonDocsFromDisk(
                    linkedToken.Token,
                    Abstractions.Util.EtagUtil.Increment(nextEtag, -1), untilEtag, earlyExit);
                if (jsonDocuments.Count > 0)
                    break;

                futureBatchStat.Retries++;

                context.WaitForWork(TimeSpan.FromMinutes(10), ref localWork, "PreFetching");
            }

            futureBatchStat.Duration = sp.Elapsed;
            futureBatchStat.Size = jsonDocuments == null ? 0 : jsonDocuments.Count;

            if (jsonDocuments == null)
                return null;

            LogEarlyExit(nextEtag, untilEtag, batchType == FutureBatchType.EarlyExit,
                jsonDocuments, sp.ElapsedMilliseconds);

            if (untilEtag != null && earlyExit.Value)
            {
                var lastEtag = GetHighestEtag(jsonDocuments);
                context.TransactionalStorage.Batch(accessor =>
                {
                    lastEtag = accessor.Documents.GetBestNextDocumentEtag(lastEtag);
                });

                if (log.IsDebugEnabled)
                {
                    log.Debug("Early exit from last future splitted batch, need to fetch documents from etag: {0} to etag: {1}",
                        lastEtag, untilEtag);
                }

                linkedToken.Token.ThrowIfCancellationRequested();
                docsCountRef.Value = jsonDocuments.Count;
                var docsLeft = docsCount - jsonDocuments.Count;
                if (docsLeft > 0 && lastEtag.CompareTo(untilEtag) <= 0)
                    AddFutureBatch(lastEtag, untilEtag, FutureBatchType.EarlyExit, docsLeft);
            }
            else
            {
                linkedToken.Token.ThrowIfCancellationRequested();
                MaybeAddFutureBatch(jsonDocuments);
            }
            return jsonDocuments;
        }, linkedToken.Token)
        .ContinueWith(t =>
        {
            using (cts)
            using (linkedToken)
            {
                t.AssertNotFailed();
            }
            return t.Result;
        })
    };

    futureIndexBatch.Task.ContinueWith(t =>
    {
        try
        {
            if (linkedToken.IsCancellationRequested == false)
                FutureBatchCompleted(t.Result.Count);
        }
        catch (ObjectDisposedException)
        {
            // this is an expected race with the actual task, this is fine
        }
    });

    var addFutureBatch = futureIndexBatches.TryAdd(nextEtag, futureIndexBatch);
    if (addFutureBatch == false)
    {
        log.Info(string.Format("A future batch starting with {0} etag is already running", nextEtag));
        cts.Cancel();
    }

    return addFutureBatch;
}
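// Illustrative only, not part of the original source: the cancellation pattern used by
// the two AddFutureBatch overloads above, shown in isolation. Linking a per-batch
// CancellationTokenSource with the database-wide token means the batch stops either when
// it is cancelled individually (e.g. a duplicate starting etag, as above) or when the
// whole database shuts down. All names below are hypothetical.
private static Task<int> StartCancellableRead(CancellationToken databaseToken)
{
    var perBatchCts = new CancellationTokenSource();
    var linked = CancellationTokenSource.CreateLinkedTokenSource(perBatchCts.Token, databaseToken);
    return Task.Run(() =>
    {
        linked.Token.ThrowIfCancellationRequested(); // observes either cancellation source
        return 0; // stand-in for the number of documents read from disk
    }, linked.Token)
    .ContinueWith(t =>
    {
        // dispose both sources exactly once, after the work has finished,
        // mirroring the using blocks in the overload above
        using (perBatchCts)
        using (linked)
        {
            return t.Result;
        }
    });
}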
private void MaybeAddFutureBatch(List<JsonDocument> past)
{
    if (context.Configuration.DisableDocumentPreFetching || context.RunIndexing == false)
        return;
    if (context.Configuration.MaxNumberOfParallelProcessingTasks == 1)
        return;
    if (past.Count == 0)
        return;
    if (prefetchingQueue.LoadedSize > autoTuner.MaximumSizeAllowedToFetchFromStorageInBytes)
        return; // already have too much in memory

    // don't keep _too_ much in memory
    if (prefetchingQueue.Count > context.Configuration.MaxNumberOfItemsToProcessInSingleBatch * 2)
        return;

    var size = 1024;
    var count = context.LastActualIndexingBatchInfo.Count;
    if (count > 0)
    {
        size = context.LastActualIndexingBatchInfo.Aggregate(0, (o, c) => o + c.TotalDocumentCount) / count;
    }
    var alreadyLoadedSize = futureIndexBatches.Values.Sum(x =>
    {
        if (x.Task.IsCompleted)
            return x.Task.Result.Sum(doc => doc.SerializedSizeOnDisk);
        return size;
    });

    if (alreadyLoadedSize > context.Configuration.AvailableMemoryForRaisingBatchSizeLimit)
        return;

    if (MemoryStatistics.IsLowMemory)
        return;

    if (futureIndexBatches.Count > 5) // we limit the number of future calls we do
    {
        int alreadyLoaded = futureIndexBatches.Values.Sum(x =>
        {
            if (x.Task.IsCompleted)
                return x.Task.Result.Count;
            // a still-running prefetch is estimated at 3/4 of a full batch
            return autoTuner.NumberOfItemsToProcessInSingleBatch / 4 * 3;
        });
        if (alreadyLoaded > autoTuner.NumberOfItemsToProcessInSingleBatch)
            return;
    }

    // ensure we don't do TOO much future caching
    if (MemoryStatistics.AvailableMemory < context.Configuration.AvailableMemoryForRaisingBatchSizeLimit)
        return;

    // we loaded the maximum amount, there are probably more items to read now.
    Etag highestLoadedEtag = GetHighestEtag(past);
    Etag nextEtag = GetNextDocumentEtagFromDisk(highestLoadedEtag);

    if (nextEtag == highestLoadedEtag)
        return; // there is nothing newer to do

    if (futureIndexBatches.ContainsKey(nextEtag)) // already loading this
        return;

    var futureBatchStat = new FutureBatchStats
    {
        Timestamp = SystemTime.UtcNow,
        PrefetchingUser = PrefetchingUser
    };
    Stopwatch sp = Stopwatch.StartNew();
    context.AddFutureBatch(futureBatchStat);
    futureIndexBatches.TryAdd(nextEtag, new FutureIndexBatch
    {
        StartingEtag = nextEtag,
        Age = Interlocked.Increment(ref currentIndexingAge),
        Task = Task.Factory.StartNew(() =>
        {
            List<JsonDocument> jsonDocuments = null;
            int localWork = 0;
            while (context.RunIndexing)
            {
                jsonDocuments = GetJsonDocsFromDisk(Abstractions.Util.EtagUtil.Increment(nextEtag, -1), null);
                if (jsonDocuments.Count > 0)
                    break;

                futureBatchStat.Retries++;

                context.WaitForWork(TimeSpan.FromMinutes(10), ref localWork, "PreFetching");
            }
            futureBatchStat.Duration = sp.Elapsed;
            futureBatchStat.Size = jsonDocuments == null ? 0 : jsonDocuments.Count;
            if (jsonDocuments != null)
            {
                MaybeAddFutureBatch(jsonDocuments);
            }
            return jsonDocuments;
        })
    });
}
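// Illustrative only, not part of the original source: a minimal sketch of how a
// prefetched batch might be claimed by the consuming side. It assumes futureIndexBatches
// is a ConcurrentDictionary<Etag, FutureIndexBatch> keyed by StartingEtag, which is what
// the TryAdd/ContainsKey calls above suggest; the method name TryConsumeFutureBatch is
// hypothetical.
private bool TryConsumeFutureBatch(Etag nextEtag, out List<JsonDocument> docs)
{
    docs = null;
    FutureIndexBatch batch;
    if (futureIndexBatches.TryRemove(nextEtag, out batch) == false)
        return false; // nothing was prefetched starting at this etag

    try
    {
        docs = batch.Task.Result; // blocks only if the background read is still in flight
        return docs != null;
    }
    catch (AggregateException)
    {
        return false; // the prefetch failed; the caller falls back to reading from disk
    }
}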