Example #1
		private void MaybeAddFutureBatch(List<JsonDocument> past)
		{
			if (context.Configuration.DisableDocumentPreFetchingForIndexing || context.RunIndexing == false)
				return;
			if (context.Configuration.MaxNumberOfParallelIndexTasks == 1)
				return;
			if (past.Count == 0)
				return;
			if (futureIndexBatches.Count > 5) // we limit the number of future calls we do
			{
				int alreadyLoaded = futureIndexBatches.Values.Sum(x =>
				{
					if (x.Task.IsCompleted)
						return x.Task.Result.Count;
					return autoTuner.NumberOfItemsToIndexInSingleBatch / 4 * 3;
				});

				if (alreadyLoaded > autoTuner.NumberOfItemsToIndexInSingleBatch)
					return;
			}

			// ensure we don't do TOO much future caching
			if (MemoryStatistics.AvailableMemory <
				context.Configuration.AvailableMemoryForRaisingIndexBatchSizeLimit)
				return;

			// we loaded the maximum amount, there are probably more items to read now.
			Etag highestLoadedEtag = GetHighestEtag(past);
			Etag nextEtag = GetNextDocumentEtagFromDisk(highestLoadedEtag);

			if (nextEtag == highestLoadedEtag)
				return; // there is nothing newer to do 

			if (futureIndexBatches.ContainsKey(nextEtag)) // already loading this
				return;

			var futureBatchStat = new FutureBatchStats
			{
				Timestamp = SystemTime.UtcNow,
			};
			Stopwatch sp = Stopwatch.StartNew();
			context.AddFutureBatch(futureBatchStat);
			futureIndexBatches.TryAdd(nextEtag, new FutureIndexBatch
			{
				StartingEtag = nextEtag,
				Age = Interlocked.Increment(ref currentIndexingAge),
				Task = Task.Factory.StartNew(() =>
				{
					List<JsonDocument> jsonDocuments = null;
					int localWork = 0;
					while (context.RunIndexing)
					{
						jsonDocuments = GetJsonDocsFromDisk(EtagUtil.Increment(nextEtag, -1), null);
						if (jsonDocuments.Count > 0)
							break;

						futureBatchStat.Retries++;

						context.WaitForWork(TimeSpan.FromMinutes(10), ref localWork, "PreFetching");
					}
					futureBatchStat.Duration = sp.Elapsed;
					futureBatchStat.Size = jsonDocuments == null ? 0 : jsonDocuments.Count;
					if (jsonDocuments != null)
					{
						MaybeAddFutureBatch(jsonDocuments);
					}
					return jsonDocuments;
				})
			});
		}
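The example above only shows the producer side of prefetching: it schedules a background Task and registers it in futureIndexBatches, keyed by the starting etag. A consumer would presumably look up the prefetched batch for the next etag it needs and use the task's result once it is ready. A minimal, hypothetical sketch of that consumer side, assuming futureIndexBatches is a ConcurrentDictionary<Etag, FutureIndexBatch> and using a made-up helper name (TryGetFutureBatch):

		// Hypothetical consumer-side helper; not part of the original source.
		// Assumes futureIndexBatches is the ConcurrentDictionary<Etag, FutureIndexBatch>
		// that MaybeAddFutureBatch populates above.
		private List<JsonDocument> TryGetFutureBatch(Etag nextDocEtag)
		{
			FutureIndexBatch batch;
			if (futureIndexBatches.TryRemove(nextDocEtag, out batch) == false)
				return null; // nothing was prefetched starting at this etag

			if (batch.Task.IsCompleted == false)
			{
				// still loading; put it back and let the caller read from disk instead
				futureIndexBatches.TryAdd(nextDocEtag, batch);
				return null;
			}

			return batch.Task.Result;
		}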
Example #2
		private void MaybeAddFutureBatch(JsonResults past)
		{
			if (context.Configuration.MaxNumberOfParallelIndexTasks == 1)
				return;
			if (past.Results.Length == 0 || past.LoadedFromDisk == false)
				return;
			if (futureIndexBatches.Count > 5) // we limit the number of future calls we do
			{
				var alreadyLoaded = futureIndexBatches.Sum(x =>
				{
					if (x.Task.IsCompleted)
						return x.Task.Result.Results.Length;
					return 0;
				});

				if (alreadyLoaded > autoTuner.NumberOfItemsToIndexInSingleBatch)
					return;
			}

			// ensure we don't do TOO much future caching
			if (MemoryStatistics.AvailableMemory < context.Configuration.AvailableMemoryForRaisingIndexBatchSizeLimit)
				return;

			// we loaded the maximum amount, there are probably more items to read now.
			var nextEtag = GetNextHighestEtag(past.Results);

			var nextBatch = futureIndexBatches.FirstOrDefault(x => x.StartingEtag == nextEtag);

			if (nextBatch != null)
				return;

			var futureBatchStat = new FutureBatchStats
			{
				Timestamp = SystemTime.UtcNow,
			};
			var sp = Stopwatch.StartNew();
			context.AddFutureBatch(futureBatchStat);
			futureIndexBatches.Add(new FutureIndexBatch
			{
				StartingEtag = nextEtag,
				Age = currentIndexingAge,
				Task = System.Threading.Tasks.Task.Factory.StartNew(() =>
				{
					var jsonDocuments = GetJsonDocuments(nextEtag);
					int localWork = workCounter;
					while (jsonDocuments.Results.Length == 0 && context.DoWork)
					{
						futureBatchStat.Retries++;

						if (context.WaitForWork(TimeSpan.FromMinutes(10), ref localWork, "PreFetching") == false)
							continue;

						jsonDocuments = GetJsonDocuments(nextEtag);
					}
					futureBatchStat.Duration = sp.Elapsed;
					futureBatchStat.Size = jsonDocuments.Results.Length;
					MaybeAddFutureBatch(jsonDocuments);
					return jsonDocuments;
				})
			});
		}
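Example #2 above works against a JsonResults wrapper rather than a raw List<JsonDocument>; only two members of that type are visible in the usage (Results and LoadedFromDisk). A sketch of the shape it appears to assume, inferred from usage only:

// Shape inferred from Example #2's usage only; not the original declaration.
public class JsonResults
{
	public JsonDocument[] Results { get; set; }
	public bool LoadedFromDisk { get; set; }
}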
Example #3
		public void AddFutureBatch(FutureBatchStats futureBatchStat)
		{
			futureBatchStats.Add(futureBatchStat);
			if (futureBatchStats.Count <= 30)
				return;

			foreach (var source in futureBatchStats.OrderBy(x => x.Timestamp).Take(5))
			{
				futureBatchStats.TryRemove(source);
			}
		}
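Example #3 above caps the number of retained FutureBatchStats entries at roughly 30 and trims the five oldest once that cap is exceeded. It only exercises Add, TryRemove, Count and LINQ enumeration on futureBatchStats; a minimal sketch of a concurrent set with that surface, assuming such a type is not already available (this stand-in is illustrative, not the original type):

using System.Collections;
using System.Collections.Concurrent;
using System.Collections.Generic;

// Illustrative stand-in for the concurrent set Example #3 assumes;
// it supports Add, TryRemove, Count and enumeration, which is all the example needs.
public class ConcurrentSet<T> : IEnumerable<T>
{
	private readonly ConcurrentDictionary<T, object> inner = new ConcurrentDictionary<T, object>();

	public int Count
	{
		get { return inner.Count; }
	}

	public bool Add(T item)
	{
		return inner.TryAdd(item, null);
	}

	public bool TryRemove(T item)
	{
		object ignored;
		return inner.TryRemove(item, out ignored);
	}

	public IEnumerator<T> GetEnumerator()
	{
		return inner.Keys.GetEnumerator();
	}

	IEnumerator IEnumerable.GetEnumerator()
	{
		return GetEnumerator();
	}
}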
Example #4
        private bool AddFutureBatch(Etag nextEtag, Etag untilEtag, bool isSplitted = false, bool isEarlyExitBatch = false)
        {
            var futureBatchStat = new FutureBatchStats
            {
                Timestamp = SystemTime.UtcNow,
                PrefetchingUser = PrefetchingUser
            };
            Stopwatch sp = Stopwatch.StartNew();
            context.AddFutureBatch(futureBatchStat);

            var cts = new CancellationTokenSource();
            var linkedToken = CancellationTokenSource.CreateLinkedTokenSource(cts.Token, context.CancellationToken);
            var futureIndexBatch = new FutureIndexBatch
            {
                StartingEtag = nextEtag,
                Age = Interlocked.Increment(ref currentIndexingAge),
                CancellationTokenSource = linkedToken,
                IsSplitted = isSplitted,
                Task = Task.Factory.StartNew(() =>
                {
                    List<JsonDocument> jsonDocuments = null;
                    int localWork = 0;
                    var earlyExit = new Reference<bool>();
                    while (context.RunIndexing)
                    {
                        linkedToken.Token.ThrowIfCancellationRequested();
                        jsonDocuments = GetJsonDocsFromDisk(
                            linkedToken.Token,
                            Abstractions.Util.EtagUtil.Increment(nextEtag, -1), untilEtag, earlyExit);

                        if (jsonDocuments.Count > 0)
                            break;

                        futureBatchStat.Retries++;

                        context.WaitForWork(TimeSpan.FromMinutes(10), ref localWork, "PreFetching");
                    }

                    futureBatchStat.Duration = sp.Elapsed;
                    futureBatchStat.Size = jsonDocuments == null ? 0 : jsonDocuments.Count;

                    if (jsonDocuments == null)
                        return null;

                    LogEarlyExit(nextEtag, untilEtag, isEarlyExitBatch, jsonDocuments, sp.ElapsedMilliseconds);

                    if (untilEtag != null && earlyExit.Value)
                    {
                        var lastEtag = GetHighestEtag(jsonDocuments);
                        context.TransactionalStorage.Batch(accessor =>
                        {
                            lastEtag = accessor.Documents.GetBestNextDocumentEtag(lastEtag);
                        });

                        if (log.IsDebugEnabled)
                        {
                            log.Debug("Early exit from last future splitted batch, need to fetch documents from etag: {0} to etag: {1}",
                                lastEtag, untilEtag);
                        }

                        linkedToken.Token.ThrowIfCancellationRequested();
                        AddFutureBatch(lastEtag, untilEtag, isEarlyExitBatch: true);
                    }
                    else
                    {
                        linkedToken.Token.ThrowIfCancellationRequested();
                        MaybeAddFutureBatch(jsonDocuments);
                    }
                    return jsonDocuments;
                }, linkedToken.Token)
                .ContinueWith(t =>
                {
                    t.AssertNotFailed();
                    linkedToken = null;
                    return t.Result;
                }, linkedToken.Token)
            };

            futureIndexBatch.Task.ContinueWith(t =>
            {
                FutureBatchCompleted(t.Result.Count);
            }, linkedToken.Token);
            
            return futureIndexBatches.TryAdd(nextEtag, futureIndexBatch);
        }
Example #5
        private bool AddFutureBatch(Etag nextEtag, Etag untilEtag, 
            FutureBatchType batchType, int? docsCount = null)
        {
            var futureBatchStat = new FutureBatchStats
            {
                Timestamp = SystemTime.UtcNow,
                PrefetchingUser = PrefetchingUser
            };
            var sp = Stopwatch.StartNew();
            context.AddFutureBatch(futureBatchStat);

            var docsCountRef = new Reference<int?>() {Value = docsCount};
            var cts = new CancellationTokenSource();
            var linkedToken = CancellationTokenSource.CreateLinkedTokenSource(cts.Token, context.CancellationToken);
            var futureIndexBatch = new FutureIndexBatch
            {
                StartingEtag = nextEtag,
                Age = Interlocked.Increment(ref currentIndexingAge),
                CancellationTokenSource = cts,
                Type = batchType,
                DocsCount = docsCountRef,
                Task = Task.Run(() =>
                {
                    List<JsonDocument> jsonDocuments = null;
                    int localWork = 0;
                    var earlyExit = new Reference<bool>();
                    while (context.RunIndexing)
                    {
                        linkedToken.Token.ThrowIfCancellationRequested();
                        jsonDocuments = GetJsonDocsFromDisk(
                            linkedToken.Token,
                            Abstractions.Util.EtagUtil.Increment(nextEtag, -1), untilEtag, earlyExit);

                        if (jsonDocuments.Count > 0)
                            break;

                        futureBatchStat.Retries++;

                        context.WaitForWork(TimeSpan.FromMinutes(10), ref localWork, "PreFetching");
                    }

                    futureBatchStat.Duration = sp.Elapsed;
                    futureBatchStat.Size = jsonDocuments == null ? 0 : jsonDocuments.Count;

                    if (jsonDocuments == null)
                        return null;

                    LogEarlyExit(nextEtag, untilEtag, batchType == FutureBatchType.EarlyExit, 
                        jsonDocuments, sp.ElapsedMilliseconds);

                    if (untilEtag != null && earlyExit.Value)
                    {
                        var lastEtag = GetHighestEtag(jsonDocuments);
                        context.TransactionalStorage.Batch(accessor =>
                        {
                            lastEtag = accessor.Documents.GetBestNextDocumentEtag(lastEtag);
                        });

                        if (log.IsDebugEnabled)
                        {
                            log.Debug("Early exit from last future splitted batch, need to fetch documents from etag: {0} to etag: {1}",
                                lastEtag, untilEtag);
                        }

                        linkedToken.Token.ThrowIfCancellationRequested();
                        docsCountRef.Value = jsonDocuments.Count;
                        var docsLeft = docsCount - jsonDocuments.Count;
                        if (docsLeft > 0 && lastEtag.CompareTo(untilEtag) <= 0)
                            AddFutureBatch(lastEtag, untilEtag, FutureBatchType.EarlyExit, docsLeft);
                    }
                    else
                    {
                        linkedToken.Token.ThrowIfCancellationRequested();
                        MaybeAddFutureBatch(jsonDocuments);
                    }
                    return jsonDocuments;
                }, linkedToken.Token)
                .ContinueWith(t =>
                {
                    using (cts)
                    using (linkedToken)
                    {
                        t.AssertNotFailed();
                    }
                    return t.Result;
                })
            };

            futureIndexBatch.Task.ContinueWith(t =>
            {
                try
                {
                    if (linkedToken.IsCancellationRequested == false)
                        FutureBatchCompleted(t.Result.Count);
                }
                catch (ObjectDisposedException)
                {
                    // this is an expected race with the actual task, this is fine
                }
            });

            var addFutureBatch = futureIndexBatches.TryAdd(nextEtag, futureIndexBatch);
            if (addFutureBatch == false)
            {
                log.Info(string.Format("A future batch starting with {0} etag is already running", nextEtag));
                cts.Cancel();
            }

            return addFutureBatch;
        }
		private void MaybeAddFutureBatch(List<JsonDocument> past)
		{
			if (context.Configuration.DisableDocumentPreFetching || context.RunIndexing == false)
				return;
			if (context.Configuration.MaxNumberOfParallelProcessingTasks == 1)
				return;
			if (past.Count == 0)
				return;
			if (prefetchingQueue.LoadedSize > autoTuner.MaximumSizeAllowedToFetchFromStorageInBytes)
				return; // already have too much in memory
			// don't keep _too_ much in memory
			if (prefetchingQueue.Count > context.Configuration.MaxNumberOfItemsToProcessInSingleBatch * 2)
				return;

			var size = 1024;
			var count = context.LastActualIndexingBatchInfo.Count;
			if (count > 0)
			{
				size = context.LastActualIndexingBatchInfo.Aggregate(0, (o, c) => o + c.TotalDocumentCount) / count;
			}
			var alreadyLoadedSize = futureIndexBatches.Values.Sum(x =>
			{
				if (x.Task.IsCompleted)
					return x.Task.Result.Sum(doc => doc.SerializedSizeOnDisk);

			    return size;
			});

			if (alreadyLoadedSize > context.Configuration.AvailableMemoryForRaisingBatchSizeLimit)
				return;

			if(MemoryStatistics.IsLowMemory)
				return;
			if (futureIndexBatches.Count > 5) // we limit the number of future calls we do
			{
				int alreadyLoaded = futureIndexBatches.Values.Sum(x =>
				{
					if (x.Task.IsCompleted)
						return x.Task.Result.Count;
					return autoTuner.NumberOfItemsToProcessInSingleBatch / 4 * 3;
				});

				if (alreadyLoaded > autoTuner.NumberOfItemsToProcessInSingleBatch)
					return;
			}

			// ensure we don't do TOO much future caching
			if (MemoryStatistics.AvailableMemory <
				context.Configuration.AvailableMemoryForRaisingBatchSizeLimit)
				return;

			// we loaded the maximum amount, there are probably more items to read now.
			Etag highestLoadedEtag = GetHighestEtag(past);
			Etag nextEtag = GetNextDocumentEtagFromDisk(highestLoadedEtag);

			if (nextEtag == highestLoadedEtag)
				return; // there is nothing newer to do 

			if (futureIndexBatches.ContainsKey(nextEtag)) // already loading this
				return;

			var futureBatchStat = new FutureBatchStats
			{
				Timestamp = SystemTime.UtcNow,
				PrefetchingUser = PrefetchingUser
			};
			Stopwatch sp = Stopwatch.StartNew();
			context.AddFutureBatch(futureBatchStat);
			futureIndexBatches.TryAdd(nextEtag, new FutureIndexBatch
			{
				StartingEtag = nextEtag,
				Age = Interlocked.Increment(ref currentIndexingAge),
				Task = Task.Factory.StartNew(() =>
				{
					List<JsonDocument> jsonDocuments = null;
					int localWork = 0;
					while (context.RunIndexing)
					{
						jsonDocuments = GetJsonDocsFromDisk(Abstractions.Util.EtagUtil.Increment(nextEtag, -1), null);
						if (jsonDocuments.Count > 0)
							break;

						futureBatchStat.Retries++;

						context.WaitForWork(TimeSpan.FromMinutes(10), ref localWork, "PreFetching");
					}
					futureBatchStat.Duration = sp.Elapsed;
					futureBatchStat.Size = jsonDocuments == null ? 0 : jsonDocuments.Count;
					if (jsonDocuments != null)
					{
						MaybeAddFutureBatch(jsonDocuments);
					}
					return jsonDocuments;
				})
			});
		}
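None of the examples show the declarations of FutureBatchStats and FutureIndexBatch themselves. Below is a sketch of the shapes they appear to assume, inferred purely from the usages above; the field types are guesses, the Task result type differs between versions (Example #2 returns a JsonResults rather than a List<JsonDocument>), and it leans on the codebase's own Etag, JsonDocument, Reference<T> and PrefetchingUser types:

using System;
using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;

// Inferred from usage only; not copied from the original source.
public class FutureBatchStats
{
	public DateTime Timestamp { get; set; }
	public TimeSpan Duration { get; set; }
	public int Size { get; set; }
	public int Retries { get; set; }
	public PrefetchingUser PrefetchingUser { get; set; } // set in Examples #4 and #5; assumed to be an enum
}

// Only EarlyExit actually appears in Example #5; the other values are guesses.
public enum FutureBatchType
{
	Normal,
	Splitted,
	EarlyExit
}

public class FutureIndexBatch
{
	public Etag StartingEtag { get; set; }
	public int Age { get; set; }
	public Task<List<JsonDocument>> Task { get; set; }                    // Task<JsonResults> in Example #2
	public CancellationTokenSource CancellationTokenSource { get; set; }  // Examples #4 and #5
	public bool IsSplitted { get; set; }                                  // Example #4
	public FutureBatchType Type { get; set; }                             // Example #5
	public Reference<int?> DocsCount { get; set; }                        // Example #5
}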