Example #1
        protected void UpdateIndexingStats(WorkContext context, IndexingWorkStats stats)
        {
            context.TransactionalStorage.Batch(accessor =>
            {
                switch (stats.Operation)
                {
                case IndexingWorkStats.Status.Map:
                    accessor.Indexing.UpdateIndexingStats(name, stats);
                    break;

                case IndexingWorkStats.Status.Reduce:
                    accessor.Indexing.UpdateReduceStats(name, stats);
                    break;

                case IndexingWorkStats.Status.Ignore:
                    break;

                default:
                    throw new ArgumentOutOfRangeException();
                }
            });
        }
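IndexingWorkStats itself is not shown in any of these examples. Below is a minimal sketch of the shape the call sites imply, assuming public int fields (the parallel examples pass them by ref to Interlocked, which a property cannot satisfy) and an aggregating constructor (used as new IndexingWorkStats(allState.Select(x => x.Item2)) further down); this is an inference from usage, not the actual RavenDB declaration:

using System.Collections.Generic;

public class IndexingWorkStats
{
    public enum Status { Map, Reduce, Ignore }

    // fields rather than properties: Interlocked.Increment(ref stats.IndexingAttempts)
    // in the parallel examples needs a field to take by ref
    public int IndexingAttempts;
    public int IndexingSuccesses;
    public int IndexingErrors;

    public int ReduceAttempts;
    public int ReduceSuccesses;
    public int ReduceErrors;

    public Status Operation = Status.Map;

    public IndexingWorkStats()
    {
    }

    // merges the per-partition stats collected by the parallel map examples
    public IndexingWorkStats(IEnumerable<IndexingWorkStats> stats)
    {
        foreach (var stat in stats)
        {
            IndexingAttempts += stat.IndexingAttempts;
            IndexingSuccesses += stat.IndexingSuccesses;
            IndexingErrors += stat.IndexingErrors;
            ReduceAttempts += stat.ReduceAttempts;
            ReduceSuccesses += stat.ReduceSuccesses;
            ReduceErrors += stat.ReduceErrors;
        }
    }
}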
Example #2
        protected IEnumerable <object> RobustEnumerationReduce(IEnumerable <object> input, IndexingFunc func,
                                                               IStorageActionsAccessor actions, WorkContext context,
                                                               IndexingWorkStats stats)
        {
            // not strictly accurate, but if we get that many errors, it's probably an error anyway.
            return(new RobustEnumerator(context.Configuration.MaxNumberOfItemsToIndexInSingleBatch)
            {
                BeforeMoveNext = () => stats.ReduceAttempts++,
                CancelMoveNext = () => stats.ReduceAttempts--,
                OnError = (exception, o) =>
                {
                    context.AddError(name,
                                     TryGetDocKey(o),
                                     exception.Message
                                     );
                    logIndexing.WarnException(
                        String.Format("Failed to execute indexing function on {0} on {1}", name,
                                      TryGetDocKey(o)),
                        exception);

                    stats.ReduceErrors++;
                }
            }.RobustEnumeration(input, func));
        }
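RobustEnumerator's internals never appear in these examples. The sketch below is a deliberately simplified imitation of the callback protocol the initializer above wires up; the ordering of CancelMoveNext and OnError is one plausible reading of the call sites, and the real enumerator also enforces the error budget passed to its constructor (MaxNumberOfItemsToIndexInSingleBatch) plus cancellation:

using System;
using System.Collections.Generic;

// hypothetical stand-in, not RavenDB's RobustEnumerator
static class RobustEnumerationSketch
{
    public static IEnumerable<object> Enumerate(
        IEnumerable<object> input,
        Func<object, object> func,
        Action beforeMoveNext,              // wired to stats.ReduceAttempts++ above
        Action cancelMoveNext,              // wired to stats.ReduceAttempts--
        Action<Exception, object> onError)  // logs and counts stats.ReduceErrors++
    {
        foreach (var item in input)
        {
            beforeMoveNext();               // charge an attempt up front
            object result;
            try
            {
                result = func(item);
            }
            catch (Exception e)
            {
                cancelMoveNext();           // refund the attempt on failure; the real
                onError(e, item);           // enumerator may order these differently
                continue;
            }
            yield return result;
        }
    }
}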
Example #3
		protected IEnumerable<object> RobustEnumerationIndex(IEnumerator<object> input, List<IndexingFunc> funcs, IndexingWorkStats stats, out Action<Exception, object> onError)
		{
			onError = (exception, o) =>
			{
				string docId = null;
				var invalidSpatialException = exception as InvalidSpatialShapeException;
				if (invalidSpatialException != null)
					docId = invalidSpatialException.InvalidDocumentId;

				context.AddError(name,
					docId ?? TryGetDocKey(o),
					exception.Message,
					"Map"
					);
				logIndexing.WarnException(
					String.Format("Failed to execute indexing function on {0} on {1}", name,
						TryGetDocKey(o)),
					exception);

				stats.IndexingErrors++;
			};
			return new RobustEnumerator(context.CancellationToken, context.Configuration.MaxNumberOfItemsToIndexInSingleBatch)
			{
				BeforeMoveNext = () => Interlocked.Increment(ref stats.IndexingAttempts),
				CancelMoveNext = () => Interlocked.Decrement(ref stats.IndexingAttempts),
				OnError = onError
			}.RobustEnumeration(input, funcs);
		}
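Note that this variant swaps the plain ++/-- of Examples #2, #9, and #10 for Interlocked, because the enumeration here can run on several partitions at once. A self-contained illustration of why that matters, assuming the field shape sketched under Example #1:

using System.Threading;
using System.Threading.Tasks;

var stats = new IndexingWorkStats();
Parallel.For(0, 100000, _ =>
{
    // atomic read-modify-write; a plain stats.IndexingAttempts++ here can
    // lose increments under contention because ++ is not atomic
    Interlocked.Increment(ref stats.IndexingAttempts);
});
// stats.IndexingAttempts is reliably 100000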
Example #4
		protected void UpdateIndexingStats(WorkContext workContext, IndexingWorkStats stats)
		{
			switch (stats.Operation)
			{
				case IndexingWorkStats.Status.Map:
					workContext.TransactionalStorage.Batch(accessor => accessor.Indexing.UpdateIndexingStats(name, stats));
					break;
				case IndexingWorkStats.Status.Reduce:
					workContext.TransactionalStorage.Batch(accessor => accessor.Indexing.UpdateReduceStats(name, stats));
					break;
				case IndexingWorkStats.Status.Ignore:
					break;
				default:
					throw new ArgumentOutOfRangeException();
			}
		}
		public override void IndexDocuments(
			AbstractViewGenerator viewGenerator,
			IndexingBatch batch,
			IStorageActionsAccessor actions,
			DateTime minimumTimestamp)
		{
			var count = 0;
			var sourceCount = 0;
			var sw = Stopwatch.StartNew();
			var start = SystemTime.UtcNow;
			var deleted = new Dictionary<ReduceKeyAndBucket, int>();
			var indexPerfStats = RecordCurrentBatch("Current Map", batch.Docs.Count);
			batch.SetIndexingPerformance(indexPerfStats);

			var documentsWrapped = batch.Docs.Select(doc =>
			{
				sourceCount++;
				var documentId = doc.__document_id;
				actions.MapReduce.DeleteMappedResultsForDocumentId((string)documentId, indexId, deleted);
				return doc;
			})
				.Where(x => x is FilteredDocument == false)
				.ToList();
			var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();
			var allReferenceEtags = new ConcurrentQueue<IDictionary<string, Etag>>();
			var allState = new ConcurrentQueue<Tuple<HashSet<ReduceKeyAndBucket>, IndexingWorkStats, Dictionary<string, int>>>();

			int loadDocumentCount = 0;
			long loadDocumentDuration = 0;
			BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, partition =>
			{
				var localStats = new IndexingWorkStats();
				var localChanges = new HashSet<ReduceKeyAndBucket>();
				var statsPerKey = new Dictionary<string, int>();
				allState.Enqueue(Tuple.Create(localChanges, localStats, statsPerKey));

				using (CurrentIndexingScope.Current = new CurrentIndexingScope(context.Database, PublicName))
				{
					// we are writing to the transactional store from multiple threads here, and in a streaming fashion
					// should result in less memory and better perf
					context.TransactionalStorage.Batch(accessor =>
					{
						var mapResults = RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, localStats);
						var currentDocumentResults = new List<object>();
						string currentKey = null;
						bool skipDocument = false;
						foreach (var currentDoc in mapResults)
						{
							var documentId = GetDocumentId(currentDoc);
							if (documentId != currentKey)
							{
								count += ProcessBatch(viewGenerator, currentDocumentResults, currentKey, localChanges, accessor, statsPerKey);
								currentDocumentResults.Clear();
								currentKey = documentId;
							}
							else if (skipDocument)
							{
								continue;
							}
							currentDocumentResults.Add(new DynamicJsonObject(RavenJObject.FromObject(currentDoc, jsonSerializer)));

							if (EnsureValidNumberOfOutputsForDocument(documentId, currentDocumentResults.Count) == false)
							{
								skipDocument = true;
								currentDocumentResults.Clear();
								continue;
							}

							Interlocked.Increment(ref localStats.IndexingSuccesses);
						}
						count += ProcessBatch(viewGenerator, currentDocumentResults, currentKey, localChanges, accessor, statsPerKey);
					});
					allReferenceEtags.Enqueue(CurrentIndexingScope.Current.ReferencesEtags);
					allReferencedDocs.Enqueue(CurrentIndexingScope.Current.ReferencedDocuments);
					Interlocked.Add(ref loadDocumentCount, CurrentIndexingScope.Current.LoadDocumentCount);
					Interlocked.Add(ref loadDocumentDuration, CurrentIndexingScope.Current.LoadDocumentDuration.ElapsedMilliseconds);
				}
			});



			UpdateDocumentReferences(actions, allReferencedDocs, allReferenceEtags);

			var changed = allState.SelectMany(x => x.Item1).Concat(deleted.Keys)
					.Distinct()
					.ToList();

			var stats = new IndexingWorkStats(allState.Select(x => x.Item2));
			var reduceKeyStats = allState.SelectMany(x => x.Item3)
										 .GroupBy(x => x.Key)
										 .Select(g => new { g.Key, Count = g.Sum(x => x.Value) })
										 .ToList();

			BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, reduceKeyStats, enumerator => context.TransactionalStorage.Batch(accessor =>
			{
				while (enumerator.MoveNext())
				{
					var reduceKeyStat = enumerator.Current;
					accessor.MapReduce.IncrementReduceKeyCounter(indexId, reduceKeyStat.Key, reduceKeyStat.Count);
				}
			}));

			BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, changed, enumerator => context.TransactionalStorage.Batch(accessor =>
			{
				while (enumerator.MoveNext())
				{
					accessor.MapReduce.ScheduleReductions(indexId, 0, enumerator.Current);
				}
			}));


			UpdateIndexingStats(context, stats);
			AddindexingPerformanceStat(new IndexingPerformanceStats
			{
				OutputCount = count,
				ItemsCount = sourceCount,
				InputCount = documentsWrapped.Count,
				Operation = "Map",
				Duration = sw.Elapsed,
				Started = start,
				LoadDocumentCount = loadDocumentCount,
				LoadDocumentDurationMs = loadDocumentDuration 
			});
			BatchCompleted("Current Map");
			logIndexing.Debug("Mapped {0} documents for {1}", count, indexId);
		}
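The inner loop of IndexDocuments above relies on map results arriving grouped by document: a change of document id marks a batch boundary, and the trailing ProcessBatch call flushes the final group. The same pattern in isolation, where orderedResults, GetDocumentId, and Flush are hypothetical placeholders for the map output and ProcessBatch:

string currentKey = null;
var currentBatch = new List<object>();
foreach (var result in orderedResults)   // assumed grouped by document id
{
    var documentId = GetDocumentId(result);
    if (documentId != currentKey)
    {
        // flush the previous document's results; like ProcessBatch above,
        // Flush must tolerate the empty first batch with a null key
        Flush(currentBatch, currentKey);
        currentBatch.Clear();
        currentKey = documentId;
    }
    currentBatch.Add(result);
}
Flush(currentBatch, currentKey);         // don't forget the last document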
Example #6
        public override void IndexDocuments(
            AbstractViewGenerator viewGenerator,
            IndexingBatch batch,
            IStorageActionsAccessor actions,
            DateTime minimumTimestamp)
        {
            var count            = 0;
            var sourceCount      = 0;
            var sw               = Stopwatch.StartNew();
            var start            = SystemTime.UtcNow;
            var changed          = new HashSet <ReduceKeyAndBucket>();
            var documentsWrapped = batch.Docs.Select(doc =>
            {
                sourceCount++;
                var documentId = doc.__document_id;
                actions.MapReduce.DeleteMappedResultsForDocumentId((string)documentId, name, changed);
                return(doc);
            })
                                   .Where(x => x is FilteredDocument == false);
            var items             = new List <MapResultItem>();
            var stats             = new IndexingWorkStats();
            var allReferencedDocs = new ConcurrentQueue <IDictionary <string, HashSet <string> > >();

            using (CurrentIndexingScope.Current = new CurrentIndexingScope(LoadDocument, allReferencedDocs.Enqueue))
            {
                var mapResults = RobustEnumerationIndex(
                    documentsWrapped.GetEnumerator(),
                    viewGenerator.MapDefinitions,
                    actions,
                    stats)
                                 .ToList();
                actions.MapReduce.UpdateRemovedMapReduceStats(name, changed);

                foreach (var mappedResultFromDocument in mapResults.GroupBy(GetDocumentId))
                {
                    var dynamicResults = mappedResultFromDocument.Select(x => (object)new DynamicJsonObject(RavenJObject.FromObject(x, jsonSerializer))).ToList();
                    foreach (
                        var doc in
                        RobustEnumerationReduceDuringMapPhase(dynamicResults.GetEnumerator(), viewGenerator.ReduceDefinition, actions, context))
                    {
                        count++;

                        var reduceValue = viewGenerator.GroupByExtraction(doc);
                        if (reduceValue == null)
                        {
                            logIndexing.Debug("Field {0} is used as the reduce key and cannot be null, skipping document {1}",
                                              viewGenerator.GroupByExtraction, mappedResultFromDocument.Key);
                            continue;
                        }
                        var reduceKey = ReduceKeyToString(reduceValue);
                        var docId     = mappedResultFromDocument.Key.ToString();

                        var data = GetMappedData(doc);

                        items.Add(new MapResultItem
                        {
                            Data      = data,
                            DocId     = docId,
                            ReduceKey = reduceKey
                        });

                        changed.Add(new ReduceKeyAndBucket(IndexingUtil.MapBucket(docId), reduceKey));
                    }
                }
            }

            IDictionary <string, HashSet <string> > result;

            while (allReferencedDocs.TryDequeue(out result))
            {
                foreach (var referencedDocument in result)
                {
                    actions.Indexing.UpdateDocumentReferences(name, referencedDocument.Key, referencedDocument.Value);
                    actions.General.MaybePulseTransaction();
                }
            }

            foreach (var mapResultItem in items)
            {
                actions.MapReduce.PutMappedResult(name, mapResultItem.DocId, mapResultItem.ReduceKey, mapResultItem.Data);
                actions.General.MaybePulseTransaction();
            }

            UpdateIndexingStats(context, stats);
            actions.MapReduce.ScheduleReductions(name, 0, changed);
            AddindexingPerformanceStat(new IndexingPerformanceStats
            {
                OutputCount = count,
                InputCount  = sourceCount,
                Operation   = "Map",
                Duration    = sw.Elapsed,
                Started     = start
            });
            logIndexing.Debug("Mapped {0} documents for {1}", count, name);
        }
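IndexingUtil.MapBucket is not defined in any of these examples; its role is to fold a document id into a bounded bucket so reductions can be scheduled per (reduceKey, bucket) pair rather than per document. A hypothetical stand-in; the real hash function and bucket range may well differ:

static int MapBucket(string docId)
{
    // stable hash folded into a fixed bucket range; both the hash and
    // the range here are assumptions for illustration only
    var hash = 0;
    foreach (var ch in docId)
        hash = unchecked(hash * 31 + ch);
    return (hash & 0x7fffffff) % (64 * 1024);
}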
Example #7
		public void UpdateIndexingStats(string index, IndexingWorkStats stats)
		{
			var indexStats = GetCurrentIndex(index);
			indexStats["attempts"] = indexStats.Value<int>("attempts") + stats.IndexingAttempts;
			indexStats["successes"] = indexStats.Value<int>("successes") + stats.IndexingSuccesses;
			indexStats["failures"] = indexStats.Value<int>("failures") + stats.IndexingErrors;
			storage.IndexingStats.UpdateKey(indexStats);
		}
Example #8
		public void UpdateReduceStats(int id, IndexingWorkStats stats)
		{
			SetCurrentIndexStatsToImpl(id);
			using (var update = new Update(session, IndexesStatsReduce, JET_prep.Replace))
			{
				var oldAttempts = Api.RetrieveColumnAsInt32(session, IndexesStatsReduce, tableColumnsCache.IndexesStatsReduceColumns["reduce_attempts"]) ?? 0;
				Api.SetColumn(session, IndexesStatsReduce, tableColumnsCache.IndexesStatsReduceColumns["reduce_attempts"],
					oldAttempts + stats.ReduceAttempts);

				var oldErrors = Api.RetrieveColumnAsInt32(session, IndexesStatsReduce, tableColumnsCache.IndexesStatsReduceColumns["reduce_errors"]) ?? 0;
				Api.SetColumn(session, IndexesStatsReduce, tableColumnsCache.IndexesStatsReduceColumns["reduce_errors"],
					oldErrors + stats.ReduceErrors);

				var oldSuccesses = Api.RetrieveColumnAsInt32(session, IndexesStatsReduce, tableColumnsCache.IndexesStatsReduceColumns["reduce_successes"]) ?? 0;
				Api.SetColumn(session, IndexesStatsReduce, tableColumnsCache.IndexesStatsReduceColumns["reduce_successes"],
					oldSuccesses + stats.ReduceSuccesses);

				update.Save();
			}
		}
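Examples #8, #12, and #18 each repeat the same Esent read-add-write sequence once per column. A hypothetical helper capturing that sequence; Api.RetrieveColumnAsInt32 and Api.SetColumn are real Microsoft.Isam.Esent.Interop calls, but the helper itself and its parameter types are assumptions:

private void AddToColumn(JET_TABLEID table, JET_COLUMNID column, int delta)
{
    // read the current value (null means the column was never set),
    // add the delta, and write it back inside the enclosing Update scope
    var current = Api.RetrieveColumnAsInt32(session, table, column) ?? 0;
    Api.SetColumn(session, table, column, current + delta);
}

With such a helper, UpdateReduceStats collapses to three AddToColumn calls between the Update constructor and update.Save().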
Example #9
		protected IEnumerable<object> RobustEnumerationIndex(IEnumerable<object> input, IEnumerable<IndexingFunc> funcs,
															IStorageActionsAccessor actions, WorkContext context, IndexingWorkStats stats)
		{
			return new RobustEnumerator(context.Configuration.MaxNumberOfItemsToIndexInSingleBatch)
			{
				BeforeMoveNext = () => stats.IndexingAttempts++,
				CancelMoveNext = () => stats.IndexingAttempts--,
				OnError = (exception, o) =>
				{
					context.AddError(name,
									TryGetDocKey(o),
									exception.Message
						);
					logIndexing.WarnException(
						String.Format("Failed to execute indexing function on {0} on {1}", name,
										TryGetDocKey(o)),
						exception);

					stats.IndexingErrors++;
				}
			}.RobustEnumeration(input, funcs);
		}
Example #10
        protected IEnumerable <object> RobustEnumerationIndex(IEnumerable <object> input, IEnumerable <IndexingFunc> funcs,
                                                              IStorageActionsAccessor actions, WorkContext context, IndexingWorkStats stats)
        {
            return(new RobustEnumerator(context.Configuration.MaxNumberOfItemsToIndexInSingleBatch)
            {
                BeforeMoveNext = () => stats.IndexingAttempts++,
                CancelMoveNext = () => stats.IndexingAttempts--,
                OnError = (exception, o) =>
                {
                    context.AddError(name,
                                     TryGetDocKey(o),
                                     exception.Message
                                     );
                    logIndexing.WarnException(
                        String.Format("Failed to execute indexing function on {0} on {1}", name,
                                      TryGetDocKey(o)),
                        exception);

                    stats.IndexingErrors++;
                }
            }.RobustEnumeration(input, funcs));
        }
Example #11
        protected void Write(WorkContext context, Func <IndexWriter, Analyzer, IndexingWorkStats, int> action)
        {
            if (disposed)
            {
                throw new ObjectDisposedException("Index " + name + " has been disposed");
            }
            lock (writeLock)
            {
                bool     shouldRecreateSearcher;
                var      toDispose      = new List <Action>();
                Analyzer searchAnalyzer = null;
                try
                {
                    try
                    {
                        searchAnalyzer = CreateAnalyzer(new LowerCaseKeywordAnalyzer(), toDispose);
                    }
                    catch (Exception e)
                    {
                        context.AddError(name, "Creating Analyzer", e.ToString());
                        throw;
                    }

                    if (indexWriter == null)
                    {
                        indexWriter = CreateIndexWriter(directory);
                    }

                    var stats = new IndexingWorkStats();
                    try
                    {
                        var changedDocs = action(indexWriter, searchAnalyzer, stats);
                        docCountSinceLastOptimization += changedDocs;
                        shouldRecreateSearcher         = changedDocs > 0;
                        foreach (IIndexExtension indexExtension in indexExtensions.Values)
                        {
                            indexExtension.OnDocumentsIndexed(currentlyIndexDocuments);
                        }
                    }
                    catch (Exception e)
                    {
                        context.AddError(name, null, e.ToString());
                        throw;
                    }

                    UpdateIndexingStats(context, stats);

                    WriteTempIndexToDiskIfNeeded(context);

                    if (configuration.TransactionMode == TransactionMode.Safe)
                    {
                        Flush();                         // just make sure changes are flushed to disk
                    }
                }
                finally
                {
                    currentlyIndexDocuments.Clear();
                    if (searchAnalyzer != null)
                    {
                        searchAnalyzer.Close();
                    }
                    foreach (Action dispose in toDispose)
                    {
                        dispose();
                    }
                }
                if (shouldRecreateSearcher)
                {
                    RecreateSearcher();
                }
            }
        }
Example #12
		public void UpdateIndexingStats(string index, IndexingWorkStats stats)
		{
			SetCurrentIndexStatsToImpl(index);
			using (var update = new Update(session, IndexesStats, JET_prep.Replace))
			{
				var oldAttempts = Api.RetrieveColumnAsInt32(session, IndexesStats, tableColumnsCache.IndexesStatsColumns["attempts"]) ?? 0;
				Api.SetColumn(session, IndexesStats, tableColumnsCache.IndexesStatsColumns["attempts"],
					oldAttempts + stats.IndexingAttempts);

				var oldErrors = Api.RetrieveColumnAsInt32(session, IndexesStats, tableColumnsCache.IndexesStatsColumns["errors"]) ?? 0;
				Api.SetColumn(session, IndexesStats, tableColumnsCache.IndexesStatsColumns["errors"],
					oldErrors + stats.IndexingErrors);

				var oldSuccesses = Api.RetrieveColumnAsInt32(session, IndexesStats, tableColumnsCache.IndexesStatsColumns["successes"]) ?? 0;
				Api.SetColumn(session, IndexesStats, tableColumnsCache.IndexesStatsColumns["successes"],
					oldSuccesses + stats.IndexingSuccesses);

				update.Save();
			}
		}
Example #13
		public override void IndexDocuments(
			AbstractViewGenerator viewGenerator,
			IndexingBatch batch,
			IStorageActionsAccessor actions,
			DateTime minimumTimestamp)
		{
			var count = 0;
			var sourceCount = 0;
			var sw = Stopwatch.StartNew();
			var start = SystemTime.UtcNow;
			var changed = new HashSet<ReduceKeyAndBucket>();
			var documentsWrapped = batch.Docs.Select(doc =>
			{
				sourceCount++;
				var documentId = doc.__document_id;
				actions.MapReduce.DeleteMappedResultsForDocumentId((string)documentId, name, changed);
				return doc;
			})
				.Where(x => x is FilteredDocument == false)
				.ToList();
			var items = new ConcurrentQueue<MapResultItem>();
			var stats = new IndexingWorkStats();
			var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();

			if (documentsWrapped.Count > 0)
				actions.MapReduce.UpdateRemovedMapReduceStats(name, changed);

			BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, partition =>
			{
				using (CurrentIndexingScope.Current = new CurrentIndexingScope(LoadDocument, allReferencedDocs.Enqueue))
				{
					var mapResults = RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, stats);
					var currentDocumentResults = new List<object>();
					string currentKey = null;
					foreach (var currentDoc in mapResults)
					{
						var documentId = GetDocumentId(currentDoc);
						if (documentId != currentKey)
						{
							count += ProcessBatch(viewGenerator, currentDocumentResults, currentKey, items);
							currentDocumentResults.Clear();
							currentKey = documentId; 
						}
						currentDocumentResults.Add(new DynamicJsonObject(RavenJObject.FromObject(currentDoc, jsonSerializer)));
					}
					count += ProcessBatch(viewGenerator, currentDocumentResults, currentKey, items);
				}
			});
			

			IDictionary<string, HashSet<string>> result;
			while (allReferencedDocs.TryDequeue(out result))
			{
				foreach (var referencedDocument in result)
				{
					actions.Indexing.UpdateDocumentReferences(name, referencedDocument.Key, referencedDocument.Value);
					actions.General.MaybePulseTransaction();
				}
			}

			foreach (var mapResultItem in items)
			{
				changed.Add(new ReduceKeyAndBucket(mapResultItem.Bucket, mapResultItem.ReduceKey));
				actions.MapReduce.PutMappedResult(name, mapResultItem.DocId, mapResultItem.ReduceKey, mapResultItem.Data);
				actions.General.MaybePulseTransaction();
			}

			UpdateIndexingStats(context, stats);
			actions.MapReduce.ScheduleReductions(name, 0, changed);
			AddindexingPerformanceStat(new IndexingPerformanceStats
			{
				OutputCount = count,
				InputCount = sourceCount,
				Operation = "Map",
				Duration = sw.Elapsed,
				Started = start
			});
			logIndexing.Debug("Mapped {0} documents for {1}", count, name);
		}
Example #14
        public override void IndexDocuments(
            AbstractViewGenerator viewGenerator,
            IndexingBatch batch,
            WorkContext context,
            IStorageActionsAccessor actions,
            DateTime minimumTimestamp)
        {
            var count            = 0;
            var sourceCount      = 0;
            var sw               = Stopwatch.StartNew();
            var start            = SystemTime.UtcNow;
            var changed          = new HashSet <ReduceKeyAndBucket>();
            var documentsWrapped = batch.Docs.Select(doc =>
            {
                sourceCount++;
                var documentId = doc.__document_id;
                actions.MapReduce.DeleteMappedResultsForDocumentId((string)documentId, name, changed);
                return(doc);
            })
                                   .Where(x => x is FilteredDocument == false);
            var items = new List <MapResultItem>();
            var stats = new IndexingWorkStats();

            foreach (
                var mappedResultFromDocument in
                GroupByDocumentId(context,
                                  RobustEnumerationIndex(documentsWrapped.GetEnumerator(), viewGenerator.MapDefinitions, actions, stats)))
            {
                var dynamicResults = mappedResultFromDocument.Select(x => (object)new DynamicJsonObject(RavenJObject.FromObject(x, jsonSerializer))).ToList();
                foreach (
                    var doc in
                    RobustEnumerationReduceDuringMapPhase(dynamicResults.GetEnumerator(), viewGenerator.ReduceDefinition, actions, context))
                {
                    count++;

                    var reduceValue = viewGenerator.GroupByExtraction(doc);
                    if (reduceValue == null)
                    {
                        logIndexing.Debug("Field {0} is used as the reduce key and cannot be null, skipping document {1}",
                                          viewGenerator.GroupByExtraction, mappedResultFromDocument.Key);
                        continue;
                    }
                    var reduceKey = ReduceKeyToString(reduceValue);
                    var docId     = mappedResultFromDocument.Key.ToString();

                    var data = GetMappedData(doc);

                    logIndexing.Debug("Mapped result for index '{0}' doc '{1}': '{2}'", name, docId, data);

                    items.Add(new MapResultItem
                    {
                        Data      = data,
                        DocId     = docId,
                        ReduceKey = reduceKey
                    });

                    changed.Add(new ReduceKeyAndBucket(IndexingUtil.MapBucket(docId), reduceKey));
                }
            }

            int mapCount = 0;

            foreach (var mapResultItem in items)
            {
                actions.MapReduce.PutMappedResult(name, mapResultItem.DocId, mapResultItem.ReduceKey, mapResultItem.Data);
                if (mapCount++ % 50000 == 0)
                {
                    // The reason this is here is to protect us from Version Store Out Of Memory error during indexing
                    // this can happen if we have indexes that output a VERY large number of items per doc.
                    actions.General.PulseTransaction();
                }
            }

            UpdateIndexingStats(context, stats);
            actions.MapReduce.ScheduleReductions(name, 0, changed);
            AddindexingPerformanceStat(new IndexingPerformanceStats
            {
                OutputCount = count,
                InputCount  = sourceCount,
                Operation   = "Map",
                Duration    = sw.Elapsed,
                Started     = start
            });
            logIndexing.Debug("Mapped {0} documents for {1}", count, name);
        }
Example #15
        public override void IndexDocuments(
            AbstractViewGenerator viewGenerator,
            IEnumerable <dynamic> documents,
            WorkContext context,
            IStorageActionsAccessor actions,
            DateTime minimumTimestamp)
        {
            var count            = 0;
            var sourceCount      = 0;
            var sw               = Stopwatch.StartNew();
            var changed          = new HashSet <ReduceKeyAndBucket>();
            var documentsWrapped = documents.Select(doc =>
            {
                sourceCount++;
                var documentId = doc.__document_id;
                actions.MapReduce.DeleteMappedResultsForDocumentId((string)documentId, name, changed);
                return(doc);
            })
                                   .Where(x => x is FilteredDocument == false);
            var stats = new IndexingWorkStats();

            foreach (
                var mappedResultFromDocument in
                GroupByDocumentId(context,
                                  RobustEnumerationIndex(documentsWrapped, viewGenerator.MapDefinitions, actions, context, stats)))
            {
                foreach (
                    var doc in
                    RobustEnumerationReduceDuringMapPhase(mappedResultFromDocument, viewGenerator.ReduceDefinition, actions, context))
                {
                    count++;

                    var reduceValue = viewGenerator.GroupByExtraction(doc);
                    if (reduceValue == null)
                    {
                        logIndexing.Debug("Field {0} is used as the reduce key and cannot be null, skipping document {1}",
                                          viewGenerator.GroupByExtraction, mappedResultFromDocument.Key);
                        continue;
                    }
                    var reduceKey = ReduceKeyToString(reduceValue);
                    var docId     = mappedResultFromDocument.Key.ToString();

                    var data = GetMappedData(doc);

                    logIndexing.Debug("Mapped result for index '{0}' doc '{1}': '{2}'", name, docId, data);

                    actions.MapReduce.PutMappedResult(name, docId, reduceKey, data);

                    changed.Add(new ReduceKeyAndBucket(IndexingUtil.MapBucket(docId), reduceKey));
                }
            }
            UpdateIndexingStats(context, stats);
            actions.MapReduce.ScheduleReductions(name, 0, changed);
            AddindexingPerformanceStat(new IndexingPerformanceStats
            {
                OutputCount = count,
                InputCount  = sourceCount,
                Operation   = "Map",
                Duration    = sw.Elapsed
            });
            logIndexing.Debug("Mapped {0} documents for {1}", count, name);
        }
Example #16
		protected void Write(WorkContext context, Func<IndexWriter, Analyzer, IndexingWorkStats, bool> action)
		{
			if (disposed)
				throw new ObjectDisposedException("Index " + name + " has been disposed");
			lock (writeLock)
			{
				bool shouldRecreateSearcher;
				var toDispose = new List<Action>();
				Analyzer searchAnalyzer = null;
				try
				{
					try
					{
						searchAnalyzer = CreateAnalyzer(new LowerCaseKeywordAnalyzer(), toDispose);
					}
					catch (Exception e)
					{
						context.AddError(name, "Creating Analyzer", e.ToString());
						throw;
					}

					if (indexWriter == null)
					{
						indexWriter = CreateIndexWriter(directory);
					}

					var stats = new IndexingWorkStats();
					try
					{
						shouldRecreateSearcher = action(indexWriter, searchAnalyzer, stats);
						foreach (IIndexExtension indexExtension in indexExtensions.Values)
						{
							indexExtension.OnDocumentsIndexed(currentlyIndexDocuments);
						}
					}
					catch (Exception e)
					{
						context.AddError(name, null, e.ToString());
						throw;
					}

					UpdateIndexingStats(context, stats);

					WriteTempIndexToDiskIfNeeded(context);

					if (configuration.TransactionMode == TransactionMode.Safe)
					{
						Flush(optimize: false);
					}
				}
				finally
				{
					currentlyIndexDocuments.Clear();
					if (searchAnalyzer != null)
						searchAnalyzer.Close();
					foreach (Action dispose in toDispose)
					{
						dispose();
					}
				}
				if (shouldRecreateSearcher)
					RecreateSearcher();
			}
		}
Example #17
        public override void IndexDocuments(
            AbstractViewGenerator viewGenerator,
            IndexingBatch batch,
            IStorageActionsAccessor actions,
            DateTime minimumTimestamp)
        {
            var count       = 0;
            var sourceCount = 0;
            var sw          = Stopwatch.StartNew();
            var start       = SystemTime.UtcNow;
            var deleted     = new Dictionary <ReduceKeyAndBucket, int>();

            RecordCurrentBatch("Current Map", batch.Docs.Count);
            var documentsWrapped = batch.Docs.Select(doc =>
            {
                sourceCount++;
                var documentId = doc.__document_id;
                actions.MapReduce.DeleteMappedResultsForDocumentId((string)documentId, indexId, deleted);
                return(doc);
            })
                                   .Where(x => x is FilteredDocument == false)
                                   .ToList();
            var allReferencedDocs = new ConcurrentQueue <IDictionary <string, HashSet <string> > >();
            var allReferenceEtags = new ConcurrentQueue <IDictionary <string, Etag> >();
            var allState          = new ConcurrentQueue <Tuple <HashSet <ReduceKeyAndBucket>, IndexingWorkStats, Dictionary <string, int> > >();

            BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, partition =>
            {
                var localStats   = new IndexingWorkStats();
                var localChanges = new HashSet <ReduceKeyAndBucket>();
                var statsPerKey  = new Dictionary <string, int>();
                allState.Enqueue(Tuple.Create(localChanges, localStats, statsPerKey));

                using (CurrentIndexingScope.Current = new CurrentIndexingScope(context.Database, PublicName))
                {
                    // we are writing to the transactional store from multiple threads here, and in a streaming fashion
                    // should result in less memory and better perf
                    context.TransactionalStorage.Batch(accessor =>
                    {
                        var mapResults             = RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, localStats);
                        var currentDocumentResults = new List <object>();
                        string currentKey          = null;
                        foreach (var currentDoc in mapResults)
                        {
                            var documentId = GetDocumentId(currentDoc);
                            if (documentId != currentKey)
                            {
                                count += ProcessBatch(viewGenerator, currentDocumentResults, currentKey, localChanges, accessor, statsPerKey);
                                currentDocumentResults.Clear();
                                currentKey = documentId;
                            }
                            currentDocumentResults.Add(new DynamicJsonObject(RavenJObject.FromObject(currentDoc, jsonSerializer)));

                            EnsureValidNumberOfOutputsForDocument(documentId, currentDocumentResults.Count);

                            Interlocked.Increment(ref localStats.IndexingSuccesses);
                        }
                        count += ProcessBatch(viewGenerator, currentDocumentResults, currentKey, localChanges, accessor, statsPerKey);
                    });
                    allReferenceEtags.Enqueue(CurrentIndexingScope.Current.ReferencesEtags);
                    allReferencedDocs.Enqueue(CurrentIndexingScope.Current.ReferencedDocuments);
                }
            });



            UpdateDocumentReferences(actions, allReferencedDocs, allReferenceEtags);

            var changed = allState.SelectMany(x => x.Item1).Concat(deleted.Keys)
                          .Distinct()
                          .ToList();

            var stats          = new IndexingWorkStats(allState.Select(x => x.Item2));
            var reduceKeyStats = allState.SelectMany(x => x.Item3)
                                 .GroupBy(x => x.Key)
                                 .Select(g => new { g.Key, Count = g.Sum(x => x.Value) })
                                 .ToList();

            BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, reduceKeyStats, enumerator => context.TransactionalStorage.Batch(accessor =>
            {
                while (enumerator.MoveNext())
                {
                    var reduceKeyStat = enumerator.Current;
                    accessor.MapReduce.IncrementReduceKeyCounter(indexId, reduceKeyStat.Key, reduceKeyStat.Count);
                }
            }));

            BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, changed, enumerator => context.TransactionalStorage.Batch(accessor =>
            {
                while (enumerator.MoveNext())
                {
                    accessor.MapReduce.ScheduleReductions(indexId, 0, enumerator.Current);
                }
            }));


            UpdateIndexingStats(context, stats);
            AddindexingPerformanceStat(new IndexingPerformanceStats
            {
                OutputCount = count,
                ItemsCount  = sourceCount,
                InputCount  = documentsWrapped.Count,
                Operation   = "Map",
                Duration    = sw.Elapsed,
                Started     = start
            });
            BatchCompleted("Current Map");
            logIndexing.Debug("Mapped {0} documents for {1}", count, indexId);
        }
Example #18
		public void UpdateIndexingStats(int id, IndexingWorkStats stats)
		{
			SetCurrentIndexStatsToImpl(id);
			using (var update = new Update(session, IndexesStats, JET_prep.Replace))
			{
				var oldAttempts = Api.RetrieveColumnAsInt32(session, IndexesStats, tableColumnsCache.IndexesStatsColumns["attempts"]) ?? 0;
				Api.SetColumn(session, IndexesStats, tableColumnsCache.IndexesStatsColumns["attempts"],
					oldAttempts + stats.IndexingAttempts);

				var oldErrors = Api.RetrieveColumnAsInt32(session, IndexesStats, tableColumnsCache.IndexesStatsColumns["errors"]) ?? 0;
				Api.SetColumn(session, IndexesStats, tableColumnsCache.IndexesStatsColumns["errors"],
					oldErrors + stats.IndexingErrors);

				var oldSuccesses = Api.RetrieveColumnAsInt32(session, IndexesStats, tableColumnsCache.IndexesStatsColumns["successes"]) ?? 0;
				Api.SetColumn(session, IndexesStats, tableColumnsCache.IndexesStatsColumns["successes"],
					oldSuccesses + stats.IndexingSuccesses);

				Api.SetColumn(session, IndexesStats, tableColumnsCache.IndexesStatsColumns["last_indexing_time"],
					SystemTime.UtcNow.ToBinary());

				update.Save();
			}
		}
Example #19
        public override void IndexDocuments(
            AbstractViewGenerator viewGenerator,
            IEnumerable <dynamic> documents,
            WorkContext context,
            IStorageActionsAccessor actions,
            DateTime minimumTimestamp)
        {
            var count = 0;

            // we mark the reduce keys to delete when we delete the mapped results, then we remove
            // any reduce key that is actually being used to generate new mapped results
            // this way, only reduces that removed data will force us to use the tasks approach
            var reduceKeysToDelete = new HashSet <string>(StringComparer.InvariantCultureIgnoreCase);
            var documentsWrapped   = documents.Select(doc =>
            {
                var documentId = doc.__document_id;
                foreach (var reduceKey in actions.MappedResults.DeleteMappedResultsForDocumentId((string)documentId, name))
                {
                    reduceKeysToDelete.Add(reduceKey);
                }
                return(doc);
            });
            var stats = new IndexingWorkStats();

            foreach (var mappedResultFromDocument in GroupByDocumentId(RobustEnumerationIndex(documentsWrapped, viewGenerator.MapDefinitions, actions, context, stats)))
            {
                foreach (var doc in RobustEnumerationReduceDuringMapPhase(mappedResultFromDocument, viewGenerator.ReduceDefinition, actions, context))
                {
                    count++;

                    var reduceValue = viewGenerator.GroupByExtraction(doc);
                    if (reduceValue == null)
                    {
                        logIndexing.Debug("Field {0} is used as the reduce key and cannot be null, skipping document {1}", viewGenerator.GroupByExtraction, mappedResultFromDocument.Key);
                        continue;
                    }
                    var reduceKey = ReduceKeyToString(reduceValue);
                    var docId     = mappedResultFromDocument.Key.ToString();

                    reduceKeysToDelete.Remove((string)reduceKey);

                    var data = GetMappedData(doc);

                    logIndexing.Debug("Mapped result for index '{0}' doc '{1}': '{2}'", name, docId, data);

                    var hash = ComputeHash(name, reduceKey);

                    actions.MappedResults.PutMappedResult(name, docId, reduceKey, data, hash);
                }
            }
            UpdateIndexingStats(context, stats);
            if (reduceKeysToDelete.Count > 0)
            {
                actions.Tasks.AddTask(new ReduceTask
                {
                    Index      = name,
                    ReduceKeys = reduceKeysToDelete.ToArray()
                }, minimumTimestamp);
            }

            logIndexing.Debug("Mapped {0} documents for {1}", count, name);
        }
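The comment at the top of this example describes a mark-then-unmark scheme: every reduce key touched by a delete becomes a cleanup candidate, and re-emitting the key in the same batch withdraws it. The pattern in isolation, where deletedKeys and emittedKeys are hypothetical stand-ins for the values the example derives:

var candidates = new HashSet<string>(deletedKeys, StringComparer.InvariantCultureIgnoreCase);
foreach (var key in emittedKeys)
    candidates.Remove(key);   // the key still produces data, so no cleanup needed
// whatever remains lost all of its mapped results and needs a ReduceTask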
Example #20
		public void UpdateReduceStats(string index, IndexingWorkStats stats)
		{
			var indexStats = GetCurrentIndex(index);
			indexStats["reduce_attempts"] = indexStats.Value<int>("reduce_attempts") + stats.ReduceAttempts;
			indexStats["reduce_successes"] = indexStats.Value<int>("reduce_successes") + stats.ReduceSuccesses;
			indexStats["reduce_failures"] = indexStats.Value<int>("reduce_failures") + stats.ReduceSuccesses;
			storage.IndexingStats.UpdateKey(indexStats);
		
		}
		public void UpdateIndexingStats(string index, IndexingWorkStats stats)
		{
			using (storage.WriteLock())
			{
				var indexStats = (RavenJObject) GetCurrentIndex(index).CloneToken();
				indexStats["attempts"] = indexStats.Value<int>("attempts") + stats.IndexingAttempts;
				indexStats["successes"] = indexStats.Value<int>("successes") + stats.IndexingSuccesses;
				indexStats["failures"] = indexStats.Value<int>("failures") + stats.IndexingErrors;
				indexStats["lastIndexingTime"] = SystemTime.UtcNow;
				storage.IndexingStats.UpdateKey(indexStats);
			}
		}
Example #22
        public override IndexingPerformanceStats IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp, CancellationToken token)
        {
            token.ThrowIfCancellationRequested();

            var count            = 0;
            var sourceCount      = 0;
            var deleted          = new Dictionary <ReduceKeyAndBucket, int>();
            var performance      = RecordCurrentBatch("Current Map", "Map", batch.Docs.Count);
            var performanceStats = new List <BasePerformanceStats>();

            var usedStorageAccessors = new ConcurrentSet <IStorageActionsAccessor>();

            if (usedStorageAccessors.TryAdd(actions))
            {
                var storageCommitDuration = new Stopwatch();

                actions.BeforeStorageCommit += storageCommitDuration.Start;

                actions.AfterStorageCommit += () =>
                {
                    storageCommitDuration.Stop();

                    performanceStats.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds));
                };
            }

            var deleteMappedResultsDuration = new Stopwatch();
            var documentsWrapped            = batch.Docs.Select(doc =>
            {
                token.ThrowIfCancellationRequested();

                sourceCount++;
                var documentId = doc.__document_id;

                using (StopwatchScope.For(deleteMappedResultsDuration))
                {
                    actions.MapReduce.DeleteMappedResultsForDocumentId((string)documentId, indexId, deleted);
                }

                return(doc);
            })
                                              .Where(x => x is FilteredDocument == false)
                                              .ToList();

            performanceStats.Add(new PerformanceStats
            {
                Name       = IndexingOperation.Map_DeleteMappedResults,
                DurationMs = deleteMappedResultsDuration.ElapsedMilliseconds,
            });

            var allReferencedDocs = new ConcurrentQueue <IDictionary <string, HashSet <string> > >();
            var allReferenceEtags = new ConcurrentQueue <IDictionary <string, Etag> >();
            var allState          = new ConcurrentQueue <Tuple <HashSet <ReduceKeyAndBucket>, IndexingWorkStats, Dictionary <string, int> > >();

            var parallelOperations = new ConcurrentQueue <ParallelBatchStats>();

            var parallelProcessingStart = SystemTime.UtcNow;

            BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, partition =>
            {
                token.ThrowIfCancellationRequested();
                var parallelStats = new ParallelBatchStats
                {
                    StartDelay = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds
                };

                var localStats   = new IndexingWorkStats();
                var localChanges = new HashSet <ReduceKeyAndBucket>();
                var statsPerKey  = new Dictionary <string, int>();

                var linqExecutionDuration            = new Stopwatch();
                var reduceInMapLinqExecutionDuration = new Stopwatch();
                var putMappedResultsDuration         = new Stopwatch();
                var convertToRavenJObjectDuration    = new Stopwatch();

                allState.Enqueue(Tuple.Create(localChanges, localStats, statsPerKey));

                using (CurrentIndexingScope.Current = new CurrentIndexingScope(context.Database, PublicName))
                {
                    // we are writing to the transactional store from multiple threads here, and in a streaming fashion
                    // should result in less memory and better perf
                    context.TransactionalStorage.Batch(accessor =>
                    {
                        if (usedStorageAccessors.TryAdd(accessor))
                        {
                            var storageCommitDuration = new Stopwatch();

                            accessor.BeforeStorageCommit += storageCommitDuration.Start;

                            accessor.AfterStorageCommit += () =>
                            {
                                storageCommitDuration.Stop();

                                parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds));
                            };
                        }

                        var mapResults             = RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, localStats, linqExecutionDuration);
                        var currentDocumentResults = new List <object>();
                        string currentKey          = null;
                        bool skipDocument          = false;

                        foreach (var currentDoc in mapResults)
                        {
                            token.ThrowIfCancellationRequested();

                            var documentId = GetDocumentId(currentDoc);
                            if (documentId != currentKey)
                            {
                                count += ProcessBatch(viewGenerator, currentDocumentResults, currentKey, localChanges, accessor, statsPerKey, reduceInMapLinqExecutionDuration, putMappedResultsDuration, convertToRavenJObjectDuration);

                                currentDocumentResults.Clear();
                                currentKey = documentId;
                            }
                            else if (skipDocument)
                            {
                                continue;
                            }

                            RavenJObject currentDocJObject;
                            using (StopwatchScope.For(convertToRavenJObjectDuration))
                            {
                                currentDocJObject = RavenJObject.FromObject(currentDoc, jsonSerializer);
                            }

                            currentDocumentResults.Add(new DynamicJsonObject(currentDocJObject));

                            if (EnsureValidNumberOfOutputsForDocument(documentId, currentDocumentResults.Count) == false)
                            {
                                skipDocument = true;
                                currentDocumentResults.Clear();
                                continue;
                            }

                            Interlocked.Increment(ref localStats.IndexingSuccesses);
                        }
                        count += ProcessBatch(viewGenerator, currentDocumentResults, currentKey, localChanges, accessor, statsPerKey, reduceInMapLinqExecutionDuration, putMappedResultsDuration, convertToRavenJObjectDuration);

                        parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.LoadDocument, CurrentIndexingScope.Current.LoadDocumentDuration.ElapsedMilliseconds));
                        parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Linq_MapExecution, linqExecutionDuration.ElapsedMilliseconds));
                        parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Linq_ReduceLinqExecution, reduceInMapLinqExecutionDuration.ElapsedMilliseconds));
                        parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Map_PutMappedResults, putMappedResultsDuration.ElapsedMilliseconds));
                        parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Map_ConvertToRavenJObject, convertToRavenJObjectDuration.ElapsedMilliseconds));

                        parallelOperations.Enqueue(parallelStats);
                    });

                    allReferenceEtags.Enqueue(CurrentIndexingScope.Current.ReferencesEtags);
                    allReferencedDocs.Enqueue(CurrentIndexingScope.Current.ReferencedDocuments);
                }
            });

            performanceStats.Add(new ParallelPerformanceStats
            {
                NumberOfThreads   = parallelOperations.Count,
                DurationMs        = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds,
                BatchedOperations = parallelOperations.ToList()
            });

            var updateDocumentReferencesDuration = new Stopwatch();

            using (StopwatchScope.For(updateDocumentReferencesDuration))
            {
                UpdateDocumentReferences(actions, allReferencedDocs, allReferenceEtags);
            }
            performanceStats.Add(PerformanceStats.From(IndexingOperation.UpdateDocumentReferences, updateDocumentReferencesDuration.ElapsedMilliseconds));

            var changed = allState.SelectMany(x => x.Item1).Concat(deleted.Keys)
                          .Distinct()
                          .ToList();

            var stats          = new IndexingWorkStats(allState.Select(x => x.Item2));
            var reduceKeyStats = allState.SelectMany(x => x.Item3)
                                 .GroupBy(x => x.Key)
                                 .Select(g => new { g.Key, Count = g.Sum(x => x.Value) })
                                 .ToList();

            BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, reduceKeyStats, enumerator => context.TransactionalStorage.Batch(accessor =>
            {
                while (enumerator.MoveNext())
                {
                    var reduceKeyStat = enumerator.Current;
                    accessor.MapReduce.IncrementReduceKeyCounter(indexId, reduceKeyStat.Key, reduceKeyStat.Count);
                }
            }));

            actions.General.MaybePulseTransaction();

            var parallelReductionOperations = new ConcurrentQueue <ParallelBatchStats>();
            var parallelReductionStart      = SystemTime.UtcNow;

            BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, changed, enumerator => context.TransactionalStorage.Batch(accessor =>
            {
                var parallelStats = new ParallelBatchStats
                {
                    StartDelay = (long)(SystemTime.UtcNow - parallelReductionStart).TotalMilliseconds
                };

                var scheduleReductionsDuration = new Stopwatch();

                using (StopwatchScope.For(scheduleReductionsDuration))
                {
                    while (enumerator.MoveNext())
                    {
                        accessor.MapReduce.ScheduleReductions(indexId, 0, enumerator.Current);
                        accessor.General.MaybePulseTransaction();
                    }
                }

                parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Map_ScheduleReductions, scheduleReductionsDuration.ElapsedMilliseconds));
                parallelReductionOperations.Enqueue(parallelStats);
            }));

            performanceStats.Add(new ParallelPerformanceStats
            {
                NumberOfThreads   = parallelReductionOperations.Count,
                DurationMs        = (long)(SystemTime.UtcNow - parallelReductionStart).TotalMilliseconds,
                BatchedOperations = parallelReductionOperations.ToList()
            });

            UpdateIndexingStats(context, stats);

            performance.OnCompleted = () => BatchCompleted("Current Map", "Map", sourceCount, count, performanceStats);

            logIndexing.Debug("Mapped {0} documents for {1}", count, indexId);

            return(performance);
        }
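StopwatchScope.For is used throughout this example but never defined here. A minimal sketch consistent with the usage, i.e. an IDisposable that runs the stopwatch only for the duration of the using block so repeated scopes accumulate into a single Elapsed total; the actual type may differ:

using System;
using System.Diagnostics;

public struct StopwatchScope : IDisposable
{
    private readonly Stopwatch stopwatch;

    private StopwatchScope(Stopwatch stopwatch)
    {
        this.stopwatch = stopwatch;
        stopwatch.Start();
    }

    public static StopwatchScope For(Stopwatch stopwatch)
    {
        return new StopwatchScope(stopwatch);
    }

    public void Dispose()
    {
        stopwatch.Stop();   // Elapsed keeps accumulating across scopes
    }
}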
Example #23
		protected IEnumerable<object> RobustEnumerationIndex(IEnumerator<object> input, List<IndexingFunc> funcs, IndexingWorkStats stats)
		{
			return new RobustEnumerator(context.CancellationToken, context.Configuration.MaxNumberOfItemsToIndexInSingleBatch)
			{
				BeforeMoveNext = () => Interlocked.Increment(ref stats.IndexingAttempts),
				CancelMoveNext = () => Interlocked.Decrement(ref stats.IndexingAttempts),
				OnError = (exception, o) =>
				{
					context.AddError(name,
									TryGetDocKey(o),
									exception.Message,
									"Map"
						);
					logIndexing.WarnException(
						String.Format("Failed to execute indexing function on {0} on {1}", name,
										TryGetDocKey(o)),
						exception);

					stats.IndexingErrors++;
				}
			}.RobustEnumeration(input, funcs);
		}
Example #24
		public override IndexingPerformanceStats IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp, CancellationToken token)
		{
			token.ThrowIfCancellationRequested();

			var count = 0;
			var sourceCount = 0;
			var deleted = new Dictionary<ReduceKeyAndBucket, int>();
			var performance = RecordCurrentBatch("Current Map", "Map", batch.Docs.Count);
			var performanceStats = new List<BasePerformanceStats>();

			var usedStorageAccessors = new ConcurrentSet<IStorageActionsAccessor>();

			if (usedStorageAccessors.TryAdd(actions))
			{
				var storageCommitDuration = new Stopwatch();

				actions.BeforeStorageCommit += storageCommitDuration.Start;

				actions.AfterStorageCommit += () =>
				{
					storageCommitDuration.Stop();

					performanceStats.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds));
				};
			}

			var deleteMappedResultsDuration = new Stopwatch();
			var documentsWrapped = batch.Docs.Select(doc =>
			{
				token.ThrowIfCancellationRequested();

				sourceCount++;
				var documentId = doc.__document_id;

				using (StopwatchScope.For(deleteMappedResultsDuration))
				{
					actions.MapReduce.DeleteMappedResultsForDocumentId((string)documentId, indexId, deleted);
				}
				
				return doc;
			})
			.Where(x => x is FilteredDocument == false)
			.ToList();

			performanceStats.Add(new PerformanceStats
			{
				Name = IndexingOperation.Map_DeleteMappedResults,
				DurationMs = deleteMappedResultsDuration.ElapsedMilliseconds,
			});

			var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();
			var allReferenceEtags = new ConcurrentQueue<IDictionary<string, Etag>>();
			var allState = new ConcurrentQueue<Tuple<HashSet<ReduceKeyAndBucket>, IndexingWorkStats, Dictionary<string, int>>>();

			var parallelOperations = new ConcurrentQueue<ParallelBatchStats>();

			var parallelProcessingStart = SystemTime.UtcNow;

			BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, partition =>
			{
				token.ThrowIfCancellationRequested();
				var parallelStats = new ParallelBatchStats
				{
					StartDelay = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds
				};

				var localStats = new IndexingWorkStats();
				var localChanges = new HashSet<ReduceKeyAndBucket>();
				var statsPerKey = new Dictionary<string, int>();

				var linqExecutionDuration = new Stopwatch();
				var reduceInMapLinqExecutionDuration = new Stopwatch();
				var putMappedResultsDuration = new Stopwatch();
				var convertToRavenJObjectDuration = new Stopwatch();

				allState.Enqueue(Tuple.Create(localChanges, localStats, statsPerKey));

				using (CurrentIndexingScope.Current = new CurrentIndexingScope(context.Database, PublicName))
				{
					// we are writing to the transactional store from multiple threads here, and in a streaming fashion
					// should result in less memory and better perf
					context.TransactionalStorage.Batch(accessor =>
					{
						if (usedStorageAccessors.TryAdd(accessor))
						{
							var storageCommitDuration = new Stopwatch();

							accessor.BeforeStorageCommit += storageCommitDuration.Start;

							accessor.AfterStorageCommit += () =>
							{
								storageCommitDuration.Stop();

								parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds));
							};
						}

						var mapResults = RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, localStats, linqExecutionDuration);
						var currentDocumentResults = new List<object>();
						string currentKey = null;
						bool skipDocument = false;
						
						foreach (var currentDoc in mapResults)
						{
							token.ThrowIfCancellationRequested();

							var documentId = GetDocumentId(currentDoc);
							if (documentId != currentKey)
							{
								count += ProcessBatch(viewGenerator, currentDocumentResults, currentKey, localChanges, accessor, statsPerKey, reduceInMapLinqExecutionDuration, putMappedResultsDuration, convertToRavenJObjectDuration);

								currentDocumentResults.Clear();
								currentKey = documentId;
							}
							else if (skipDocument)
							{
								continue;
							}

							RavenJObject currentDocJObject;
							using (StopwatchScope.For(convertToRavenJObjectDuration))
							{
								currentDocJObject = RavenJObject.FromObject(currentDoc, jsonSerializer);
							}

							currentDocumentResults.Add(new DynamicJsonObject(currentDocJObject));

							if (EnsureValidNumberOfOutputsForDocument(documentId, currentDocumentResults.Count) == false)
							{
								skipDocument = true;
								currentDocumentResults.Clear();
								continue;
							}

							Interlocked.Increment(ref localStats.IndexingSuccesses);
						}
						count += ProcessBatch(viewGenerator, currentDocumentResults, currentKey, localChanges, accessor, statsPerKey, reduceInMapLinqExecutionDuration, putMappedResultsDuration, convertToRavenJObjectDuration);

						parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.LoadDocument, CurrentIndexingScope.Current.LoadDocumentDuration.ElapsedMilliseconds));
						parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Linq_MapExecution, linqExecutionDuration.ElapsedMilliseconds));
						parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Linq_ReduceLinqExecution, reduceInMapLinqExecutionDuration.ElapsedMilliseconds));
						parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Map_PutMappedResults, putMappedResultsDuration.ElapsedMilliseconds));
						parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Map_ConvertToRavenJObject, convertToRavenJObjectDuration.ElapsedMilliseconds));

						parallelOperations.Enqueue(parallelStats);
					});

					allReferenceEtags.Enqueue(CurrentIndexingScope.Current.ReferencesEtags);
					allReferencedDocs.Enqueue(CurrentIndexingScope.Current.ReferencedDocuments);
				}
			});

			performanceStats.Add(new ParallelPerformanceStats
			{
				NumberOfThreads = parallelOperations.Count,
				DurationMs = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds,
				BatchedOperations = parallelOperations.ToList()
			});

			var updateDocumentReferencesDuration = new Stopwatch();
			using (StopwatchScope.For(updateDocumentReferencesDuration))
			{
				UpdateDocumentReferences(actions, allReferencedDocs, allReferenceEtags);
			}
			performanceStats.Add(PerformanceStats.From(IndexingOperation.UpdateDocumentReferences, updateDocumentReferencesDuration.ElapsedMilliseconds));

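			// merge per-partition state: changed reduce buckets (plus deletions),
			// combined work stats, and per-reduce-key output counts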
			var changed = allState.SelectMany(x => x.Item1).Concat(deleted.Keys)
					.Distinct()
					.ToList();

			var stats = new IndexingWorkStats(allState.Select(x => x.Item2));
			var reduceKeyStats = allState.SelectMany(x => x.Item3)
										 .GroupBy(x => x.Key)
										 .Select(g => new { g.Key, Count = g.Sum(x => x.Value) })
										 .ToList();

			BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, reduceKeyStats, enumerator => context.TransactionalStorage.Batch(accessor =>
			{
				while (enumerator.MoveNext())
				{
					var reduceKeyStat = enumerator.Current;
					accessor.MapReduce.IncrementReduceKeyCounter(indexId, reduceKeyStat.Key, reduceKeyStat.Count);
				}
			}));


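			// schedule reductions for every changed reduce key/bucket, again in parallel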
			var parallelReductionOperations = new ConcurrentQueue<ParallelBatchStats>();
			var parallelReductionStart = SystemTime.UtcNow;

			BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, changed, enumerator => context.TransactionalStorage.Batch(accessor =>
			{
				var parallelStats = new ParallelBatchStats
				{
					StartDelay = (long)(SystemTime.UtcNow - parallelReductionStart).TotalMilliseconds
				};

				var scheduleReductionsDuration = new Stopwatch();

				using (StopwatchScope.For(scheduleReductionsDuration))
				{
					while (enumerator.MoveNext())
					{
						accessor.MapReduce.ScheduleReductions(indexId, 0, enumerator.Current);
					}
				}

				parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Map_ScheduleReductions, scheduleReductionsDuration.ElapsedMilliseconds));
				parallelReductionOperations.Enqueue(parallelStats);
			}));

			performanceStats.Add(new ParallelPerformanceStats
			{
				NumberOfThreads = parallelReductionOperations.Count,
				DurationMs = (long)(SystemTime.UtcNow - parallelReductionStart).TotalMilliseconds,
				BatchedOperations = parallelReductionOperations.ToList()
			});

			UpdateIndexingStats(context, stats);

			performance.OnCompleted = () => BatchCompleted("Current Map", "Map", sourceCount, count, performanceStats);

			logIndexing.Debug("Mapped {0} documents for {1}", count, indexId);

			return performance;
		}
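For orientation, a hedged sketch of a call site for this override (the variable names and the surrounding executer are assumptions, not RavenDB's actual call site):

		IndexingPerformanceStats performance = mapReduceIndex.IndexDocuments(
			viewGenerator, batch, actions, SystemTime.UtcNow, context.CancellationToken);
		// performance.OnCompleted, assigned inside IndexDocuments, reports the
		// batch totals ("Current Map", source count, output count) when invoked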
Example #25
		public void UpdateIndexingStats(int id, IndexingWorkStats stats)
		{
			var key = CreateKey(id);

			ushort version;
			var index = Load(tableStorage.IndexingStats, key, out version);

			index["attempts"] = index.Value<int>("attempts") + stats.IndexingAttempts;
			index["successes"] = index.Value<int>("successes") + stats.IndexingSuccesses;
			index["failures"] = index.Value<int>("failures") + stats.IndexingErrors;
			index["lastIndexingTime"] = SystemTime.UtcNow;

			tableStorage.IndexingStats.Add(writeBatch.Value, key, index, version);
		}
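Taken in isolation, the accumulation is a simple read-modify-write; a sketch with hypothetical counts (indexId is assumed to be a valid index id):

		var stats = new IndexingWorkStats
		{
			IndexingAttempts = 5,
			IndexingSuccesses = 4,
			IndexingErrors = 1
		};
		UpdateIndexingStats(indexId, stats); // adds 5/4/1 onto the stored attempts/successes/failures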
Example #26
		protected void Write(Func<RavenIndexWriter, Analyzer, IndexingWorkStats, IndexedItemsInfo> action)
		{
			if (disposed)
				throw new ObjectDisposedException("Index " + name + " has been disposed");

			PreviousIndexTime = LastIndexTime;
			LastIndexTime = SystemTime.UtcNow;

			lock (writeLock)
			{
				bool shouldRecreateSearcher;
				var toDispose = new List<Action>();
				Analyzer searchAnalyzer = null;
				var itemsInfo = new IndexedItemsInfo(null);
				bool flushed = false;

				try
				{
					waitReason = "Write";
					try
					{
						searchAnalyzer = CreateAnalyzer(new LowerCaseKeywordAnalyzer(), toDispose);
					}
					catch (Exception e)
					{
						context.AddError(name, "Creating Analyzer", e.ToString(), "Analyzer");
						throw;
					}

					if (indexWriter == null)
					{
						CreateIndexWriter();
					}

					var locker = directory.MakeLock("writing-to-index.lock");
					try
					{
						var stats = new IndexingWorkStats();

						try
						{
							if (locker.Obtain() == false)
							{
								throw new InvalidOperationException(
									string.Format("Could not obtain the 'writing-to-index' lock of '{0}' index",
										name));
							}

							itemsInfo = action(indexWriter, searchAnalyzer, stats);
							shouldRecreateSearcher = itemsInfo.ChangedDocs > 0;

							foreach (var indexExtension in indexExtensions.Values)
							{
								indexExtension.OnDocumentsIndexed(currentlyIndexDocuments, searchAnalyzer);
							}
						}
						catch (Exception e)
						{
							context.AddError(name, null, e.ToString(), "Write");
							throw;
						}

						if (itemsInfo.ChangedDocs > 0)
						{
							WriteInMemoryIndexToDiskIfNecessary(itemsInfo.HighestETag);

							if (indexWriter != null && indexWriter.RamSizeInBytes() >= flushSize)
							{
								Flush(itemsInfo.HighestETag); // just make sure changes are flushed to disk
								flushed = true;
							}

							UpdateIndexingStats(context, stats);
						}
					}
					finally
					{
						locker.Release();
					}
				}
				catch (Exception e)
				{
					throw new InvalidOperationException("Could not properly write to index " + name, e);
				}
				finally
				{
					currentlyIndexDocuments.Clear();
					if (searchAnalyzer != null)
						searchAnalyzer.Close();
					foreach (Action dispose in toDispose)
					{
						dispose();
					}
					waitReason = null;
					LastIndexTime = SystemTime.UtcNow;
				}

				if (flushed)
				{
					try
					{
						HandleCommitPoints(itemsInfo, GetCurrentSegmentsInfo());
					}
					catch (Exception e)
					{
						logIndexing.WarnException("Could not handle commit point properly, ignoring", e);
					}
				}

				if (shouldRecreateSearcher)
					RecreateSearcher();
			}
		}
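A sketch of the delegate this Write expects (the body is illustrative only, and assumes IndexedItemsInfo.ChangedDocs is settable):

		Write((writer, analyzer, stats) =>
		{
			// add or delete Lucene documents here, bumping stats as work happens
			stats.IndexingSuccesses++;
			return new IndexedItemsInfo(null) // null etag, mirroring the default itemsInfo above
			{
				ChangedDocs = 1
			};
		});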
Example #27
		public void UpdateReduceStats(int id, IndexingWorkStats stats)
		{
			var key = CreateKey(id);

			ushort version;
			var reduceStats = Load(tableStorage.ReduceStats, key, out version);

			reduceStats["reduce_attempts"] = reduceStats.Value<int>("reduce_attempts") + stats.ReduceAttempts;
			reduceStats["reduce_successes"] = reduceStats.Value<int>("reduce_successes") + stats.ReduceSuccesses;
			reduceStats["reduce_failures"] = reduceStats.Value<int>("reduce_failures") + stats.ReduceErrors;

			tableStorage.ReduceStats.Add(writeBatch.Value, key, reduceStats, version);
		}
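The reduce-side counterpart follows the same read-modify-write pattern; a sketch with hypothetical counts:

		var stats = new IndexingWorkStats
		{
			ReduceAttempts = 100,
			ReduceSuccesses = 98,
			ReduceErrors = 2
		};
		UpdateReduceStats(indexId, stats); // indexId assumed valid, as above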
Example #28
		protected IEnumerable<object> RobustEnumerationIndex(IEnumerator<object> input, List<IndexingFunc> funcs, IndexingWorkStats stats)
		{
			Action<Exception, object> onError;
			return RobustEnumerationIndex(input, funcs, stats, out onError);
		}
Example #29
		protected void Write(Func<IndexWriter, Analyzer, IndexingWorkStats, int> action)
		{
			if (disposed)
				throw new ObjectDisposedException("Index " + name + " has been disposed");
			LastIndexTime = SystemTime.UtcNow;
			lock (writeLock)
			{
				bool shouldRecreateSearcher;
				var toDispose = new List<Action>();
				Analyzer searchAnalyzer = null;
				try
				{
					waitReason = "Write";
					try
					{
						searchAnalyzer = CreateAnalyzer(new LowerCaseKeywordAnalyzer(), toDispose);
					}
					catch (Exception e)
					{
						context.AddError(name, "Creating Analyzer", e.ToString());
						throw;
					}

					if (indexWriter == null)
					{
						CreateIndexWriter();
					}

					var locker = directory.MakeLock("writing-to-index.lock");
					try
					{
						int changedDocs;
						var stats = new IndexingWorkStats();
						try
						{
							changedDocs = action(indexWriter, searchAnalyzer, stats);
							shouldRecreateSearcher = changedDocs > 0;
							foreach (var indexExtension in indexExtensions.Values)
							{
								indexExtension.OnDocumentsIndexed(currentlyIndexDocuments);
							}
						}
						catch (Exception e)
						{
							context.AddError(name, null, e.ToString());
							throw;
						}

						if (changedDocs > 0)
						{
							UpdateIndexingStats(context, stats);
							WriteTempIndexToDiskIfNeeded(context);

							Flush(); // just make sure changes are flushed to disk
						}
					}
					finally
					{
						locker.Release();
					}
				}
				finally
				{
					currentlyIndexDocuments.Clear();
					if (searchAnalyzer != null)
						searchAnalyzer.Close();
					foreach (Action dispose in toDispose)
					{
						dispose();
					}
					waitReason = null;
					LastIndexTime = SystemTime.UtcNow;
				}
				if (shouldRecreateSearcher)
					RecreateSearcher();
			}
		}
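This older overload takes the same shape of delegate but reports only a changed-document count; a sketch (illustrative body):

		Write((writer, analyzer, stats) =>
		{
			// index documents here, then report how many changed
			stats.IndexingSuccesses++;
			return 1;
		});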
Example #30
		protected IEnumerable<object> RobustEnumerationReduce(IEnumerator<object> input, IndexingFunc func,
															IStorageActionsAccessor actions,
															IndexingWorkStats stats)
		{
			// not strictly accurate, but if we get that many errors, probably an error anyway.
			return new RobustEnumerator(context.CancellationToken, context.Configuration.MaxNumberOfItemsToIndexInSingleBatch)
			{
				BeforeMoveNext = () => Interlocked.Increment(ref stats.ReduceAttempts),
				CancelMoveNext = () => Interlocked.Decrement(ref stats.ReduceAttempts),
				OnError = (exception, o) =>
				{
					context.AddError(name,
									TryGetDocKey(o),
									exception.Message,
									"Reduce"
						);
					logIndexing.WarnException(
						String.Format("Failed to execute indexing function on {0} on {1}", name,
										TryGetDocKey(o)),
						exception);

					stats.ReduceErrors++;
				}
			}.RobustEnumeration(input, func);
		}
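A minimal calling sketch (mappedResults is a hypothetical sequence of map outputs; the other names appear in the examples above):

		var stats = new IndexingWorkStats();
		foreach (var reduced in RobustEnumerationReduce(mappedResults.GetEnumerator(), viewGenerator.ReduceDefinition, actions, stats))
		{
			// persist each reduced result; failures were logged, counted in
			// stats.ReduceErrors, and skipped
		}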
Example #31
		public override void IndexDocuments(
			AbstractViewGenerator viewGenerator,
			IndexingBatch batch,
			IStorageActionsAccessor actions,
			DateTime minimumTimestamp)
		{
			var count = 0;
			var sourceCount = 0;
			var sw = Stopwatch.StartNew();
			var start = SystemTime.UtcNow;
			var changed = new HashSet<ReduceKeyAndBucket>();
			var documentsWrapped = batch.Docs.Select(doc =>
			{
				sourceCount++;
				var documentId = doc.__document_id;
				actions.MapReduce.DeleteMappedResultsForDocumentId((string)documentId, name, changed);
				return doc;
			})
				.Where(x => x is FilteredDocument == false);
			var items = new List<MapResultItem>();
			var stats = new IndexingWorkStats();
			var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();
			using (CurrentIndexingScope.Current = new CurrentIndexingScope(LoadDocument, allReferencedDocs.Enqueue))
			{
				var mapResults = RobustEnumerationIndex(
						documentsWrapped.GetEnumerator(), 
						viewGenerator.MapDefinitions, 
						actions, 
						stats)
					.ToList();
				actions.MapReduce.UpdateRemovedMapReduceStats(name, changed);

				foreach (var mappedResultFromDocument in mapResults.GroupBy(GetDocumentId))
				{
					var dynamicResults = mappedResultFromDocument.Select(x => (object)new DynamicJsonObject(RavenJObject.FromObject(x, jsonSerializer))).ToList();
					foreach (
						var doc in
							RobustEnumerationReduceDuringMapPhase(dynamicResults.GetEnumerator(), viewGenerator.ReduceDefinition, actions, context))
					{
						count++;

						var reduceValue = viewGenerator.GroupByExtraction(doc);
						if (reduceValue == null)
						{
							logIndexing.Debug("Field {0} is used as the reduce key and cannot be null, skipping document {1}",
											  viewGenerator.GroupByExtraction, mappedResultFromDocument.Key);
							continue;
						}
						var reduceKey = ReduceKeyToString(reduceValue);
						var docId = mappedResultFromDocument.Key.ToString();

						var data = GetMappedData(doc);

						items.Add(new MapResultItem
						{
							Data = data,
							DocId = docId,
							ReduceKey = reduceKey
						});

						changed.Add(new ReduceKeyAndBucket(IndexingUtil.MapBucket(docId), reduceKey));
					}
				}
			}

			IDictionary<string, HashSet<string>> result;
			while (allReferencedDocs.TryDequeue(out result))
			{
				foreach (var referencedDocument in result)
				{
					actions.Indexing.UpdateDocumentReferences(name, referencedDocument.Key, referencedDocument.Value);
					actions.General.MaybePulseTransaction();
				}
			}

			foreach (var mapResultItem in items)
			{
				actions.MapReduce.PutMappedResult(name, mapResultItem.DocId, mapResultItem.ReduceKey, mapResultItem.Data);
				actions.General.MaybePulseTransaction();
			}

			UpdateIndexingStats(context, stats);
			actions.MapReduce.ScheduleReductions(name, 0, changed);
			AddindexingPerformanceStat(new IndexingPerformanceStats
			{
				OutputCount = count,
				InputCount = sourceCount,
				Operation = "Map",
				Duration = sw.Elapsed,
				Started = start
			});
			logIndexing.Debug("Mapped {0} documents for {1}", count, name);
		}