Example #1
        public void Index(string index,
                          AbstractViewGenerator viewGenerator,
                          IndexingBatch batch,
                          WorkContext context,
                          IStorageActionsAccessor actions,
                          DateTime minimumTimestamp)
        {
            Index value;

            if (indexes.TryGetValue(index, out value) == false)
            {
                log.Debug("Tried to index on a non existent index {0}, ignoring", index);
                return;
            }
            using (EnsureInvariantCulture())
            using (DocumentCacher.SkipSettingDocumentsInDocumentCache())
            {
                value.IndexDocuments(viewGenerator, batch, context, actions, minimumTimestamp);
                context.RaiseIndexChangeNotification(new IndexChangeNotification
                {
                    Name = index,
                    Type = IndexChangeTypes.MapCompleted
                });
            }
        }
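EnsureInvariantCulture() is referenced but not shown in these examples. A minimal sketch of such a disposable culture scope, assuming it simply swaps the thread culture and restores it on dispose (the actual RavenDB helper may differ):

    using System;
    using System.Globalization;
    using System.Threading;

    public static class CultureHelpers
    {
        // Switch the current thread to the invariant culture; restore on Dispose.
        public static IDisposable EnsureInvariantCulture()
        {
            var previous = Thread.CurrentThread.CurrentCulture;
            Thread.CurrentThread.CurrentCulture = CultureInfo.InvariantCulture;
            return new RestoreCulture(previous);
        }

        private sealed class RestoreCulture : IDisposable
        {
            private readonly CultureInfo previous;
            public RestoreCulture(CultureInfo previous) { this.previous = previous; }
            public void Dispose() { Thread.CurrentThread.CurrentCulture = previous; }
        }
    }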
Example #2
        private void IndexDocuments(IStorageActionsAccessor actions, string index, IndexingBatch batch)
        {
            var viewGenerator = context.IndexDefinitionStorage.GetViewGenerator(index);

            if (viewGenerator == null)
            {
                return;                 // index was deleted, probably
            }
            try
            {
                if (log.IsDebugEnabled)
                {
                    string ids;
                    if (batch.Ids.Count < 256)
                    {
                        ids = string.Join(",", batch.Ids);
                    }
                    else
                    {
                        ids = string.Join(", ", batch.Ids.Take(128)) + " ... " + string.Join(", ", batch.Ids.Skip(batch.Ids.Count - 128));
                    }
                    log.Debug("Indexing {0} documents for index: {1}. ({2})", batch.Docs.Count, index, ids);
                }
                context.CancellationToken.ThrowIfCancellationRequested();

                context.IndexStorage.Index(index, viewGenerator, batch.Docs, context, actions, batch.DateTime ?? DateTime.MinValue);
            }
            catch (OperationCanceledException)
            {
                throw;
            }
            catch (Exception e)
            {
                if (actions.IsWriteConflict(e))
                {
                    return;
                }
                log.WarnException(
                    string.Format("Failed to index documents for index: {0}", index),
                    e);
            }
        }
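The debug branch above abbreviates long id lists by keeping the first and last 128 entries. That logic can be factored into a small helper; a sketch (the helper name and defaults are illustrative, not part of the original code):

    using System.Collections.Generic;
    using System.Linq;

    internal static class IdListFormatter
    {
        // Mirrors the inline logic: short lists verbatim, long lists elided in the middle.
        public static string AbbreviateIds(ICollection<string> ids, int limit = 256, int edge = 128)
        {
            if (ids.Count < limit)
                return string.Join(",", ids);
            return string.Join(", ", ids.Take(edge)) + " ... " + string.Join(", ", ids.Skip(ids.Count - edge));
        }
    }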
Example #3
        public override void IndexDocuments(
            AbstractViewGenerator viewGenerator,
            IndexingBatch batch,
            IStorageActionsAccessor actions,
            DateTime minimumTimestamp)
        {
            var count            = 0;
            var sourceCount      = 0;
            var sw               = Stopwatch.StartNew();
            var start            = SystemTime.UtcNow;
            var changed          = new HashSet<ReduceKeyAndBucket>();
            var documentsWrapped = batch.Docs.Select(doc =>
            {
                sourceCount++;
                var documentId = doc.__document_id;
                actions.MapReduce.DeleteMappedResultsForDocumentId((string)documentId, name, changed);
                return doc;
            })
                                   .Where(x => x is FilteredDocument == false);
            var items             = new List<MapResultItem>();
            var stats             = new IndexingWorkStats();
            var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();

            using (CurrentIndexingScope.Current = new CurrentIndexingScope(LoadDocument, allReferencedDocs.Enqueue))
            {
                var mapResults = RobustEnumerationIndex(
                    documentsWrapped.GetEnumerator(),
                    viewGenerator.MapDefinitions,
                    actions,
                    stats)
                                 .ToList();
                actions.MapReduce.UpdateRemovedMapReduceStats(name, changed);

                foreach (var mappedResultFromDocument in mapResults.GroupBy(GetDocumentId))
                {
                    var dynamicResults = mappedResultFromDocument.Select(x => (object)new DynamicJsonObject(RavenJObject.FromObject(x, jsonSerializer))).ToList();
                    foreach (
                        var doc in
                        RobustEnumerationReduceDuringMapPhase(dynamicResults.GetEnumerator(), viewGenerator.ReduceDefinition, actions, context))
                    {
                        count++;

                        var reduceValue = viewGenerator.GroupByExtraction(doc);
                        if (reduceValue == null)
                        {
                            logIndexing.Debug("Field {0} is used as the reduce key and cannot be null, skipping document {1}",
                                              viewGenerator.GroupByExtraction, mappedResultFromDocument.Key);
                            continue;
                        }
                        var reduceKey = ReduceKeyToString(reduceValue);
                        var docId     = mappedResultFromDocument.Key.ToString();

                        var data = GetMappedData(doc);

                        items.Add(new MapResultItem
                        {
                            Data      = data,
                            DocId     = docId,
                            ReduceKey = reduceKey
                        });

                        changed.Add(new ReduceKeyAndBucket(IndexingUtil.MapBucket(docId), reduceKey));
                    }
                }
            }

            IDictionary<string, HashSet<string>> result;

            while (allReferencedDocs.TryDequeue(out result))
            {
                foreach (var referencedDocument in result)
                {
                    actions.Indexing.UpdateDocumentReferences(name, referencedDocument.Key, referencedDocument.Value);
                    actions.General.MaybePulseTransaction();
                }
            }

            foreach (var mapResultItem in items)
            {
                actions.MapReduce.PutMappedResult(name, mapResultItem.DocId, mapResultItem.ReduceKey, mapResultItem.Data);
                actions.General.MaybePulseTransaction();
            }

            UpdateIndexingStats(context, stats);
            actions.MapReduce.ScheduleReductions(name, 0, changed);
            AddindexingPerformanceStat(new IndexingPerformanceStats
            {
                OutputCount = count,
                InputCount  = sourceCount,
                Operation   = "Map",
                Duration    = sw.Elapsed,
                Started     = start
            });
            logIndexing.Debug("Mapped {0} documents for {1}", count, name);
        }
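IndexingUtil.MapBucket is not shown here; conceptually it maps a document id to a stable bucket number so that reductions can be scheduled per (reduce key, bucket) pair instead of per document. A hypothetical stand-in (the real hash function and bucket count may differ):

    internal static class IndexingUtilSketch
    {
        // Stable, case-insensitive string hash folded into a fixed bucket range.
        public static int MapBucket(string docId)
        {
            unchecked
            {
                int hash = 17;
                foreach (var ch in docId.ToLowerInvariant())
                    hash = hash * 31 + ch;
                return (hash & 0x7fffffff) % 1024; // assumed bucket count
            }
        }
    }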
Example #4
		private IEnumerable<Tuple<IndexToWorkOn, IndexingBatch>> FilterIndexes(IList<IndexToWorkOn> indexesToWorkOn, JsonDocument[] jsonDocs)
		{
			var last = jsonDocs.Last();

			Debug.Assert(last.Etag != null);
			Debug.Assert(last.LastModified != null);

			var lastEtag = last.Etag.Value;
			var lastModified = last.LastModified.Value;

			var lastIndexedEtag = new ComparableByteArray(lastEtag.ToByteArray());

			var documentRetriever = new DocumentRetriever(null, context.ReadTriggers);

			var filteredDocs =
				BackgroundTaskExecuter.Instance.Apply(jsonDocs, doc =>
				{
					doc = documentRetriever.ExecuteReadTriggers(doc, null, ReadOperation.Index);
					return doc == null ? null : new {Doc = doc, Json = JsonToExpando.Convert(doc.ToJson())};
				});

			log.Debug("After read triggers executed, {0} documents remained", filteredDocs.Count);

			var results = new Tuple<IndexToWorkOn, IndexingBatch>[indexesToWorkOn.Count];
			var actions = new Action<IStorageActionsAccessor>[indexesToWorkOn.Count];

			BackgroundTaskExecuter.Instance.ExecuteAll(context.Configuration, scheduler, indexesToWorkOn, (indexToWorkOn, i) =>
			{
				var indexLastIndexedEtag = new ComparableByteArray(indexToWorkOn.LastIndexedEtag.ToByteArray());
				if (indexLastIndexedEtag.CompareTo(lastIndexedEtag) >= 0)
					return;

				var indexName = indexToWorkOn.IndexName;
				var viewGenerator = context.IndexDefinitionStorage.GetViewGenerator(indexName);
				if (viewGenerator == null)
					return; // probably deleted

				var batch = new IndexingBatch();

				foreach (var item in filteredDocs)
				{
					// did we already index this document in this index?
					if (indexLastIndexedEtag.CompareTo(new ComparableByteArray(item.Doc.Etag.Value.ToByteArray())) >= 0)
						continue;


					// is the Raven-Entity-Name a match for the things the index executes on?
					if (viewGenerator.ForEntityNames.Count != 0 &&
					    viewGenerator.ForEntityNames.Contains(item.Doc.Metadata.Value<string>(Constants.RavenEntityName)) == false)
					{
						continue;
					}

					batch.Add(item.Doc, item.Json);

					if (batch.DateTime == null)
						batch.DateTime = item.Doc.LastModified;
					else
						batch.DateTime = batch.DateTime > item.Doc.LastModified
							? item.Doc.LastModified
							: batch.DateTime;
				}

				if (batch.Docs.Count == 0)
				{
					log.Debug("All documents have been filtered for {0}, no indexing will be performed, updating to {1}, {2}", indexName,
						lastEtag, lastModified);
					// we use it this way to batch all the updates together
					actions[i] = accessor => accessor.Indexing.UpdateLastIndexed(indexName, lastEtag, lastModified);
					return;
				}
				log.Debug("Going to index {0} documents in {1}", batch.Ids.Count, indexToWorkOn);
				results[i] = Tuple.Create(indexToWorkOn, batch);

			});

			transactionalStorage.Batch(actionsAccessor =>
			{
				foreach (var action in actions)
				{
					if (action != null)
						action(actionsAccessor);
				}
			});

			return results.Where(x => x != null);
		}
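ComparableByteArray wraps an etag's bytes so they can be ordered. A minimal sketch of such a wrapper, assumed rather than copied from the original implementation:

    using System;

    public struct ComparableByteArray : IComparable<ComparableByteArray>
    {
        private readonly byte[] bytes;

        public ComparableByteArray(byte[] bytes)
        {
            this.bytes = bytes;
        }

        // Shorter arrays sort first; otherwise compare byte-by-byte.
        public int CompareTo(ComparableByteArray other)
        {
            if (bytes.Length != other.bytes.Length)
                return bytes.Length - other.bytes.Length;
            for (int i = 0; i < bytes.Length; i++)
            {
                if (bytes[i] != other.bytes[i])
                    return bytes[i] - other.bytes[i];
            }
            return 0;
        }
    }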
Example #5
        public override void IndexDocuments(
            AbstractViewGenerator viewGenerator,
            IndexingBatch batch,
            WorkContext context,
            IStorageActionsAccessor actions,
            DateTime minimumTimestamp)
        {
            var count            = 0;
            var sourceCount      = 0;
            var sw               = Stopwatch.StartNew();
            var start            = SystemTime.UtcNow;
            var changed          = new HashSet<ReduceKeyAndBucket>();
            var documentsWrapped = batch.Docs.Select(doc =>
            {
                sourceCount++;
                var documentId = doc.__document_id;
                actions.MapReduce.DeleteMappedResultsForDocumentId((string)documentId, name, changed);
                return doc;
            })
                                   .Where(x => x is FilteredDocument == false);
            var items = new List<MapResultItem>();
            var stats = new IndexingWorkStats();

            foreach (
                var mappedResultFromDocument in
                GroupByDocumentId(context,
                                  RobustEnumerationIndex(documentsWrapped.GetEnumerator(), viewGenerator.MapDefinitions, actions, stats)))
            {
                var dynamicResults = mappedResultFromDocument.Select(x => (object)new DynamicJsonObject(RavenJObject.FromObject(x, jsonSerializer))).ToList();
                foreach (
                    var doc in
                    RobustEnumerationReduceDuringMapPhase(dynamicResults.GetEnumerator(), viewGenerator.ReduceDefinition, actions, context))
                {
                    count++;

                    var reduceValue = viewGenerator.GroupByExtraction(doc);
                    if (reduceValue == null)
                    {
                        logIndexing.Debug("Field {0} is used as the reduce key and cannot be null, skipping document {1}",
                                          viewGenerator.GroupByExtraction, mappedResultFromDocument.Key);
                        continue;
                    }
                    var reduceKey = ReduceKeyToString(reduceValue);
                    var docId     = mappedResultFromDocument.Key.ToString();

                    var data = GetMappedData(doc);

                    logIndexing.Debug("Mapped result for index '{0}' doc '{1}': '{2}'", name, docId, data);

                    items.Add(new MapResultItem
                    {
                        Data      = data,
                        DocId     = docId,
                        ReduceKey = reduceKey
                    });

                    changed.Add(new ReduceKeyAndBucket(IndexingUtil.MapBucket(docId), reduceKey));
                }
            }

            int mapCount = 0;

            foreach (var mapResultItem in items)
            {
                actions.MapReduce.PutMappedResult(name, mapResultItem.DocId, mapResultItem.ReduceKey, mapResultItem.Data);
                if (mapCount++ % 50000 == 0)
                {
                    // The reason this is here is to protect us from Version Store Out Of Memory error during indexing
                    // this can happen if we have indexes that output a VERY large number of items per doc.
                    actions.General.PulseTransaction();
                }
            }

            UpdateIndexingStats(context, stats);
            actions.MapReduce.ScheduleReductions(name, 0, changed);
            AddindexingPerformanceStat(new IndexingPerformanceStats
            {
                OutputCount = count,
                InputCount  = sourceCount,
                Operation   = "Map",
                Duration    = sw.Elapsed,
                Started     = start
            });
            logIndexing.Debug("Mapped {0} documents for {1}", count, name);
        }
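Other examples call actions.General.MaybePulseTransaction() instead of the explicit modulo check used here. Assuming it wraps the same idea, committing periodically so the version store cannot grow without bound, a sketch:

    // Hypothetical shape of the storage accessor's pulse guard.
    public class GeneralStorageActionsSketch
    {
        private int operationsSinceLastPulse;

        public void MaybePulseTransaction()
        {
            if (++operationsSinceLastPulse < 50000) // assumed threshold; likely configuration-driven
                return;
            operationsSinceLastPulse = 0;
            PulseTransaction();
        }

        public void PulseTransaction()
        {
            // commit the current transaction and immediately open a new one (storage-specific)
        }
    }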
Example #6
		public IndexingPerformanceStats Index(int index, AbstractViewGenerator viewGenerator, IndexingBatch batch, WorkContext context, IStorageActionsAccessor actions, DateTime minimumTimestamp, CancellationToken token)
		{
			Index value;
			if (indexes.TryGetValue(index, out value) == false)
			{
				log.Debug("Tried to index on a non existent index {0}, ignoring", index);
				return null;
			}
			using (EnsureInvariantCulture())
			using (DocumentCacher.SkipSettingDocumentsInDocumentCache())
			{
				var performance = value.IndexDocuments(viewGenerator, batch, actions, minimumTimestamp, token);
				context.RaiseIndexChangeNotification(new IndexChangeNotification
				{
					Name = value.PublicName,
					Type = IndexChangeTypes.MapCompleted
				});

				return performance;
			}
		}
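A hypothetical call site for this variant, showing how the returned stats might be consumed; the variable names here are assumptions, not part of the original code:

    var performance = indexStorage.Index(indexId, viewGenerator, batch, context, actions,
                                         minimumTimestamp, context.CancellationToken);
    if (performance != null)
        log.Debug("Map completed for index {0} in {1} ms", indexId, performance.Duration.TotalMilliseconds);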
Example #7
		private IEnumerable<IndexingBatchForIndex> FilterIndexes(IList<IndexToWorkOn> indexesToWorkOn, List<JsonDocument> jsonDocs, Etag highestETagInBatch)
		{
			var last = jsonDocs.Last();

			Debug.Assert(last.Etag != null);
			Debug.Assert(last.LastModified != null);

			var lastEtag = last.Etag;
			var lastModified = last.LastModified.Value;

			var documentRetriever = new DocumentRetriever(null, context.ReadTriggers, context.Database.InFlightTransactionalState);

			var filteredDocs =
				BackgroundTaskExecuter.Instance.Apply(context, jsonDocs, doc =>
				{
					var filteredDoc = documentRetriever.ExecuteReadTriggers(doc, null, ReadOperation.Index);
					return filteredDoc == null ? new
					{
						Doc = doc,
						Json = (object)new FilteredDocument(doc)
					} : new
					{
						Doc = filteredDoc,
						Json = JsonToExpando.Convert(doc.ToJson())
					};
				});

			Log.Debug("After read triggers executed, {0} documents remained", filteredDocs.Count);

			var results = new IndexingBatchForIndex[indexesToWorkOn.Count];
			var actions = new Action<IStorageActionsAccessor>[indexesToWorkOn.Count];

			BackgroundTaskExecuter.Instance.ExecuteAll(context, indexesToWorkOn, (indexToWorkOn, i) =>
			{
				var indexName = indexToWorkOn.IndexName;
				var viewGenerator = context.IndexDefinitionStorage.GetViewGenerator(indexName);
				if (viewGenerator == null)
					return; // probably deleted

				var batch = new IndexingBatch(highestETagInBatch);

				foreach (var item in filteredDocs)
				{
					if (prefetchingBehavior.FilterDocuments(item.Doc) == false)
						continue;

					// did we already index this document in this index?
					var etag = item.Doc.Etag;
					if (etag == null)
						continue;

					// is the Raven-Entity-Name a match for the things the index executes on?
					if (viewGenerator.ForEntityNames.Count != 0 &&
						viewGenerator.ForEntityNames.Contains(item.Doc.Metadata.Value<string>(Constants.RavenEntityName)) == false)
					{
						continue;
					}

					batch.Add(item.Doc, item.Json, prefetchingBehavior.ShouldSkipDeleteFromIndex(item.Doc));

					if (batch.DateTime == null)
						batch.DateTime = item.Doc.LastModified;
					else
						batch.DateTime = batch.DateTime > item.Doc.LastModified
											 ? item.Doc.LastModified
											 : batch.DateTime;
				}

				if (batch.Docs.Count == 0)
				{
					Log.Debug("All documents have been filtered for {0}, no indexing will be performed, updating to {1}, {2}", indexName,
							  lastEtag, lastModified);
					// we use it this way to batch all the updates together
					actions[i] = accessor => accessor.Indexing.UpdateLastIndexed(indexName, lastEtag, lastModified);
					return;
				}
				if (Log.IsDebugEnabled)
				{
					Log.Debug("Going to index {0} documents in {1}: ({2})", batch.Ids.Count, indexToWorkOn, string.Join(", ", batch.Ids));
				}
				results[i] = new IndexingBatchForIndex
				{
					Batch = batch,
					IndexName = indexToWorkOn.IndexName,
					Index = indexToWorkOn.Index,
					LastIndexedEtag = indexToWorkOn.LastIndexedEtag
				};

			});

			transactionalStorage.Batch(actionsAccessor =>
			{
				foreach (var action in actions)
				{
					if (action != null)
						action(actionsAccessor);
				}
			});

			return results.Where(x => x != null);
		}
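FilteredDocument serves as a marker wrapper: documents rejected by read triggers still flow through the batch (so positions and counts stay intact) but are stripped out by the "x is FilteredDocument == false" checks before indexing. A plausible minimal shape, assumed from usage:

    public class FilteredDocument
    {
        public JsonDocument Document { get; private set; }

        public FilteredDocument(JsonDocument document)
        {
            Document = document;
        }
    }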
Example #8
		public override void IndexDocuments(
			AbstractViewGenerator viewGenerator,
			IndexingBatch batch,
			IStorageActionsAccessor actions,
			DateTime minimumTimestamp)
		{
			var count = 0;
			var sourceCount = 0;
			var sw = Stopwatch.StartNew();
			var start = SystemTime.UtcNow;
			var deleted = new Dictionary<ReduceKeyAndBucket, int>();
			var indexPerfStats = RecordCurrentBatch("Current Map", batch.Docs.Count);
			batch.SetIndexingPerformance(indexPerfStats);

			var documentsWrapped = batch.Docs.Select(doc =>
			{
				sourceCount++;
				var documentId = doc.__document_id;
				actions.MapReduce.DeleteMappedResultsForDocumentId((string)documentId, indexId, deleted);
				return doc;
			})
				.Where(x => x is FilteredDocument == false)
				.ToList();
			var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();
			var allReferenceEtags = new ConcurrentQueue<IDictionary<string, Etag>>();
			var allState = new ConcurrentQueue<Tuple<HashSet<ReduceKeyAndBucket>, IndexingWorkStats, Dictionary<string, int>>>();

			int loadDocumentCount = 0;
			long loadDocumentDuration = 0;
			BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, partition =>
			{
				var localStats = new IndexingWorkStats();
				var localChanges = new HashSet<ReduceKeyAndBucket>();
				var statsPerKey = new Dictionary<string, int>();
				allState.Enqueue(Tuple.Create(localChanges, localStats, statsPerKey));

				using (CurrentIndexingScope.Current = new CurrentIndexingScope(context.Database, PublicName))
				{
					// we are writing to the transactional store from multiple threads here, and in a streaming fashion
					// should result in less memory and better perf
					context.TransactionalStorage.Batch(accessor =>
					{
						var mapResults = RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, localStats);
						var currentDocumentResults = new List<object>();
						string currentKey = null;
						bool skipDocument = false;
						foreach (var currentDoc in mapResults)
						{
							var documentId = GetDocumentId(currentDoc);
							if (documentId != currentKey)
							{
								count += ProcessBatch(viewGenerator, currentDocumentResults, currentKey, localChanges, accessor, statsPerKey);
								currentDocumentResults.Clear();
								currentKey = documentId;
							}
							else if (skipDocument)
							{
								continue;
							}
							currentDocumentResults.Add(new DynamicJsonObject(RavenJObject.FromObject(currentDoc, jsonSerializer)));

							if (EnsureValidNumberOfOutputsForDocument(documentId, currentDocumentResults.Count) == false)
							{
								skipDocument = true;
								currentDocumentResults.Clear();
								continue;
							}

							Interlocked.Increment(ref localStats.IndexingSuccesses);
						}
						count += ProcessBatch(viewGenerator, currentDocumentResults, currentKey, localChanges, accessor, statsPerKey);
					});
					allReferenceEtags.Enqueue(CurrentIndexingScope.Current.ReferencesEtags);
					allReferencedDocs.Enqueue(CurrentIndexingScope.Current.ReferencedDocuments);
					Interlocked.Add(ref loadDocumentCount, CurrentIndexingScope.Current.LoadDocumentCount);
					Interlocked.Add(ref loadDocumentDuration, CurrentIndexingScope.Current.LoadDocumentDuration.ElapsedMilliseconds);
				}
			});



			UpdateDocumentReferences(actions, allReferencedDocs, allReferenceEtags);

			var changed = allState.SelectMany(x => x.Item1).Concat(deleted.Keys)
					.Distinct()
					.ToList();

			var stats = new IndexingWorkStats(allState.Select(x => x.Item2));
			var reduceKeyStats = allState.SelectMany(x => x.Item3)
										 .GroupBy(x => x.Key)
										 .Select(g => new { g.Key, Count = g.Sum(x => x.Value) })
										 .ToList();

			BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, reduceKeyStats, enumerator => context.TransactionalStorage.Batch(accessor =>
			{
				while (enumerator.MoveNext())
				{
					var reduceKeyStat = enumerator.Current;
					accessor.MapReduce.IncrementReduceKeyCounter(indexId, reduceKeyStat.Key, reduceKeyStat.Count);
				}
			}));

			BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, changed, enumerator => context.TransactionalStorage.Batch(accessor =>
			{
				while (enumerator.MoveNext())
				{
					accessor.MapReduce.ScheduleReductions(indexId, 0, enumerator.Current);
				}
			}));


			UpdateIndexingStats(context, stats);
			AddindexingPerformanceStat(new IndexingPerformanceStats
			{
				OutputCount = count,
				ItemsCount = sourceCount,
				InputCount = documentsWrapped.Count,
				Operation = "Map",
				Duration = sw.Elapsed,
				Started = start,
				LoadDocumentCount = loadDocumentCount,
				LoadDocumentDurationMs = loadDocumentDuration 
			});
			BatchCompleted("Current Map");
			logIndexing.Debug("Mapped {0} documents for {1}", count, indexId);
		}
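EnsureValidNumberOfOutputsForDocument guards against a single document fanning out into an enormous number of map results. A hedged sketch assuming a fixed cap; the real check is likely configurable and records an indexing error as well:

    private bool EnsureValidNumberOfOutputsForDocument(string docId, int outputCount)
    {
        const int maxOutputsPerDocument = 15; // assumed cap
        if (outputCount <= maxOutputsPerDocument)
            return true;
        logIndexing.Debug("Index '{0}' produced {1} map results for document '{2}', exceeding the cap; skipping it",
                          PublicName, outputCount, docId);
        return false;
    }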
Example #9
        private IEnumerable<IndexingBatchForIndex> FilterIndexes(IList<IndexToWorkOn> indexesToWorkOn, List<JsonDocument> jsonDocs, Etag highestETagInBatch, out List<IndexToWorkOn> filteredOutIndexes)
        {
            var innerFilteredOutIndexes = new ConcurrentStack<IndexToWorkOn>();
            var last = jsonDocs.Last();

            Debug.Assert(last.Etag != null);
            Debug.Assert(last.LastModified != null);

            var lastEtag = last.Etag;
            var lastModified = last.LastModified.Value;


            var documentRetriever = new DocumentRetriever(null, null, context.ReadTriggers);

            var filteredDocs =
                BackgroundTaskExecuter.Instance.Apply(context, jsonDocs, doc =>
                {
                    var filteredDoc = documentRetriever.ExecuteReadTriggers(doc, ReadOperation.Index);
                    return filteredDoc == null ? new
                    {
                        Doc = doc,
                        Json = (object)new FilteredDocument(doc)
                    } : new
                    {
                        Doc = filteredDoc,
                        Json = JsonToExpando.Convert(doc.ToJson())
                    };
                });

            if (Log.IsDebugEnabled)
                Log.Debug("After read triggers executed, {0} documents remained", filteredDocs.Count);


            var results = new ConcurrentQueue<IndexingBatchForIndex>();
            var actions = new ConcurrentQueue<Action<IStorageActionsAccessor>>();
            context.Database.MappingThreadPool.ExecuteBatch(indexesToWorkOn, indexToWorkOn =>
            {
                var indexName = indexToWorkOn.Index.PublicName;
                var viewGenerator = context.IndexDefinitionStorage.GetViewGenerator(indexName);
                if (viewGenerator == null)
                    return; // probably deleted

                var batch = new IndexingBatch(highestETagInBatch);


                foreach (var filteredDoc in filteredDocs)
                {
                    var doc = filteredDoc.Doc;
                    var json = filteredDoc.Json;


                    if (defaultPrefetchingBehavior.FilterDocuments(doc) == false
                        || doc.Etag.CompareTo(indexToWorkOn.LastIndexedEtag) <= 0)
                        continue;

                    // did we already index this document in this index?

                    var etag = doc.Etag;
                    if (etag == null)
                        continue;

                    // is the Raven-Entity-Name a match for the things the index executes on?
                    if (viewGenerator.ForEntityNames.Count != 0 &&
                        viewGenerator.ForEntityNames.Contains(doc.Metadata.Value<string>(Constants.RavenEntityName)) == false)
                    {
                        continue;
                    }

                    batch.Add(doc, json, defaultPrefetchingBehavior.ShouldSkipDeleteFromIndex(doc));

                    if (batch.DateTime == null)
                        batch.DateTime = doc.LastModified;
                    else
                        batch.DateTime = batch.DateTime > doc.LastModified
                            ? doc.LastModified
                            : batch.DateTime;
                }

                if (batch.Docs.Count == 0)
                {
                    if (Log.IsDebugEnabled)
                        Log.Debug("All documents have been filtered for {0}, no indexing will be performed, updating to {1}, {2}", indexName, lastEtag, lastModified);

                    // we use it this way to batch all the updates together
                    if (indexToWorkOn.LastIndexedEtag.CompareTo(lastEtag) < 0)
                    {
                        actions.Enqueue(accessor =>
                        {
                            accessor.Indexing.UpdateLastIndexed(indexToWorkOn.Index.indexId, lastEtag, lastModified);
                            accessor.AfterStorageCommit += () =>
                            {
                                indexToWorkOn.Index.EnsureIndexWriter();
                                indexToWorkOn.Index.Flush(lastEtag);
                            };
                        });
                    }
                    innerFilteredOutIndexes.Push(indexToWorkOn);
                    context.MarkIndexFilteredOut(indexName);
                    return;
                }
                
                if (Log.IsDebugEnabled)
                    Log.Debug("Going to index {0} documents in {1}: ({2})", batch.Ids.Count, indexToWorkOn, string.Join(", ", batch.Ids));
                
                results.Enqueue(new IndexingBatchForIndex
                {
                    Batch = batch,
                    IndexId = indexToWorkOn.IndexId,
                    Index = indexToWorkOn.Index,
                    LastIndexedEtag = indexToWorkOn.LastIndexedEtag
                });
            }, description: string.Format("Filtering documents for {0} indexes", indexesToWorkOn.Count));

            filteredOutIndexes = innerFilteredOutIndexes.ToList();
            foreach (var action in actions)
            {
                bool keepTrying = true;
                for (int i = 0; i < 10 && keepTrying; i++)
                {
                    keepTrying = false;
                    transactionalStorage.Batch(actionsAccessor =>
                    {
                        if (action != null)
                        {
                            try
                            {
                                action(actionsAccessor);
                            }
                            catch (Exception e)
                            {
                                if (actionsAccessor.IsWriteConflict(e))
                                {
                                    keepTrying = true;
                                    return;
                                }
                                throw;
                            }
                        }
                    });

                    if (keepTrying)
                        Thread.Sleep(11);
                }
            }
            return results.Where(x => x != null);
        }
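The inline conflict-retry loop above can be expressed as a reusable helper; a sketch under the same assumptions (the class and method names are illustrative):

    using System;
    using System.Threading;

    internal static class StorageRetry
    {
        // Retry a storage action a bounded number of times when it hits a write conflict.
        public static void RetryOnWriteConflict(ITransactionalStorage storage,
                                                Action<IStorageActionsAccessor> action,
                                                int maxAttempts = 10, int delayMs = 11)
        {
            for (int attempt = 0; attempt < maxAttempts; attempt++)
            {
                bool conflicted = false;
                storage.Batch(accessor =>
                {
                    try
                    {
                        action(accessor);
                    }
                    catch (Exception e)
                    {
                        if (accessor.IsWriteConflict(e))
                        {
                            conflicted = true; // let the competing writer finish, then retry
                            return;
                        }
                        throw;
                    }
                });
                if (conflicted == false)
                    return;
                Thread.Sleep(delayMs);
            }
        }
    }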
Example #10
		public override IndexingPerformanceStats IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp, CancellationToken token)
		{
			token.ThrowIfCancellationRequested();

			var count = 0;
			var sourceCount = 0;
			var writeToIndexStats = new List<PerformanceStats>();

			IndexingPerformanceStats performance = null;
			var performanceStats = new List<BasePerformanceStats>();

			var storageCommitDuration = new Stopwatch();

			actions.BeforeStorageCommit += storageCommitDuration.Start;

			actions.AfterStorageCommit += () =>
			{
				storageCommitDuration.Stop();

				performanceStats.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds));
			};

			Write((indexWriter, analyzer, stats) =>
			{
				var processedKeys = new HashSet<string>();
				var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(indexId))
					.Where(x => x != null)
					.ToList();

				try
				{
					performance = RecordCurrentBatch("Current", "Index", batch.Docs.Count);

					var deleteExistingDocumentsDuration = new Stopwatch();
					var docIdTerm = new Term(Constants.DocumentIdFieldName);
					var documentsWrapped = batch.Docs.Select((doc, i) =>
					{
						token.ThrowIfCancellationRequested();

						Interlocked.Increment(ref sourceCount);
						if (doc.__document_id == null)
							throw new ArgumentException(
								string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));

						string documentId = doc.__document_id.ToString();
						if (processedKeys.Add(documentId) == false)
							return doc;

						InvokeOnIndexEntryDeletedOnAllBatchers(batchers, docIdTerm.CreateTerm(documentId.ToLowerInvariant()));

						if (batch.SkipDeleteFromIndex[i] == false ||
						    context.ShouldRemoveFromIndex(documentId)) // maybe it is recently deleted?
						{
							using (StopwatchScope.For(deleteExistingDocumentsDuration))
							{
								indexWriter.DeleteDocuments(docIdTerm.CreateTerm(documentId.ToLowerInvariant()));
							}
						}

						return doc;
					})
					.Where(x => x is FilteredDocument == false)
					.ToList();

					performanceStats.Add(new PerformanceStats
					{
						Name = IndexingOperation.Lucene_DeleteExistingDocument,
						DurationMs = deleteExistingDocumentsDuration.ElapsedMilliseconds
					});

					var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();
					var allReferenceEtags = new ConcurrentQueue<IDictionary<string, Etag>>();

					var parallelOperations = new ConcurrentQueue<ParallelBatchStats>();

					var parallelProcessingStart = SystemTime.UtcNow;

					BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, (partition) =>
					{
						token.ThrowIfCancellationRequested();
						var parallelStats = new ParallelBatchStats
						{
							StartDelay = (long) (SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds
						};

						var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(context.Database, indexDefinition, viewGenerator, logIndexing);
						var luceneDoc = new Document();
						var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES,
														Field.Index.NOT_ANALYZED_NO_NORMS);

						using (CurrentIndexingScope.Current = new CurrentIndexingScope(context.Database, PublicName))
						{
							string currentDocId = null;
							int outputPerDocId = 0;
							Action<Exception, object> onErrorFunc;
							bool skipDocument = false;

							var linqExecutionDuration = new Stopwatch();
							var addDocumentDuration = new Stopwatch();
							var convertToLuceneDocumentDuration = new Stopwatch();

							foreach (var doc in RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, stats, out onErrorFunc, linqExecutionDuration))
							{
								token.ThrowIfCancellationRequested();

								float boost;
								IndexingResult indexingResult;
								using (StopwatchScope.For(convertToLuceneDocumentDuration))
								{
									try
									{
										indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);
									}
									catch (Exception e)
									{
										onErrorFunc(e, doc);
										continue;
									}
								}

								// ReSharper disable once RedundantBoolCompare --> code clarity
								if (indexingResult.NewDocId == null || indexingResult.ShouldSkip != false)
								{
									continue;
								}
								if (currentDocId != indexingResult.NewDocId)
								{
									currentDocId = indexingResult.NewDocId;
									outputPerDocId = 0;
									skipDocument = false;
								}
								if (skipDocument)
									continue;
								outputPerDocId++;
								if (EnsureValidNumberOfOutputsForDocument(currentDocId, outputPerDocId) == false)
								{
									skipDocument = true;
									continue;
								}
								Interlocked.Increment(ref count);

								using (StopwatchScope.For(convertToLuceneDocumentDuration))
								{
									luceneDoc.GetFields().Clear();
									luceneDoc.Boost = boost;
									documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
									luceneDoc.Add(documentIdField);
									foreach (var field in indexingResult.Fields)
									{
										luceneDoc.Add(field);
									}
								}

								batchers.ApplyAndIgnoreAllErrors(
									exception =>
									{
										logIndexing.WarnException(
										string.Format(
											"Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
											PublicName, indexingResult.NewDocId),
											exception);
										context.AddError(
											indexId,
											PublicName,
											indexingResult.NewDocId,
											exception,
											"OnIndexEntryCreated Trigger");
									},
									trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc));
								LogIndexedDocument(indexingResult.NewDocId, luceneDoc);

								using (StopwatchScope.For(addDocumentDuration))
								{
									AddDocumentToIndex(indexWriter, luceneDoc, analyzer);
								}

								Interlocked.Increment(ref stats.IndexingSuccesses);
							}
							allReferenceEtags.Enqueue(CurrentIndexingScope.Current.ReferencesEtags);
							allReferencedDocs.Enqueue(CurrentIndexingScope.Current.ReferencedDocuments);

							parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.LoadDocument, CurrentIndexingScope.Current.LoadDocumentDuration.ElapsedMilliseconds));
							parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Linq_MapExecution, linqExecutionDuration.ElapsedMilliseconds));
							parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Lucene_ConvertToLuceneDocument, convertToLuceneDocumentDuration.ElapsedMilliseconds));
							parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Lucene_AddDocument, addDocumentDuration.ElapsedMilliseconds));
							parallelOperations.Enqueue(parallelStats);
						}
					});

					performanceStats.Add(new ParallelPerformanceStats
					{
						NumberOfThreads = parallelOperations.Count,
						DurationMs = (long) (SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds,
						BatchedOperations = parallelOperations.ToList()
					});

					var updateDocumentReferencesDuration = new Stopwatch();
					using (StopwatchScope.For(updateDocumentReferencesDuration))
					{
						UpdateDocumentReferences(actions, allReferencedDocs, allReferenceEtags);
					}
					performanceStats.Add(PerformanceStats.From(IndexingOperation.UpdateDocumentReferences, updateDocumentReferencesDuration.ElapsedMilliseconds));
				}
				catch (Exception e)
				{
					batchers.ApplyAndIgnoreAllErrors(
						ex =>
						{
							logIndexing.WarnException("Failed to notify index update trigger batcher about an error in " + PublicName, ex);
							context.AddError(indexId, PublicName, null, ex, "AnErrorOccured Trigger");
						},
						x => x.AnErrorOccured(e));
					throw;
				}
				finally
				{
					batchers.ApplyAndIgnoreAllErrors(
						e =>
						{
							logIndexing.WarnException("Failed to dispose on index update trigger in " + PublicName, e);
							context.AddError(indexId, PublicName, null, e, "Dispose Trigger");
						},
						x => x.Dispose());
				}
				return new IndexedItemsInfo(batch.HighestEtagBeforeFiltering)
				{
					ChangedDocs = sourceCount
				};
			}, writeToIndexStats);

			performanceStats.AddRange(writeToIndexStats);

			performance.OnCompleted = () => BatchCompleted("Current", "Index", sourceCount, count, performanceStats);

			logIndexing.Debug("Indexed {0} documents for {1}", count, PublicName);

			return performance;
		}
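StopwatchScope.For(...) is used throughout this example to time regions with using blocks; a minimal sketch of such a helper (assumed shape):

    using System;
    using System.Diagnostics;

    public static class StopwatchScope
    {
        // Start the stopwatch now, stop it when the scope is disposed.
        public static IDisposable For(Stopwatch stopwatch)
        {
            stopwatch.Start();
            return new StopOnDispose(stopwatch);
        }

        private sealed class StopOnDispose : IDisposable
        {
            private readonly Stopwatch stopwatch;
            public StopOnDispose(Stopwatch stopwatch) { this.stopwatch = stopwatch; }
            public void Dispose() { stopwatch.Stop(); }
        }
    }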
Example #11
        public override void IndexDocuments(
            AbstractViewGenerator viewGenerator,
            IndexingBatch batch,
            IStorageActionsAccessor actions,
            DateTime minimumTimestamp)
        {
            var count       = 0;
            var sourceCount = 0;
            var sw          = Stopwatch.StartNew();
            var start       = SystemTime.UtcNow;
            var deleted     = new Dictionary<ReduceKeyAndBucket, int>();

            RecordCurrentBatch("Current Map", batch.Docs.Count);
            var documentsWrapped = batch.Docs.Select(doc =>
            {
                sourceCount++;
                var documentId = doc.__document_id;
                actions.MapReduce.DeleteMappedResultsForDocumentId((string)documentId, indexId, deleted);
                return doc;
            })
                                   .Where(x => x is FilteredDocument == false)
                                   .ToList();
            var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();
            var allReferenceEtags = new ConcurrentQueue<IDictionary<string, Etag>>();
            var allState          = new ConcurrentQueue<Tuple<HashSet<ReduceKeyAndBucket>, IndexingWorkStats, Dictionary<string, int>>>();

            BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, partition =>
            {
                var localStats   = new IndexingWorkStats();
                var localChanges = new HashSet<ReduceKeyAndBucket>();
                var statsPerKey  = new Dictionary<string, int>();
                allState.Enqueue(Tuple.Create(localChanges, localStats, statsPerKey));

                using (CurrentIndexingScope.Current = new CurrentIndexingScope(context.Database, PublicName))
                {
                    // we are writing to the transactional store from multiple threads here, and in a streaming fashion
                    // should result in less memory and better perf
                    context.TransactionalStorage.Batch(accessor =>
                    {
                        var mapResults             = RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, localStats);
                        var currentDocumentResults = new List<object>();
                        string currentKey          = null;
                        foreach (var currentDoc in mapResults)
                        {
                            var documentId = GetDocumentId(currentDoc);
                            if (documentId != currentKey)
                            {
                                count += ProcessBatch(viewGenerator, currentDocumentResults, currentKey, localChanges, accessor, statsPerKey);
                                currentDocumentResults.Clear();
                                currentKey = documentId;
                            }
                            currentDocumentResults.Add(new DynamicJsonObject(RavenJObject.FromObject(currentDoc, jsonSerializer)));

                            EnsureValidNumberOfOutputsForDocument(documentId, currentDocumentResults.Count);

                            Interlocked.Increment(ref localStats.IndexingSuccesses);
                        }
                        count += ProcessBatch(viewGenerator, currentDocumentResults, currentKey, localChanges, accessor, statsPerKey);
                    });
                    allReferenceEtags.Enqueue(CurrentIndexingScope.Current.ReferencesEtags);
                    allReferencedDocs.Enqueue(CurrentIndexingScope.Current.ReferencedDocuments);
                }
            });



            UpdateDocumentReferences(actions, allReferencedDocs, allReferenceEtags);

            var changed = allState.SelectMany(x => x.Item1).Concat(deleted.Keys)
                          .Distinct()
                          .ToList();

            var stats          = new IndexingWorkStats(allState.Select(x => x.Item2));
            var reduceKeyStats = allState.SelectMany(x => x.Item3)
                                 .GroupBy(x => x.Key)
                                 .Select(g => new { g.Key, Count = g.Sum(x => x.Value) })
                                 .ToList();

            BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, reduceKeyStats, enumerator => context.TransactionalStorage.Batch(accessor =>
            {
                while (enumerator.MoveNext())
                {
                    var reduceKeyStat = enumerator.Current;
                    accessor.MapReduce.IncrementReduceKeyCounter(indexId, reduceKeyStat.Key, reduceKeyStat.Count);
                }
            }));

            BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, changed, enumerator => context.TransactionalStorage.Batch(accessor =>
            {
                while (enumerator.MoveNext())
                {
                    accessor.MapReduce.ScheduleReductions(indexId, 0, enumerator.Current);
                }
            }));


            UpdateIndexingStats(context, stats);
            AddindexingPerformanceStat(new IndexingPerformanceStats
            {
                OutputCount = count,
                ItemsCount  = sourceCount,
                InputCount  = documentsWrapped.Count,
                Operation   = "Map",
                Duration    = sw.Elapsed,
                Started     = start
            });
            BatchCompleted("Current Map");
            logIndexing.Debug("Mapped {0} documents for {1}", count, indexId);
        }
Example #12
		public override void IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, WorkContext context, IStorageActionsAccessor actions, DateTime minimumTimestamp)
		{
			var count = 0;
			var sourceCount = 0;
			var sw = Stopwatch.StartNew();
			var start = SystemTime.UtcNow;
			Write(context, (indexWriter, analyzer, stats) =>
			{
				var processedKeys = new HashSet<string>();
				var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(name))
					.Where(x => x != null)
					.ToList();
				try
				{
					var docIdTerm = new Term(Constants.DocumentIdFieldName);
					var documentsWrapped = batch.Docs.Select((doc, i) =>
					{
						Interlocked.Increment(ref sourceCount);
						if (doc.__document_id == null)
							throw new ArgumentException(
								string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));

						string documentId = doc.__document_id.ToString();
						if (processedKeys.Add(documentId) == false)
							return doc;
						batchers.ApplyAndIgnoreAllErrors(
							exception =>
							{
								logIndexing.WarnException(
									string.Format("Error when executed OnIndexEntryDeleted trigger for index '{0}', key: '{1}'",
									              name, documentId),
									exception);
								context.AddError(name,
								                 documentId,
								                 exception.Message
									);
							},
							trigger => trigger.OnIndexEntryDeleted(documentId));
						if (batch.SkipDeleteFromIndex[i] == false)
							indexWriter.DeleteDocuments(docIdTerm.CreateTerm(documentId.ToLowerInvariant()));
						return doc;
					})
						.Where(x => x is FilteredDocument == false)
						.ToList();


					BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, (partition) =>
					{
						var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(indexDefinition);
						var luceneDoc = new Document();
						var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES,
						                                Field.Index.NOT_ANALYZED_NO_NORMS);

						foreach (var doc in RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, actions, stats))
						{
							float boost;
							var indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);

							if (indexingResult.NewDocId != null && indexingResult.ShouldSkip == false)
							{
								Interlocked.Increment(ref count);
								luceneDoc.GetFields().Clear();
								luceneDoc.Boost = boost;
								documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
								luceneDoc.Add(documentIdField);
								foreach (var field in indexingResult.Fields)
								{
									luceneDoc.Add(field);
								}
								batchers.ApplyAndIgnoreAllErrors(
									exception =>
									{
										logIndexing.WarnException(
											string.Format("Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
											              name, indexingResult.NewDocId),
											exception);
										context.AddError(name,
										                 indexingResult.NewDocId,
										                 exception.Message
											);
									},
									trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc));
								LogIndexedDocument(indexingResult.NewDocId, luceneDoc);
								AddDocumentToIndex(indexWriter, luceneDoc, analyzer);
							}

							Interlocked.Increment(ref stats.IndexingSuccesses);
						}
					});
				}
				catch (Exception e)
				{
					batchers.ApplyAndIgnoreAllErrors(
						ex =>
						{
							logIndexing.WarnException("Failed to notify index update trigger batcher about an error", ex);
							context.AddError(name, null, ex.Message);
						},
						x => x.AnErrorOccured(e));
					throw;
				}
				finally
				{
					batchers.ApplyAndIgnoreAllErrors(
						e =>
						{
							logIndexing.WarnException("Failed to dispose on index update trigger", e);
							context.AddError(name, null, e.Message);
						},
						x => x.Dispose());
				}
				return sourceCount;
			});
			AddindexingPerformanceStat(new IndexingPerformanceStats
			{
				OutputCount = count,
				InputCount = sourceCount,
				Duration = sw.Elapsed,
				Operation = "Index",
				Started = start
			});
			logIndexing.Debug("Indexed {0} documents for {1}", count, name);
		}
Example #13
        private IEnumerable<Tuple<IndexToWorkOn, IndexingBatch>> FilterIndexes(IList<IndexToWorkOn> indexesToWorkOn, JsonDocument[] jsonDocs)
        {
            var last = jsonDocs.Last();

            Debug.Assert(last.Etag != null);
            Debug.Assert(last.LastModified != null);

            var lastEtag     = last.Etag.Value;
            var lastModified = last.LastModified.Value;

            var lastIndexedEtag = new ComparableByteArray(lastEtag.ToByteArray());

            var documentRetriever = new DocumentRetriever(null, context.ReadTriggers);

            var filteredDocs =
                BackgroundTaskExecuter.Instance.Apply(jsonDocs, doc =>
            {
                doc = documentRetriever.ExecuteReadTriggers(doc, null, ReadOperation.Index);
                return doc == null ? null : new { Doc = doc, Json = JsonToExpando.Convert(doc.ToJson()) };
            });

            log.Debug("After read triggers executed, {0} documents remained", filteredDocs.Count);

            var results = new Tuple<IndexToWorkOn, IndexingBatch>[indexesToWorkOn.Count];
            var actions = new Action<IStorageActionsAccessor>[indexesToWorkOn.Count];

            BackgroundTaskExecuter.Instance.ExecuteAll(context.Configuration, scheduler, indexesToWorkOn, (indexToWorkOn, i) =>
            {
                var indexLastIndexedEtag = new ComparableByteArray(indexToWorkOn.LastIndexedEtag.ToByteArray());
                if (indexLastIndexedEtag.CompareTo(lastIndexedEtag) >= 0)
                {
                    return;
                }

                var indexName     = indexToWorkOn.IndexName;
                var viewGenerator = context.IndexDefinitionStorage.GetViewGenerator(indexName);
                if (viewGenerator == null)
                {
                    return;                     // probably deleted
                }
                var batch = new IndexingBatch();

                foreach (var item in filteredDocs)
                {
                    // did we already index this document in this index?
                    if (indexLastIndexedEtag.CompareTo(new ComparableByteArray(item.Doc.Etag.Value.ToByteArray())) >= 0)
                    {
                        continue;
                    }


                    // is the Raven-Entity-Name a match for the things the index executes on?
                    if (viewGenerator.ForEntityNames.Count != 0 &&
                        viewGenerator.ForEntityNames.Contains(item.Doc.Metadata.Value<string>(Constants.RavenEntityName)) == false)
                    {
                        continue;
                    }

                    batch.Add(item.Doc, item.Json);

                    if (batch.DateTime == null)
                    {
                        batch.DateTime = item.Doc.LastModified;
                    }
                    else
                    {
                    batch.DateTime = batch.DateTime > item.Doc.LastModified
                                         ? item.Doc.LastModified
                                         : batch.DateTime;
                    }
                }

                if (batch.Docs.Count == 0)
                {
                    log.Debug("All documents have been filtered for {0}, no indexing will be performed, updating to {1}, {2}", indexName,
                              lastEtag, lastModified);
                    // we use it this way to batch all the updates together
                    actions[i] = accessor => accessor.Indexing.UpdateLastIndexed(indexName, lastEtag, lastModified);
                    return;
                }
                log.Debug("Going to index {0} documents in {1}", batch.Ids.Count, indexToWorkOn);
                results[i] = Tuple.Create(indexToWorkOn, batch);
            });

            transactionalStorage.Batch(actionsAccessor =>
            {
                foreach (var action in actions)
                {
                    if (action != null)
                    {
                        action(actionsAccessor);
                    }
                }
            });

            return results.Where(x => x != null);
        }
Example #14
		public override void IndexDocuments(
			AbstractViewGenerator viewGenerator,
			IndexingBatch batch,
			IStorageActionsAccessor actions,
			DateTime minimumTimestamp)
		{
			var count = 0;
			var sourceCount = 0;
			var sw = Stopwatch.StartNew();
			var start = SystemTime.UtcNow;
			var changed = new HashSet<ReduceKeyAndBucket>();
			var documentsWrapped = batch.Docs.Select(doc =>
			{
				sourceCount++;
				var documentId = doc.__document_id;
				actions.MapReduce.DeleteMappedResultsForDocumentId((string)documentId, name, changed);
				return doc;
			})
				.Where(x => x is FilteredDocument == false)
				.ToList();
			var items = new ConcurrentQueue<MapResultItem>();
			var stats = new IndexingWorkStats();
			var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();

			if (documentsWrapped.Count > 0)
				actions.MapReduce.UpdateRemovedMapReduceStats(name, changed);

			BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, partition =>
			{
				using (CurrentIndexingScope.Current = new CurrentIndexingScope(LoadDocument, allReferencedDocs.Enqueue))
				{
					var mapResults = RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, stats);
					var currentDocumentResults = new List<object>();
					string currentKey = null;
					foreach (var currentDoc in mapResults)
					{
						var documentId = GetDocumentId(currentDoc);
						if (documentId != currentKey)
						{
							count += ProcessBatch(viewGenerator, currentDocumentResults, currentKey, items);
							currentDocumentResults.Clear();
							currentKey = documentId; 
						}
						currentDocumentResults.Add(new DynamicJsonObject(RavenJObject.FromObject(currentDoc, jsonSerializer)));
					}
					count += ProcessBatch(viewGenerator, currentDocumentResults, currentKey, items);
				}
			});
			

			IDictionary<string, HashSet<string>> result;
			while (allReferencedDocs.TryDequeue(out result))
			{
				foreach (var referencedDocument in result)
				{
					actions.Indexing.UpdateDocumentReferences(name, referencedDocument.Key, referencedDocument.Value);
					actions.General.MaybePulseTransaction();
				}
			}

			foreach (var mapResultItem in items)
			{
				changed.Add(new ReduceKeyAndBucket(mapResultItem.Bucket, mapResultItem.ReduceKey));
				actions.MapReduce.PutMappedResult(name, mapResultItem.DocId, mapResultItem.ReduceKey, mapResultItem.Data);
				actions.General.MaybePulseTransaction();
			}

			UpdateIndexingStats(context, stats);
			actions.MapReduce.ScheduleReductions(name, 0, changed);
			AddindexingPerformanceStat(new IndexingPerformanceStats
			{
				OutputCount = count,
				InputCount = sourceCount,
				Operation = "Map",
				Duration = sw.Elapsed,
				Started = start
			});
			logIndexing.Debug("Mapped {0} documents for {1}", count, name);
		}
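MapResultItem is a plain carrier for one map output. Judging from the fields accessed across these examples (DocId, ReduceKey, Bucket, Data), it plausibly looks like this; the exact shape is an assumption:

    public class MapResultItem
    {
        public string DocId { get; set; }
        public string ReduceKey { get; set; }
        public int Bucket { get; set; }
        public RavenJObject Data { get; set; }
    }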
Example #15
        public override void IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp)
        {
            var count = 0;
            var sourceCount = 0;
            var sw = Stopwatch.StartNew();
            var start = SystemTime.UtcNow;
            Write((indexWriter, analyzer, stats) =>
            {
                var processedKeys = new HashSet<string>();
                var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(name))
                    .Where(x => x != null)
                    .ToList();
                try
                {
                    RecordCurrentBatch("Current", batch.Docs.Count);
                    var docIdTerm = new Term(Constants.DocumentIdFieldName);
                    var documentsWrapped = batch.Docs.Select((doc, i) =>
                    {
                        Interlocked.Increment(ref sourceCount);
                        if (doc.__document_id == null)
                            throw new ArgumentException(
                                string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));

                        string documentId = doc.__document_id.ToString();
                        if (processedKeys.Add(documentId) == false)
                            return doc;
                        batchers.ApplyAndIgnoreAllErrors(
                            exception =>
                            {
                                logIndexing.WarnException(
                                    string.Format("Error when executed OnIndexEntryDeleted trigger for index '{0}', key: '{1}'",
                                                  name, documentId),
                                    exception);
                                context.AddError(name,
                                                 documentId,
                                                 exception.Message,
                                                 "OnIndexEntryDeleted Trigger"
                                    );
                            },
                            trigger => trigger.OnIndexEntryDeleted(documentId));
                        if (batch.SkipDeleteFromIndex[i] == false ||
                            context.ShouldRemoveFromIndex(documentId)) // maybe it is recently deleted?
                            indexWriter.DeleteDocuments(docIdTerm.CreateTerm(documentId.ToLowerInvariant()));

                        return doc;
                    })
                        .Where(x => x is FilteredDocument == false)
                        .ToList();

                    var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();

                    BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, (partition) =>
                    {
                        var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(context.Database, indexDefinition, viewGenerator);
                        var luceneDoc = new Document();
                        var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES,
                                                        Field.Index.NOT_ANALYZED_NO_NORMS);

                        using (CurrentIndexingScope.Current = new CurrentIndexingScope(LoadDocument, allReferencedDocs.Enqueue))
                        {
                            foreach (var doc in RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, stats))
                            {
                                float boost;
                                var indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);

                                if (indexingResult.NewDocId != null && indexingResult.ShouldSkip == false)
                                {
                                    Interlocked.Increment(ref count);
                                    luceneDoc.GetFields().Clear();
                                    luceneDoc.Boost = boost;
                                    documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
                                    luceneDoc.Add(documentIdField);
                                    foreach (var field in indexingResult.Fields)
                                    {
                                        luceneDoc.Add(field);
                                    }
                                    batchers.ApplyAndIgnoreAllErrors(
                                        exception =>
                                        {
                                            logIndexing.WarnException(
                                                string.Format("Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
                                                              name, indexingResult.NewDocId),
                                                exception);
                                            context.AddError(name,
                                                             indexingResult.NewDocId,
                                                             exception.Message,
                                                             "OnIndexEntryCreated Trigger"
                                                );
                                        },
                                        trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc));
                                    LogIndexedDocument(indexingResult.NewDocId, luceneDoc);
                                    AddDocumentToIndex(indexWriter, luceneDoc, analyzer);
                                }

                                Interlocked.Increment(ref stats.IndexingSuccesses);
                            }
                        }
                    });

                    var dic = context.ReferencingDocumentsByChildKeysWhichMightNeedReindexing_SimpleIndex;
                    IDictionary<string, HashSet<string>> result;
                    while (allReferencedDocs.TryDequeue(out result))
                    {
                        foreach (var referencedDocument in result)
                        {
                            actions.Indexing.UpdateDocumentReferences(name, referencedDocument.Key, referencedDocument.Value);
                            foreach (var childDocumentKey in referencedDocument.Value)
                            {
                                dic.GetOrAdd(childDocumentKey, k => new ConcurrentBag<string>()).Add(referencedDocument.Key);
                            }
                        }
                    }

                }
                catch (Exception e)
                {
                    batchers.ApplyAndIgnoreAllErrors(
                        ex =>
                        {
                            logIndexing.WarnException("Failed to notify index update trigger batcher about an error", ex);
                            context.AddError(name, null, ex.Message, "AnErrorOccured Trigger");
                        },
                        x => x.AnErrorOccured(e));
                    throw;
                }
                finally
                {
                    batchers.ApplyAndIgnoreAllErrors(
                        e =>
                        {
                            logIndexing.WarnException("Failed to dispose on index update trigger", e);
                            context.AddError(name, null, e.Message, "Dispose Trigger");
                        },
                        x => x.Dispose());
                    BatchCompleted("Current");
                }
                return new IndexedItemsInfo
                {
                    ChangedDocs = sourceCount,
                    HighestETag = batch.HighestEtagInBatch
                };
            });

            AddindexingPerformanceStat(new IndexingPerformanceStats
            {
                OutputCount = count,
                ItemsCount = sourceCount,
                InputCount = batch.Docs.Count,
                Duration = sw.Elapsed,
                Operation = "Index",
                Started = start
            });
            logIndexing.Debug("Indexed {0} documents for {1}", count, name);
        }
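
Every example above funnels trigger-batcher calls through ApplyAndIgnoreAllErrors, so one failing IndexUpdateTrigger cannot abort indexing or starve the remaining batchers. The helper itself is not shown; a plausible shape, inferred purely from its call sites:

    using System;
    using System.Collections.Generic;

    static class BatcherExtensions
    {
        // Invokes 'action' on every element; any exception is routed to 'onError'
        // (which in the examples logs and records an indexing error) and the
        // iteration continues with the next element.
        public static void ApplyAndIgnoreAllErrors<T>(
            this IEnumerable<T> self, Action<Exception> onError, Action<T> action)
        {
            foreach (var item in self)
            {
                try
                {
                    action(item);
                }
                catch (Exception e)
                {
                    onError(e);
                }
            }
        }
    }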
Example #16
0
        public override void IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp)
        {
            var count       = 0;
            var sourceCount = 0;
            var sw          = Stopwatch.StartNew();
            var start       = SystemTime.UtcNow;

            Write((indexWriter, analyzer, stats) =>
            {
                var processedKeys = new HashSet <string>();
                var batchers      = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(indexId))
                                    .Where(x => x != null)
                                    .ToList();
                try
                {
                    RecordCurrentBatch("Current", batch.Docs.Count);
                    var docIdTerm        = new Term(Constants.DocumentIdFieldName);
                    var documentsWrapped = batch.Docs.Select((doc, i) =>
                    {
                        Interlocked.Increment(ref sourceCount);
                        if (doc.__document_id == null)
                        {
                            throw new ArgumentException(
                                string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));
                        }

                        string documentId = doc.__document_id.ToString();
                        if (processedKeys.Add(documentId) == false)
                        {
                            return(doc);
                        }

                        InvokeOnIndexEntryDeletedOnAllBatchers(batchers, docIdTerm.CreateTerm(documentId.ToLowerInvariant()));

                        if (batch.SkipDeleteFromIndex[i] == false ||
                            context.ShouldRemoveFromIndex(documentId)) // maybe it is recently deleted?
                        {
                            indexWriter.DeleteDocuments(docIdTerm.CreateTerm(documentId.ToLowerInvariant()));
                        }

                        return(doc);
                    })
                                           .Where(x => x is FilteredDocument == false)
                                           .ToList();

                    var allReferencedDocs = new ConcurrentQueue <IDictionary <string, HashSet <string> > >();
                    var allReferenceEtags = new ConcurrentQueue <IDictionary <string, Etag> >();

                    BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, (partition) =>
                    {
                        var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(context.Database, indexDefinition, viewGenerator, logIndexing);
                        var luceneDoc       = new Document();
                        var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES,
                                                        Field.Index.NOT_ANALYZED_NO_NORMS);

                        using (CurrentIndexingScope.Current = new CurrentIndexingScope(context.Database, PublicName))
                        {
                            string currentDocId = null;
                            int outputPerDocId  = 0;
                            Action <Exception, object> onErrorFunc;
                            foreach (var doc in RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, stats, out onErrorFunc))
                            {
                                float boost;
                                IndexingResult indexingResult;
                                try
                                {
                                    indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);
                                }
                                catch (Exception e)
                                {
                                    onErrorFunc(e, doc);
                                    continue;
                                }

// ReSharper disable once RedundantBoolCompare --> code clarity
                                if (indexingResult.NewDocId == null || indexingResult.ShouldSkip != false)
                                {
                                    continue;
                                }
                                if (currentDocId != indexingResult.NewDocId)
                                {
                                    currentDocId   = indexingResult.NewDocId;
                                    outputPerDocId = 0;
                                }
                                outputPerDocId++;
                                EnsureValidNumberOfOutputsForDocument(currentDocId, outputPerDocId);
                                Interlocked.Increment(ref count);
                                luceneDoc.GetFields().Clear();
                                luceneDoc.Boost = boost;
                                documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
                                luceneDoc.Add(documentIdField);
                                foreach (var field in indexingResult.Fields)
                                {
                                    luceneDoc.Add(field);
                                }
                                batchers.ApplyAndIgnoreAllErrors(
                                    exception =>
                                {
                                    logIndexing.WarnException(
                                        string.Format(
                                            "Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
                                            indexId, indexingResult.NewDocId),
                                        exception);
                                    context.AddError(indexId,
                                                     indexingResult.NewDocId,
                                                     exception.Message,
                                                     "OnIndexEntryCreated Trigger"
                                                     );
                                },
                                    trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc));
                                LogIndexedDocument(indexingResult.NewDocId, luceneDoc);
                                AddDocumentToIndex(indexWriter, luceneDoc, analyzer);

                                Interlocked.Increment(ref stats.IndexingSuccesses);
                            }
                            allReferenceEtags.Enqueue(CurrentIndexingScope.Current.ReferencesEtags);
                            allReferencedDocs.Enqueue(CurrentIndexingScope.Current.ReferencedDocuments);
                        }
                    });
                    UpdateDocumentReferences(actions, allReferencedDocs, allReferenceEtags);
                }
                catch (Exception e)
                {
                    batchers.ApplyAndIgnoreAllErrors(
                        ex =>
                    {
                        logIndexing.WarnException("Failed to notify index update trigger batcher about an error", ex);
                        context.AddError(indexId, null, ex.Message, "AnErrorOccured Trigger");
                    },
                        x => x.AnErrorOccured(e));
                    throw;
                }
                finally
                {
                    batchers.ApplyAndIgnoreAllErrors(
                        e =>
                    {
                        logIndexing.WarnException("Failed to dispose on index update trigger", e);
                        context.AddError(indexId, null, e.Message, "Dispose Trigger");
                    },
                        x => x.Dispose());
                    BatchCompleted("Current");
                }
                return(new IndexedItemsInfo(batch.HighestEtagBeforeFiltering)
                {
                    ChangedDocs = sourceCount
                });
            });

            AddindexingPerformanceStat(new IndexingPerformanceStats
            {
                OutputCount = count,
                ItemsCount  = sourceCount,
                InputCount  = batch.Docs.Count,
                Duration    = sw.Elapsed,
                Operation   = "Index",
                Started     = start
            });
            logIndexing.Debug("Indexed {0} documents for {1}", count, indexId);
        }
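
Example #16 caps the number of index entries a single source document may emit. Because map results for one document arrive consecutively, a counter that resets on every new document id is sufficient. The guard condensed into a standalone class (the class and its constructor are illustrative; RavenDB takes the limit from configuration):

    class OutputsPerDocumentGuard
    {
        private readonly int maxOutputsPerDocument;
        private string currentDocId;
        private int outputsForCurrentDoc;

        public OutputsPerDocumentGuard(int maxOutputsPerDocument)
        {
            this.maxOutputsPerDocument = maxOutputsPerDocument;
        }

        // Returns false once a document has produced more outputs than allowed,
        // mirroring the currentDocId/outputPerDocId bookkeeping around
        // EnsureValidNumberOfOutputsForDocument in the example.
        public bool TryRegisterOutput(string docId)
        {
            if (currentDocId != docId)
            {
                currentDocId = docId;
                outputsForCurrentDoc = 0;
            }
            return ++outputsForCurrentDoc <= maxOutputsPerDocument;
        }
    }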
Example #17
0
		public override IndexingPerformanceStats IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp, CancellationToken token)
		{
			token.ThrowIfCancellationRequested();

			var count = 0;
			var sourceCount = 0;
			var deleted = new Dictionary<ReduceKeyAndBucket, int>();
			var performance = RecordCurrentBatch("Current Map", "Map", batch.Docs.Count);
			var performanceStats = new List<BasePerformanceStats>();

			var usedStorageAccessors = new ConcurrentSet<IStorageActionsAccessor>();

			if (usedStorageAccessors.TryAdd(actions))
			{
				var storageCommitDuration = new Stopwatch();

				actions.BeforeStorageCommit += storageCommitDuration.Start;

				actions.AfterStorageCommit += () =>
				{
					storageCommitDuration.Stop();

					performanceStats.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds));
				};
			}

			var deleteMappedResultsDuration = new Stopwatch();
			var documentsWrapped = batch.Docs.Select(doc =>
			{
				token.ThrowIfCancellationRequested();

				sourceCount++;
				var documentId = doc.__document_id;

				using (StopwatchScope.For(deleteMappedResultsDuration))
				{
					actions.MapReduce.DeleteMappedResultsForDocumentId((string)documentId, indexId, deleted);
				}
				
				return doc;
			})
			.Where(x => x is FilteredDocument == false)
			.ToList();

			performanceStats.Add(new PerformanceStats
			{
				Name = IndexingOperation.Map_DeleteMappedResults,
				DurationMs = deleteMappedResultsDuration.ElapsedMilliseconds,
			});

			var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();
			var allReferenceEtags = new ConcurrentQueue<IDictionary<string, Etag>>();
			var allState = new ConcurrentQueue<Tuple<HashSet<ReduceKeyAndBucket>, IndexingWorkStats, Dictionary<string, int>>>();

			var parallelOperations = new ConcurrentQueue<ParallelBatchStats>();

			var parallelProcessingStart = SystemTime.UtcNow;

			BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, partition =>
			{
				token.ThrowIfCancellationRequested();
				var parallelStats = new ParallelBatchStats
				{
					StartDelay = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds
				};

				var localStats = new IndexingWorkStats();
				var localChanges = new HashSet<ReduceKeyAndBucket>();
				var statsPerKey = new Dictionary<string, int>();

				var linqExecutionDuration = new Stopwatch();
				var reduceInMapLinqExecutionDuration = new Stopwatch();
				var putMappedResultsDuration = new Stopwatch();
				var convertToRavenJObjectDuration = new Stopwatch();

				allState.Enqueue(Tuple.Create(localChanges, localStats, statsPerKey));

				using (CurrentIndexingScope.Current = new CurrentIndexingScope(context.Database, PublicName))
				{
					// we are writing to the transactional store from multiple threads here, and in a streaming fashion
					// should result in less memory and better perf
					context.TransactionalStorage.Batch(accessor =>
					{
						if (usedStorageAccessors.TryAdd(accessor))
						{
							var storageCommitDuration = new Stopwatch();

							accessor.BeforeStorageCommit += storageCommitDuration.Start;

							accessor.AfterStorageCommit += () =>
							{
								storageCommitDuration.Stop();

								parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds));
							};
						}

						var mapResults = RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, localStats, linqExecutionDuration);
						var currentDocumentResults = new List<object>();
						string currentKey = null;
						bool skipDocument = false;
						
						foreach (var currentDoc in mapResults)
						{
							token.ThrowIfCancellationRequested();

							var documentId = GetDocumentId(currentDoc);
							if (documentId != currentKey)
							{
								count += ProcessBatch(viewGenerator, currentDocumentResults, currentKey, localChanges, accessor, statsPerKey, reduceInMapLinqExecutionDuration, putMappedResultsDuration, convertToRavenJObjectDuration);

								currentDocumentResults.Clear();
								currentKey = documentId;
								skipDocument = false; // a new document resets the skip flag
							}
							else if (skipDocument)
							{
								continue;
							}

							RavenJObject currentDocJObject;
							using (StopwatchScope.For(convertToRavenJObjectDuration))
							{
								currentDocJObject = RavenJObject.FromObject(currentDoc, jsonSerializer);
							}

							currentDocumentResults.Add(new DynamicJsonObject(currentDocJObject));

							if (EnsureValidNumberOfOutputsForDocument(documentId, currentDocumentResults.Count) == false)
							{
								skipDocument = true;
								currentDocumentResults.Clear();
								continue;
							}

							Interlocked.Increment(ref localStats.IndexingSuccesses);
						}
						count += ProcessBatch(viewGenerator, currentDocumentResults, currentKey, localChanges, accessor, statsPerKey, reduceInMapLinqExecutionDuration, putMappedResultsDuration, convertToRavenJObjectDuration);

						parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.LoadDocument, CurrentIndexingScope.Current.LoadDocumentDuration.ElapsedMilliseconds));
						parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Linq_MapExecution, linqExecutionDuration.ElapsedMilliseconds));
						parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Linq_ReduceLinqExecution, reduceInMapLinqExecutionDuration.ElapsedMilliseconds));
						parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Map_PutMappedResults, putMappedResultsDuration.ElapsedMilliseconds));
						parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Map_ConvertToRavenJObject, convertToRavenJObjectDuration.ElapsedMilliseconds));

						parallelOperations.Enqueue(parallelStats);
					});

					allReferenceEtags.Enqueue(CurrentIndexingScope.Current.ReferencesEtags);
					allReferencedDocs.Enqueue(CurrentIndexingScope.Current.ReferencedDocuments);
				}
			});

			performanceStats.Add(new ParallelPerformanceStats
			{
				NumberOfThreads = parallelOperations.Count,
				DurationMs = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds,
				BatchedOperations = parallelOperations.ToList()
			});

			var updateDocumentReferencesDuration = new Stopwatch();
			using (StopwatchScope.For(updateDocumentReferencesDuration))
			{
				UpdateDocumentReferences(actions, allReferencedDocs, allReferenceEtags);
			}
			performanceStats.Add(PerformanceStats.From(IndexingOperation.UpdateDocumentReferences, updateDocumentReferencesDuration.ElapsedMilliseconds));

			var changed = allState.SelectMany(x => x.Item1).Concat(deleted.Keys)
					.Distinct()
					.ToList();

			var stats = new IndexingWorkStats(allState.Select(x => x.Item2));
			var reduceKeyStats = allState.SelectMany(x => x.Item3)
										 .GroupBy(x => x.Key)
										 .Select(g => new { g.Key, Count = g.Sum(x => x.Value) })
										 .ToList();

			BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, reduceKeyStats, enumerator => context.TransactionalStorage.Batch(accessor =>
			{
				while (enumerator.MoveNext())
				{
					var reduceKeyStat = enumerator.Current;
					accessor.MapReduce.IncrementReduceKeyCounter(indexId, reduceKeyStat.Key, reduceKeyStat.Count);
				}
			}));


			var parallelReductionOperations = new ConcurrentQueue<ParallelBatchStats>();
			var parallelReductionStart = SystemTime.UtcNow;

			BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, changed, enumerator => context.TransactionalStorage.Batch(accessor =>
			{
				var parallelStats = new ParallelBatchStats
				{
					StartDelay = (long)(SystemTime.UtcNow - parallelReductionStart).TotalMilliseconds
				};

				var scheduleReductionsDuration = new Stopwatch();

				using (StopwatchScope.For(scheduleReductionsDuration))
				{
					while (enumerator.MoveNext())
					{
						accessor.MapReduce.ScheduleReductions(indexId, 0, enumerator.Current);
					}
				}

				parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Map_ScheduleReductions, scheduleReductionsDuration.ElapsedMilliseconds));
				parallelReductionOperations.Enqueue(parallelStats);
			}));

			performanceStats.Add(new ParallelPerformanceStats
			{
				NumberOfThreads = parallelReductionOperations.Count,
				DurationMs = (long)(SystemTime.UtcNow - parallelReductionStart).TotalMilliseconds,
				BatchedOperations = parallelReductionOperations.ToList()
			});

			UpdateIndexingStats(context, stats);

			performance.OnCompleted = () => BatchCompleted("Current Map", "Map", sourceCount, count, performanceStats);

			logIndexing.Debug("Mapped {0} documents for {1}", count, indexId);

			return performance;
		}
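
Example #17 threads a set of Stopwatch instances through the hot loop with StopwatchScope.For. Judging by usage, it is a disposable that runs the given stopwatch for the lifetime of a using block; a sketch under that assumption:

    using System;
    using System.Diagnostics;

    struct StopwatchScope : IDisposable
    {
        private readonly Stopwatch stopwatch;

        private StopwatchScope(Stopwatch stopwatch)
        {
            this.stopwatch = stopwatch;
            stopwatch.Start();
        }

        // Accumulates time on 'stopwatch' for the duration of a using block.
        public static StopwatchScope For(Stopwatch stopwatch)
        {
            return new StopwatchScope(stopwatch);
        }

        public void Dispose()
        {
            stopwatch.Stop();
        }
    }

Since Start/Stop accumulate, one stopwatch can flow through many scopes and be read once at the end via ElapsedMilliseconds, which is exactly how the per-operation durations above are gathered.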
Example #18
0
        public override IndexingPerformanceStats IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp, CancellationToken token)
        {
            token.ThrowIfCancellationRequested();

            var count             = 0;
            var sourceCount       = 0;
            var writeToIndexStats = new List <PerformanceStats>();

            IndexingPerformanceStats performance = null;
            var performanceStats = new List <BasePerformanceStats>();

            var storageCommitDuration = new Stopwatch();

            actions.BeforeStorageCommit += storageCommitDuration.Start;

            actions.AfterStorageCommit += () =>
            {
                storageCommitDuration.Stop();

                performanceStats.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds));
            };

            Write((indexWriter, analyzer, stats) =>
            {
                var processedKeys = new HashSet <string>();
                var batchers      = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(indexId))
                                    .Where(x => x != null)
                                    .ToList();

                try
                {
                    performance = RecordCurrentBatch("Current", "Index", batch.Docs.Count);

                    var deleteExistingDocumentsDuration = new Stopwatch();

                    var docIdTerm        = new Term(Constants.DocumentIdFieldName);
                    var documentsWrapped = batch.Docs.Select((doc, i) =>
                    {
                        token.ThrowIfCancellationRequested();
                        Interlocked.Increment(ref sourceCount);
                        if (doc.__document_id == null)
                        {
                            throw new ArgumentException(
                                string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));
                        }

                        string documentId = doc.__document_id.ToString();
                        if (processedKeys.Add(documentId) == false)
                        {
                            return(doc);
                        }

                        InvokeOnIndexEntryDeletedOnAllBatchers(batchers, docIdTerm.CreateTerm(documentId.ToLowerInvariant()));

                        if (batch.SkipDeleteFromIndex[i] == false ||
                            context.ShouldRemoveFromIndex(documentId)) // maybe it is recently deleted?
                        {
                            using (StopwatchScope.For(deleteExistingDocumentsDuration))
                            {
                                indexWriter.DeleteDocuments(docIdTerm.CreateTerm(documentId.ToLowerInvariant()));
                            }
                        }

                        return(doc);
                    })
                                           .Where(x => x is FilteredDocument == false)
                                           .ToList();

                    performanceStats.Add(new PerformanceStats
                    {
                        Name       = IndexingOperation.Lucene_DeleteExistingDocument,
                        DurationMs = deleteExistingDocumentsDuration.ElapsedMilliseconds
                    });

                    var allReferencedDocs = new ConcurrentQueue <IDictionary <string, HashSet <string> > >();
                    var allReferenceEtags = new ConcurrentQueue <IDictionary <string, Etag> >();

                    var parallelOperations = new ConcurrentQueue <ParallelBatchStats>();

                    var parallelProcessingStart = SystemTime.UtcNow;
                    context.Database.MappingThreadPool.ExecuteBatch(documentsWrapped, (IEnumerator <dynamic> partition) =>
                    {
                        token.ThrowIfCancellationRequested();
                        var parallelStats = new ParallelBatchStats
                        {
                            StartDelay = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds
                        };

                        var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(context.Database, indexDefinition, viewGenerator, logIndexing);
                        var luceneDoc       = new Document();
                        var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES,
                                                        Field.Index.NOT_ANALYZED_NO_NORMS);

                        using (CurrentIndexingScope.Current = new CurrentIndexingScope(context.Database, PublicName))
                        {
                            string currentDocId = null;
                            int outputPerDocId  = 0;
                            Action <Exception, object> onErrorFunc;
                            bool skipDocument = false;

                            var linqExecutionDuration           = new Stopwatch();
                            var addDocumentDuration             = new Stopwatch();
                            var convertToLuceneDocumentDuration = new Stopwatch();

                            foreach (var doc in RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, stats, out onErrorFunc, linqExecutionDuration))
                            {
                                token.ThrowIfCancellationRequested();

                                float boost;
                                IndexingResult indexingResult;
                                using (StopwatchScope.For(convertToLuceneDocumentDuration))
                                {
                                    try
                                    {
                                        indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);
                                    }
                                    catch (Exception e)
                                    {
                                        onErrorFunc(e, doc);
                                        continue;
                                    }
                                }

                                // ReSharper disable once RedundantBoolCompare --> code clarity
                                if (indexingResult.NewDocId == null || indexingResult.ShouldSkip != false)
                                {
                                    continue;
                                }
                                if (currentDocId != indexingResult.NewDocId)
                                {
                                    currentDocId   = indexingResult.NewDocId;
                                    outputPerDocId = 0;
                                    skipDocument   = false;
                                }
                                if (skipDocument)
                                {
                                    continue;
                                }
                                outputPerDocId++;
                                if (EnsureValidNumberOfOutputsForDocument(currentDocId, outputPerDocId) == false)
                                {
                                    skipDocument = true;
                                    continue;
                                }
                                Interlocked.Increment(ref count);

                                using (StopwatchScope.For(convertToLuceneDocumentDuration))
                                {
                                    luceneDoc.GetFields().Clear();
                                    luceneDoc.Boost = boost;
                                    documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
                                    luceneDoc.Add(documentIdField);
                                    foreach (var field in indexingResult.Fields)
                                    {
                                        luceneDoc.Add(field);
                                    }
                                }

                                batchers.ApplyAndIgnoreAllErrors(
                                    exception =>
                                {
                                    logIndexing.WarnException(
                                        string.Format(
                                            "Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
                                            PublicName, indexingResult.NewDocId),
                                        exception);
                                    context.AddError(
                                        indexId,
                                        PublicName,
                                        indexingResult.NewDocId,
                                        exception,
                                        "OnIndexEntryCreated Trigger");
                                },
                                    trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc));
                                LogIndexedDocument(indexingResult.NewDocId, luceneDoc);

                                using (StopwatchScope.For(addDocumentDuration))
                                {
                                    AddDocumentToIndex(indexWriter, luceneDoc, analyzer);
                                }

                                Interlocked.Increment(ref stats.IndexingSuccesses);
                            }
                            allReferenceEtags.Enqueue(CurrentIndexingScope.Current.ReferencesEtags);
                            allReferencedDocs.Enqueue(CurrentIndexingScope.Current.ReferencedDocuments);

                            parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.LoadDocument, CurrentIndexingScope.Current.LoadDocumentDuration.ElapsedMilliseconds));
                            parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Linq_MapExecution, linqExecutionDuration.ElapsedMilliseconds));
                            parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Lucene_ConvertToLuceneDocument, convertToLuceneDocumentDuration.ElapsedMilliseconds));
                            parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Lucene_AddDocument, addDocumentDuration.ElapsedMilliseconds));

                            parallelOperations.Enqueue(parallelStats);
                        }
                    }, description: string.Format("Mapping index {0} from Etag {1} to Etag {2}", this.PublicName, this.GetLastEtagFromStats(), batch.HighestEtagBeforeFiltering));

                    performanceStats.Add(new ParallelPerformanceStats
                    {
                        NumberOfThreads   = parallelOperations.Count,
                        DurationMs        = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds,
                        BatchedOperations = parallelOperations.ToList()
                    });

                    var updateDocumentReferencesDuration = new Stopwatch();
                    using (StopwatchScope.For(updateDocumentReferencesDuration))
                    {
                        UpdateDocumentReferences(actions, allReferencedDocs, allReferenceEtags);
                    }
                    performanceStats.Add(PerformanceStats.From(IndexingOperation.UpdateDocumentReferences, updateDocumentReferencesDuration.ElapsedMilliseconds));
                }
                catch (Exception e)
                {
                    batchers.ApplyAndIgnoreAllErrors(
                        ex =>
                    {
                        logIndexing.WarnException("Failed to notify index update trigger batcher about an error in " + PublicName, ex);
                        context.AddError(indexId, PublicName, null, ex, "AnErrorOccured Trigger");
                    },
                        x => x.AnErrorOccured(e));
                    throw;
                }
                finally
                {
                    batchers.ApplyAndIgnoreAllErrors(
                        e =>
                    {
                        logIndexing.WarnException("Failed to dispose on index update trigger in " + PublicName, e);
                        context.AddError(indexId, PublicName, null, e, "Dispose Trigger");
                    },
                        x => x.Dispose());
                }
                return(new IndexedItemsInfo(batch.HighestEtagBeforeFiltering)
                {
                    ChangedDocs = sourceCount
                });
            }, writeToIndexStats);

            performanceStats.AddRange(writeToIndexStats);

            InitializeIndexingPerformanceCompleteDelegate(performance, sourceCount, count, performanceStats);

            if (logIndexing.IsDebugEnabled)
            {
                logIndexing.Debug("Indexed {0} documents for {1}", count, PublicName);
            }

            return(performance);
        }
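
Examples #17, #18 and #20 all time the storage commit the same way: hook a stopwatch to the BeforeStorageCommit/AfterStorageCommit event pair on the accessor and report the elapsed time as an IndexingOperation.StorageCommit stat. Reduced to its essentials (the accessor type here is a stand-in; only the event wiring matches the examples):

    using System;
    using System.Diagnostics;

    class StorageAccessorStub
    {
        public event Action BeforeStorageCommit;
        public event Action AfterStorageCommit;

        public void Commit()
        {
            var before = BeforeStorageCommit;
            if (before != null) before();

            // ... the transactional store would flush here ...

            var after = AfterStorageCommit;
            if (after != null) after();
        }
    }

    class CommitTimingDemo
    {
        static void Main()
        {
            var accessor = new StorageAccessorStub();
            var storageCommitDuration = new Stopwatch();

            accessor.BeforeStorageCommit += storageCommitDuration.Start;
            accessor.AfterStorageCommit += () =>
            {
                storageCommitDuration.Stop();
                Console.WriteLine("StorageCommit: {0} ms", storageCommitDuration.ElapsedMilliseconds);
            };

            accessor.Commit();
        }
    }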
Example #19
0
		public abstract void IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp);
Example #20
0
        public override IndexingPerformanceStats IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp, CancellationToken token)
        {
            token.ThrowIfCancellationRequested();

            var count            = 0;
            var sourceCount      = 0;
            var deleted          = new Dictionary <ReduceKeyAndBucket, int>();
            var performance      = RecordCurrentBatch("Current Map", "Map", batch.Docs.Count);
            var performanceStats = new List <BasePerformanceStats>();

            var usedStorageAccessors = new ConcurrentSet <IStorageActionsAccessor>();

            if (usedStorageAccessors.TryAdd(actions))
            {
                var storageCommitDuration = new Stopwatch();

                actions.BeforeStorageCommit += storageCommitDuration.Start;

                actions.AfterStorageCommit += () =>
                {
                    storageCommitDuration.Stop();

                    performanceStats.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds));
                };
            }

            var deleteMappedResultsDuration = new Stopwatch();
            var documentsWrapped            = batch.Docs.Select(doc =>
            {
                token.ThrowIfCancellationRequested();

                sourceCount++;
                var documentId = doc.__document_id;

                using (StopwatchScope.For(deleteMappedResultsDuration))
                {
                    actions.MapReduce.DeleteMappedResultsForDocumentId((string)documentId, indexId, deleted);
                }

                return(doc);
            })
                                              .Where(x => x is FilteredDocument == false)
                                              .ToList();

            performanceStats.Add(new PerformanceStats
            {
                Name       = IndexingOperation.Map_DeleteMappedResults,
                DurationMs = deleteMappedResultsDuration.ElapsedMilliseconds,
            });

            var allReferencedDocs = new ConcurrentQueue <IDictionary <string, HashSet <string> > >();
            var allReferenceEtags = new ConcurrentQueue <IDictionary <string, Etag> >();
            var allState          = new ConcurrentQueue <Tuple <HashSet <ReduceKeyAndBucket>, IndexingWorkStats, Dictionary <string, int> > >();

            var parallelOperations = new ConcurrentQueue <ParallelBatchStats>();

            var parallelProcessingStart = SystemTime.UtcNow;

            BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, partition =>
            {
                token.ThrowIfCancellationRequested();
                var parallelStats = new ParallelBatchStats
                {
                    StartDelay = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds
                };

                var localStats   = new IndexingWorkStats();
                var localChanges = new HashSet <ReduceKeyAndBucket>();
                var statsPerKey  = new Dictionary <string, int>();

                var linqExecutionDuration            = new Stopwatch();
                var reduceInMapLinqExecutionDuration = new Stopwatch();
                var putMappedResultsDuration         = new Stopwatch();
                var convertToRavenJObjectDuration    = new Stopwatch();

                allState.Enqueue(Tuple.Create(localChanges, localStats, statsPerKey));

                using (CurrentIndexingScope.Current = new CurrentIndexingScope(context.Database, PublicName))
                {
                    // we are writing to the transactional store from multiple threads here, and in a streaming fashion
                    // should result in less memory and better perf
                    context.TransactionalStorage.Batch(accessor =>
                    {
                        if (usedStorageAccessors.TryAdd(accessor))
                        {
                            var storageCommitDuration = new Stopwatch();

                            accessor.BeforeStorageCommit += storageCommitDuration.Start;

                            accessor.AfterStorageCommit += () =>
                            {
                                storageCommitDuration.Stop();

                                parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds));
                            };
                        }

                        var mapResults             = RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, localStats, linqExecutionDuration);
                        var currentDocumentResults = new List <object>();
                        string currentKey          = null;
                        bool skipDocument          = false;

                        foreach (var currentDoc in mapResults)
                        {
                            token.ThrowIfCancellationRequested();

                            var documentId = GetDocumentId(currentDoc);
                            if (documentId != currentKey)
                            {
                                count += ProcessBatch(viewGenerator, currentDocumentResults, currentKey, localChanges, accessor, statsPerKey, reduceInMapLinqExecutionDuration, putMappedResultsDuration, convertToRavenJObjectDuration);

                                currentDocumentResults.Clear();
                                currentKey = documentId;
                                skipDocument = false; // a new document resets the skip flag
                            }
                            else if (skipDocument)
                            {
                                continue;
                            }

                            RavenJObject currentDocJObject;
                            using (StopwatchScope.For(convertToRavenJObjectDuration))
                            {
                                currentDocJObject = RavenJObject.FromObject(currentDoc, jsonSerializer);
                            }

                            currentDocumentResults.Add(new DynamicJsonObject(currentDocJObject));

                            if (EnsureValidNumberOfOutputsForDocument(documentId, currentDocumentResults.Count) == false)
                            {
                                skipDocument = true;
                                currentDocumentResults.Clear();
                                continue;
                            }

                            Interlocked.Increment(ref localStats.IndexingSuccesses);
                        }
                        count += ProcessBatch(viewGenerator, currentDocumentResults, currentKey, localChanges, accessor, statsPerKey, reduceInMapLinqExecutionDuration, putMappedResultsDuration, convertToRavenJObjectDuration);

                        parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.LoadDocument, CurrentIndexingScope.Current.LoadDocumentDuration.ElapsedMilliseconds));
                        parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Linq_MapExecution, linqExecutionDuration.ElapsedMilliseconds));
                        parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Linq_ReduceLinqExecution, reduceInMapLinqExecutionDuration.ElapsedMilliseconds));
                        parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Map_PutMappedResults, putMappedResultsDuration.ElapsedMilliseconds));
                        parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Map_ConvertToRavenJObject, convertToRavenJObjectDuration.ElapsedMilliseconds));

                        parallelOperations.Enqueue(parallelStats);
                    });

                    allReferenceEtags.Enqueue(CurrentIndexingScope.Current.ReferencesEtags);
                    allReferencedDocs.Enqueue(CurrentIndexingScope.Current.ReferencedDocuments);
                }
            });

            performanceStats.Add(new ParallelPerformanceStats
            {
                NumberOfThreads   = parallelOperations.Count,
                DurationMs        = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds,
                BatchedOperations = parallelOperations.ToList()
            });

            var updateDocumentReferencesDuration = new Stopwatch();

            using (StopwatchScope.For(updateDocumentReferencesDuration))
            {
                UpdateDocumentReferences(actions, allReferencedDocs, allReferenceEtags);
            }
            performanceStats.Add(PerformanceStats.From(IndexingOperation.UpdateDocumentReferences, updateDocumentReferencesDuration.ElapsedMilliseconds));

            var changed = allState.SelectMany(x => x.Item1).Concat(deleted.Keys)
                          .Distinct()
                          .ToList();

            var stats          = new IndexingWorkStats(allState.Select(x => x.Item2));
            var reduceKeyStats = allState.SelectMany(x => x.Item3)
                                 .GroupBy(x => x.Key)
                                 .Select(g => new { g.Key, Count = g.Sum(x => x.Value) })
                                 .ToList();

            BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, reduceKeyStats, enumerator => context.TransactionalStorage.Batch(accessor =>
            {
                while (enumerator.MoveNext())
                {
                    var reduceKeyStat = enumerator.Current;
                    accessor.MapReduce.IncrementReduceKeyCounter(indexId, reduceKeyStat.Key, reduceKeyStat.Count);
                }
            }));

            actions.General.MaybePulseTransaction();

            var parallelReductionOperations = new ConcurrentQueue <ParallelBatchStats>();
            var parallelReductionStart      = SystemTime.UtcNow;

            BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, changed, enumerator => context.TransactionalStorage.Batch(accessor =>
            {
                var parallelStats = new ParallelBatchStats
                {
                    StartDelay = (long)(SystemTime.UtcNow - parallelReductionStart).TotalMilliseconds
                };

                var scheduleReductionsDuration = new Stopwatch();

                using (StopwatchScope.For(scheduleReductionsDuration))
                {
                    while (enumerator.MoveNext())
                    {
                        accessor.MapReduce.ScheduleReductions(indexId, 0, enumerator.Current);
                        accessor.General.MaybePulseTransaction();
                    }
                }

                parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Map_ScheduleReductions, scheduleReductionsDuration.ElapsedMilliseconds));
                parallelReductionOperations.Enqueue(parallelStats);
            }));

            performanceStats.Add(new ParallelPerformanceStats
            {
                NumberOfThreads   = parallelReductionOperations.Count,
                DurationMs        = (long)(SystemTime.UtcNow - parallelReductionStart).TotalMilliseconds,
                BatchedOperations = parallelReductionOperations.ToList()
            });

            UpdateIndexingStats(context, stats);

            performance.OnCompleted = () => BatchCompleted("Current Map", "Map", sourceCount, count, performanceStats);

            logIndexing.Debug("Mapped {0} documents for {1}", count, indexId);

            return(performance);
        }
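
Example #20 keeps the parallel map loop lock-free by giving every partition its own stats dictionary and merging only after all partitions finish. The merge itself is the SelectMany/GroupBy/Sum query over allState; isolated here, with plain data standing in for the queue contents:

    using System;
    using System.Collections.Concurrent;
    using System.Collections.Generic;
    using System.Linq;

    class ReduceKeyStatsMerge
    {
        static void Main()
        {
            // One dictionary per worker thread, as enqueued into allState above.
            var statsPerKeyQueue = new ConcurrentQueue<Dictionary<string, int>>();
            statsPerKeyQueue.Enqueue(new Dictionary<string, int> { { "users/1", 3 }, { "users/2", 1 } });
            statsPerKeyQueue.Enqueue(new Dictionary<string, int> { { "users/1", 2 } });

            // The same shape as the reduceKeyStats query in the example.
            var reduceKeyStats = statsPerKeyQueue.SelectMany(x => x)
                                                 .GroupBy(x => x.Key)
                                                 .Select(g => new { g.Key, Count = g.Sum(x => x.Value) })
                                                 .ToList();

            foreach (var stat in reduceKeyStats)
                Console.WriteLine("{0}: {1}", stat.Key, stat.Count); // users/1: 5, users/2: 1
        }
    }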
Example #21
0
		private void IndexDocuments(IStorageActionsAccessor actions, string index, IndexingBatch batch)
		{
			var viewGenerator = context.IndexDefinitionStorage.GetViewGenerator(index);
			if (viewGenerator == null)
				return; // index was deleted, probably
			try
			{
				if (Log.IsDebugEnabled)
				{
					string ids;
					if (batch.Ids.Count < 256)
						ids = string.Join(",", batch.Ids);
					else
					{
						ids = string.Join(", ", batch.Ids.Take(128)) + " ... " + string.Join(", ", batch.Ids.Skip(batch.Ids.Count - 128));
					}
					Log.Debug("Indexing {0} documents for index: {1}. ({2})", batch.Docs.Count, index, ids);
				}
				context.CancellationToken.ThrowIfCancellationRequested();

				context.IndexStorage.Index(index, viewGenerator, batch, context, actions, batch.DateTime ?? DateTime.MinValue);
			}
			catch (OperationCanceledException)
			{
				throw;
			}
			catch (Exception e)
			{
				if (actions.IsWriteConflict(e))
					return;
				Log.WarnException(string.Format("Failed to index documents for index: {0}", index), e);
			}
		}
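
The debug logging in Example #21 avoids building an unbounded string for huge batches by printing only the first and last 128 ids once the count reaches 256. The same truncation as a standalone helper (the class name is ours):

    using System.Collections.Generic;
    using System.Linq;

    static class IdListFormatter
    {
        // Joins up to 255 ids verbatim; longer lists keep only the head and the tail.
        public static string ForLogging(IList<string> ids)
        {
            if (ids.Count < 256)
                return string.Join(",", ids);

            return string.Join(", ", ids.Take(128)) +
                   " ... " +
                   string.Join(", ", ids.Skip(ids.Count - 128));
        }
    }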
Example #22
0
		public void Index(string index,
			AbstractViewGenerator viewGenerator,
			IndexingBatch batch,
			WorkContext context,
			IStorageActionsAccessor actions,
			DateTime minimumTimestamp)
		{
			Index value;
			if (indexes.TryGetValue(index, out value) == false)
			{
				log.Debug("Tried to index on a non existent index {0}, ignoring", index);
				return;
			}
			using (EnsureInvariantCulture())
			using (DocumentCacher.SkipSettingDocumentsInDocumentCache())
			{
				value.IndexDocuments(viewGenerator, batch, context, actions, minimumTimestamp);
				context.RaiseIndexChangeNotification(new IndexChangeNotification
				{
					Name = index,
					Type = IndexChangeTypes.MapCompleted
				});
			}
		}
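
Index() wraps the actual indexing in EnsureInvariantCulture(), presumably a disposable scope that pins the thread to the invariant culture so number and date formatting inside map functions is deterministic regardless of server locale. A sketch of that idea (inferred; not the RavenDB implementation):

    using System;
    using System.Globalization;
    using System.Threading;

    static class CultureHelpers
    {
        // Switches the current thread to the invariant culture and restores
        // the previous culture when the returned scope is disposed.
        public static IDisposable EnsureInvariantCulture()
        {
            var previous = Thread.CurrentThread.CurrentCulture;
            Thread.CurrentThread.CurrentCulture = CultureInfo.InvariantCulture;
            return new RestoreCulture(previous);
        }

        private class RestoreCulture : IDisposable
        {
            private readonly CultureInfo previous;

            public RestoreCulture(CultureInfo previous)
            {
                this.previous = previous;
            }

            public void Dispose()
            {
                Thread.CurrentThread.CurrentCulture = previous;
            }
        }
    }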
Example #23
0
		public override void IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp)
		{
			var count = 0;
			var sourceCount = 0;
			var sw = Stopwatch.StartNew();
			var start = SystemTime.UtcNow;
			int loadDocumentCount = 0;
			long loadDocumentDuration = 0;
			Write((indexWriter, analyzer, stats) =>
			{
				var processedKeys = new HashSet<string>();
				var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(indexId))
					.Where(x => x != null)
					.ToList();
				try
				{
					var indexingPerfStats = RecordCurrentBatch("Current", batch.Docs.Count);
					batch.SetIndexingPerformance(indexingPerfStats);

					var docIdTerm = new Term(Constants.DocumentIdFieldName);
					var documentsWrapped = batch.Docs.Select((doc, i) =>
					{
						Interlocked.Increment(ref sourceCount);
						if (doc.__document_id == null)
							throw new ArgumentException(
								string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));

						string documentId = doc.__document_id.ToString();
						if (processedKeys.Add(documentId) == false)
							return doc;

						InvokeOnIndexEntryDeletedOnAllBatchers(batchers, docIdTerm.CreateTerm(documentId.ToLowerInvariant()));

						if (batch.SkipDeleteFromIndex[i] == false ||
							context.ShouldRemoveFromIndex(documentId)) // maybe it is recently deleted?
							indexWriter.DeleteDocuments(docIdTerm.CreateTerm(documentId.ToLowerInvariant()));

						return doc;
					})
						.Where(x => x is FilteredDocument == false)
						.ToList();

					var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();
					var allReferenceEtags = new ConcurrentQueue<IDictionary<string, Etag>>();

					BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, (partition) =>
					{
						var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(context.Database, indexDefinition, viewGenerator, logIndexing);
						var luceneDoc = new Document();
						var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES,
														Field.Index.NOT_ANALYZED_NO_NORMS);

						using (CurrentIndexingScope.Current = new CurrentIndexingScope(context.Database, PublicName))
						{
							string currentDocId = null;
							int outputPerDocId = 0;
							Action<Exception, object> onErrorFunc;
							bool skipDocument = false;
							foreach (var doc in RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, stats, out onErrorFunc))
							{
								float boost;
								IndexingResult indexingResult;
								try
								{
									indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);
								}
								catch (Exception e)
								{
									onErrorFunc(e, doc);
									continue;
								}

								// ReSharper disable once RedundantBoolCompare --> code clarity
								if (indexingResult.NewDocId == null || indexingResult.ShouldSkip != false)
								{
									continue;
								}
								if (currentDocId != indexingResult.NewDocId)
								{
									currentDocId = indexingResult.NewDocId;
									outputPerDocId = 0;
									skipDocument = false;
								}
								if (skipDocument)
									continue;
								outputPerDocId++;
								if (EnsureValidNumberOfOutputsForDocument(currentDocId, outputPerDocId) == false)
								{
									skipDocument = true;
									continue;
								}
								Interlocked.Increment(ref count);
								luceneDoc.GetFields().Clear();
								luceneDoc.Boost = boost;
								documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
								luceneDoc.Add(documentIdField);
								foreach (var field in indexingResult.Fields)
								{
									luceneDoc.Add(field);
								}
								batchers.ApplyAndIgnoreAllErrors(
									exception =>
									{
										logIndexing.WarnException(
										string.Format(
											"Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
											indexId, indexingResult.NewDocId),
											exception);
										context.AddError(indexId,
															 indexingResult.NewDocId,
															 exception.Message,
															 "OnIndexEntryCreated Trigger"
												);
									},
									trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc));
								LogIndexedDocument(indexingResult.NewDocId, luceneDoc);
								AddDocumentToIndex(indexWriter, luceneDoc, analyzer);

								Interlocked.Increment(ref stats.IndexingSuccesses);
							}
							allReferenceEtags.Enqueue(CurrentIndexingScope.Current.ReferencesEtags);
							allReferencedDocs.Enqueue(CurrentIndexingScope.Current.ReferencedDocuments);

							Interlocked.Add(ref loadDocumentCount, CurrentIndexingScope.Current.LoadDocumentCount);
							Interlocked.Add(ref loadDocumentDuration, CurrentIndexingScope.Current.LoadDocumentDuration.ElapsedMilliseconds);
						}
					});
					UpdateDocumentReferences(actions, allReferencedDocs, allReferenceEtags);
				}
				catch (Exception e)
				{
					batchers.ApplyAndIgnoreAllErrors(
						ex =>
						{
							logIndexing.WarnException("Failed to notify index update trigger batcher about an error", ex);
							context.AddError(indexId, null, ex.Message, "AnErrorOccured Trigger");
						},
						x => x.AnErrorOccured(e));
					throw;
				}
				finally
				{
					batchers.ApplyAndIgnoreAllErrors(
						e =>
						{
							logIndexing.WarnException("Failed to dispose on index update trigger", e);
							context.AddError(indexId, null, e.Message, "Dispose Trigger");
						},
						x => x.Dispose());
					BatchCompleted("Current");
				}
				return new IndexedItemsInfo(batch.HighestEtagBeforeFiltering)
				{
					ChangedDocs = sourceCount
				};
			});

			AddindexingPerformanceStat(new IndexingPerformanceStats
			{
				OutputCount = count,
				ItemsCount = sourceCount,
				InputCount = batch.Docs.Count,
				Duration = sw.Elapsed,
				Operation = "Index",
				Started = start,
				LoadDocumentCount = loadDocumentCount,
				LoadDocumentDurationMs = loadDocumentDuration 
			});
			logIndexing.Debug("Indexed {0} documents for {1}", count, indexId);
		}
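A note on the skipDocument path above: it relies on EnsureValidNumberOfOutputsForDocument to cap how many index entries a single source document may emit. The real method is defined elsewhere in the class; the following is only a minimal sketch, assuming a configuration knob named MaxIndexOutputsPerDocument and the logIndexing/indexId members used above.

		private bool EnsureValidNumberOfOutputsForDocument(string docId, int numberOfProducedOutputs)
		{
			// hypothetical limit pulled from configuration; the property name is an assumption
			var maxOutputs = context.Configuration.MaxIndexOutputsPerDocument;
			if (numberOfProducedOutputs <= maxOutputs)
				return true;

			logIndexing.Warn(
				"Index '{0}' produced more than {1} map results from document '{2}', skipping the rest of its outputs",
				indexId, maxOutputs, docId);
			return false;
		}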
Example #24
        public override void IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp)
        {
            var count       = 0;
            var sourceCount = 0;
            var sw          = Stopwatch.StartNew();
            var start       = SystemTime.UtcNow;

            Write((indexWriter, analyzer, stats) =>
            {
                var processedKeys = new HashSet<string>();
                var batchers      = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(name))
                                    .Where(x => x != null)
                                    .ToList();
                try
                {
                    var docIdTerm        = new Term(Constants.DocumentIdFieldName);
                    var documentsWrapped = batch.Docs.Select((doc, i) =>
                    {
                        Interlocked.Increment(ref sourceCount);
                        if (doc.__document_id == null)
                        {
                            throw new ArgumentException(
                                string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));
                        }

                        string documentId = doc.__document_id.ToString();
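                        // an id already handled in this batch skips the delete pass below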
                        if (processedKeys.Add(documentId) == false)
                        {
                            return doc;
                        }
                        batchers.ApplyAndIgnoreAllErrors(
                            exception =>
                            {
                                logIndexing.WarnException(
                                    string.Format("Error when executing the OnIndexEntryDeleted trigger for index '{0}', key: '{1}'",
                                                  name, documentId),
                                    exception);
                                context.AddError(name,
                                                 documentId,
                                                 exception.Message);
                            },
                            trigger => trigger.OnIndexEntryDeleted(documentId));
                        if (batch.SkipDeleteFromIndex[i] == false ||
                            context.ShouldRemoveFromIndex(documentId))                             // maybe it is recently deleted?
                        {
                            indexWriter.DeleteDocuments(docIdTerm.CreateTerm(documentId.ToLowerInvariant()));
                        }

                        return doc;
                    })
                                           .Where(x => x is FilteredDocument == false)
                                           .ToList();

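                    // referenced-document sets are enqueued per indexing scope and
                    // drained into storage once the parallel loop below completes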
                    var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();
                    BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, (partition) =>
                    {
                        var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(indexDefinition);
                        var luceneDoc       = new Document();
                        var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES,
                                                        Field.Index.NOT_ANALYZED_NO_NORMS);

                        using (CurrentIndexingScope.Current = new CurrentIndexingScope(LoadDocument, allReferencedDocs.Enqueue))
                        {
                            foreach (var doc in RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, stats))
                            {
                                float boost;
                                var indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);

                                if (indexingResult.NewDocId != null && indexingResult.ShouldSkip == false)
                                {
                                    Interlocked.Increment(ref count);
                                    luceneDoc.GetFields().Clear();
                                    luceneDoc.Boost = boost;
                                    documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
                                    luceneDoc.Add(documentIdField);
                                    foreach (var field in indexingResult.Fields)
                                    {
                                        luceneDoc.Add(field);
                                    }
                                    batchers.ApplyAndIgnoreAllErrors(
                                        exception =>
                                        {
                                            logIndexing.WarnException(
                                                string.Format("Error when executing the OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
                                                              name, indexingResult.NewDocId),
                                                exception);
                                            context.AddError(name,
                                                             indexingResult.NewDocId,
                                                             exception.Message);
                                        },
                                        trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc));
                                    LogIndexedDocument(indexingResult.NewDocId, luceneDoc);
                                    AddDocumentToIndex(indexWriter, luceneDoc, analyzer);
                                }

                                Interlocked.Increment(ref stats.IndexingSuccesses);
                            }
                        }
                    });

                    IDictionary<string, HashSet<string>> result;
                    while (allReferencedDocs.TryDequeue(out result))
                    {
                        foreach (var referencedDocument in result)
                        {
                            actions.Indexing.UpdateDocumentReferences(name, referencedDocument.Key, referencedDocument.Value);
                        }
                    }
                }
                catch (Exception e)
                {
                    batchers.ApplyAndIgnoreAllErrors(
                        ex =>
                        {
                            logIndexing.WarnException("Failed to notify index update trigger batcher about an error", ex);
                            context.AddError(name, null, ex.Message);
                        },
                        x => x.AnErrorOccured(e));
                    throw;
                }
                finally
                {
                    batchers.ApplyAndIgnoreAllErrors(
                        e =>
                        {
                            logIndexing.WarnException("Failed to dispose on index update trigger", e);
                            context.AddError(name, null, e.Message);
                        },
                        x => x.Dispose());
                }
                return sourceCount;
            });
            AddindexingPerformanceStat(new IndexingPerformanceStats
            {
                OutputCount = count,
                InputCount  = sourceCount,
                Duration    = sw.Elapsed,
                Operation   = "Index",
                Started     = start
            });
            logIndexing.Debug("Indexed {0} documents for {1}", count, name);
        }
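Both IndexDocuments overloads above funnel trigger failures through ApplyAndIgnoreAllErrors so that one faulty batcher cannot abort the whole batch. The helper itself is defined elsewhere in the codebase; a minimal sketch matching its call sites might look like this (the containing class name is an assumption):

        public static class BatcherExtensions
        {
            // run an action against every batcher, routing each failure to
            // onError instead of letting the exception kill the whole batch
            public static void ApplyAndIgnoreAllErrors<T>(
                this IEnumerable<T> items, Action<Exception> onError, Action<T> action)
            {
                foreach (var item in items)
                {
                    try
                    {
                        action(item);
                    }
                    catch (Exception e)
                    {
                        onError(e);
                    }
                }
            }
        }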
        private IEnumerable<IndexingBatchForIndex> FilterIndexes(IList<IndexToWorkOn> indexesToWorkOn, List<JsonDocument> jsonDocs, Etag highestETagInBatch)
        {
            var last = jsonDocs.Last();

            Debug.Assert(last.Etag != null);
            Debug.Assert(last.LastModified != null);

            var lastEtag     = last.Etag;
            var lastModified = last.LastModified.Value;

            var documentRetriever = new DocumentRetriever(null, null, context.ReadTriggers, context.Database.InFlightTransactionalState);

            var filteredDocs =
                BackgroundTaskExecuter.Instance.Apply(context, jsonDocs, doc =>
            {
                var filteredDoc = documentRetriever.ExecuteReadTriggers(doc, null, ReadOperation.Index);
                return filteredDoc == null
                    ? new
                    {
                        Doc = doc,
                        Json = (object)new FilteredDocument(doc)
                    }
                    : new
                    {
                        Doc = filteredDoc,
                        Json = JsonToExpando.Convert(doc.ToJson())
                    };
            });

            Log.Debug("After read triggers executed, {0} documents remained", filteredDocs.Count);

            var results = new IndexingBatchForIndex[indexesToWorkOn.Count];
            var actions = new Action<IStorageActionsAccessor>[indexesToWorkOn.Count];

            BackgroundTaskExecuter.Instance.ExecuteAll(context, indexesToWorkOn, (indexToWorkOn, i) =>
            {
                var indexName     = indexToWorkOn.Index.PublicName;
                var viewGenerator = context.IndexDefinitionStorage.GetViewGenerator(indexName);
                if (viewGenerator == null)
                {
                    return;                     // probably deleted
                }
                var batch = new IndexingBatch(highestETagInBatch);

                foreach (var item in filteredDocs)
                {
                    if (defaultPrefetchingBehavior.FilterDocuments(item.Doc) == false)
                    {
                        continue;
                    }

                    // have we already indexed this document in this index?
                    var etag = item.Doc.Etag;
                    if (etag == null)
                    {
                        continue;
                    }

                    // is the Raven-Entity-Name a match for the things the index executes on?
                    if (viewGenerator.ForEntityNames.Count != 0 &&
                        viewGenerator.ForEntityNames.Contains(item.Doc.Metadata.Value <string>(Constants.RavenEntityName)) == false)
                    {
                        continue;
                    }

                    batch.Add(item.Doc, item.Json, defaultPrefetchingBehavior.ShouldSkipDeleteFromIndex(item.Doc));

                    if (batch.DateTime == null)
                    {
                        batch.DateTime = item.Doc.LastModified;
                    }
                    else
                    {
                        batch.DateTime = batch.DateTime > item.Doc.LastModified
                            ? item.Doc.LastModified
                            : batch.DateTime;
                    }
                }

                if (batch.Docs.Count == 0)
                {
                    Log.Debug("All documents have been filtered for {0}, no indexing will be performed, updating to {1}, {2}", indexName,
                              lastEtag, lastModified);
                    // we use it this way to batch all the updates together
                    actions[i] = accessor =>
                    {
                        accessor.Indexing.UpdateLastIndexed(indexToWorkOn.Index.indexId, lastEtag, lastModified);

                        accessor.AfterStorageCommit += () =>
                        {
                            indexToWorkOn.Index.EnsureIndexWriter();
                            indexToWorkOn.Index.Flush(lastEtag);
                        };
                    };
                    return;
                }
                if (Log.IsDebugEnabled)
                {
                    Log.Debug("Going to index {0} documents in {1}: ({2})", batch.Ids.Count, indexToWorkOn, string.Join(", ", batch.Ids));
                }
                results[i] = new IndexingBatchForIndex
                {
                    Batch           = batch,
                    IndexId         = indexToWorkOn.IndexId,
                    Index           = indexToWorkOn.Index,
                    LastIndexedEtag = indexToWorkOn.LastIndexedEtag
                };
            });

            transactionalStorage.Batch(actionsAccessor =>
            {
                foreach (var action in actions)
                {
                    if (action != null)
                    {
                        action(actionsAccessor);
                    }
                }
            });

            return results.Where(x => x != null);
        }
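FilterIndexes marks suppressed documents rather than removing them, which is why the indexing loops filter with x is FilteredDocument == false. A sketch of such a marker type, assuming DynamicJsonObject can wrap the document's JSON (the real class is defined elsewhere):

        public class FilteredDocument : DynamicJsonObject
        {
            // wraps a document that a read trigger hid from indexing so that
            // downstream stages can drop it with a plain type check
            public FilteredDocument(JsonDocument document)
                : base(document.ToJson())
            {
            }
        }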
Example #26
		public override void IndexDocuments(
			AbstractViewGenerator viewGenerator,
			IndexingBatch batch,
			IStorageActionsAccessor actions,
			DateTime minimumTimestamp)
		{
			var count = 0;
			var sourceCount = 0;
			var sw = Stopwatch.StartNew();
			var start = SystemTime.UtcNow;
			var changed = new HashSet<ReduceKeyAndBucket>();
			var documentsWrapped = batch.Docs.Select(doc =>
			{
				sourceCount++;
				var documentId = doc.__document_id;
				actions.MapReduce.DeleteMappedResultsForDocumentId((string)documentId, name, changed);
				return doc;
			})
				.Where(x => x is FilteredDocument == false);
			var items = new List<MapResultItem>();
			var stats = new IndexingWorkStats();
			var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();
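			// documents pulled in through LoadDocument are enqueued here so their
			// references can be persisted once the scope completes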
			using (CurrentIndexingScope.Current = new CurrentIndexingScope(LoadDocument, allReferencedDocs.Enqueue))
			{
				var mapResults = RobustEnumerationIndex(
						documentsWrapped.GetEnumerator(), 
						viewGenerator.MapDefinitions, 
						actions, 
						stats)
					.ToList();
				actions.MapReduce.UpdateRemovedMapReduceStats(name, changed);

				foreach (var mappedResultFromDocument in mapResults.GroupBy(GetDocumentId))
				{
					var dynamicResults = mappedResultFromDocument.Select(x => (object)new DynamicJsonObject(RavenJObject.FromObject(x, jsonSerializer))).ToList();
					foreach (var doc in RobustEnumerationReduceDuringMapPhase(
						dynamicResults.GetEnumerator(), viewGenerator.ReduceDefinition, actions, context))
					{
						count++;

						var reduceValue = viewGenerator.GroupByExtraction(doc);
						if (reduceValue == null)
						{
							logIndexing.Debug("Field {0} is used as the reduce key and cannot be null, skipping document {1}",
											  viewGenerator.GroupByExtraction, mappedResultFromDocument.Key);
							continue;
						}
						var reduceKey = ReduceKeyToString(reduceValue);
						var docId = mappedResultFromDocument.Key.ToString();

						var data = GetMappedData(doc);

						items.Add(new MapResultItem
						{
							Data = data,
							DocId = docId,
							ReduceKey = reduceKey
						});

						changed.Add(new ReduceKeyAndBucket(IndexingUtil.MapBucket(docId), reduceKey));
					}
				}
			}

			IDictionary<string, HashSet<string>> result;
			while (allReferencedDocs.TryDequeue(out result))
			{
				foreach (var referencedDocument in result)
				{
					actions.Indexing.UpdateDocumentReferences(name, referencedDocument.Key, referencedDocument.Value);
					actions.General.MaybePulseTransaction();
				}
			}

			foreach (var mapResultItem in items)
			{
				actions.MapReduce.PutMappedResult(name, mapResultItem.DocId, mapResultItem.ReduceKey, mapResultItem.Data);
				actions.General.MaybePulseTransaction();
			}

			UpdateIndexingStats(context, stats);
			actions.MapReduce.ScheduleReductions(name, 0, changed);
			AddindexingPerformanceStat(new IndexingPerformanceStats
			{
				OutputCount = count,
				InputCount = sourceCount,
				Operation = "Map",
				Duration = sw.Elapsed,
				Started = start
			});
			logIndexing.Debug("Mapped {0} documents for {1}", count, name);
		}
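Each ReduceKeyAndBucket scheduled above pairs the reduce key with a bucket computed from the source document id, so a changed document only dirties the bucket it maps into. The real IndexingUtil.MapBucket is defined elsewhere and may hash differently; this is only a sketch, with the bucket count as an assumption.

	public static class IndexingUtil
	{
		public static int MapBucket(string docId)
		{
			const int numberOfBuckets = 1024 * 1024; // assumed bucket count

			unchecked
			{
				// stable hash: the same document id always lands in the same bucket
				var hash = 11;
				foreach (var ch in docId)
					hash = hash * 397 + ch;

				// mask the sign bit rather than calling Math.Abs, which would
				// throw for int.MinValue
				return (hash & int.MaxValue) % numberOfBuckets;
			}
		}
	}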