Code Example #1
File: SimpleIndex.cs Project: krigsman/ravendb
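This override of IndexDocuments is the map-only indexing path. It wraps the incoming batch to count source documents, skip duplicate ids, and purge stale Lucene entries; converts the surviving documents to Lucene Documents across parallel partitions; fires the index-update trigger batchers around every entry; and records per-batch performance statistics on the way out.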
		public override void IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp)
		{
			var count = 0;
			var sourceCount = 0;
			var sw = Stopwatch.StartNew();
			var start = SystemTime.UtcNow;
			int loadDocumentCount = 0;
			long loadDocumentDuration = 0;
			Write((indexWriter, analyzer, stats) =>
			{
				var processedKeys = new HashSet<string>();
				var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(indexId))
					.Where(x => x != null)
					.ToList();
				try
				{
					var indexingPerfStats = RecordCurrentBatch("Current", batch.Docs.Count);
					batch.SetIndexingPerformance(indexingPerfStats);

					var docIdTerm = new Term(Constants.DocumentIdFieldName);
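					// Wrap the batch: count every source document, skip duplicate ids within the batch,
					// and delete stale index entries for documents about to be re-indexed or recently deleted.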
					var documentsWrapped = batch.Docs.Select((doc, i) =>
					{
						Interlocked.Increment(ref sourceCount);
						if (doc.__document_id == null)
							throw new ArgumentException(
								string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));

						string documentId = doc.__document_id.ToString();
						if (processedKeys.Add(documentId) == false)
							return doc;

						InvokeOnIndexEntryDeletedOnAllBatchers(batchers, docIdTerm.CreateTerm(documentId.ToLowerInvariant()));

						if (batch.SkipDeleteFromIndex[i] == false ||
							context.ShouldRemoveFromIndex(documentId)) // maybe it is recently deleted?
							indexWriter.DeleteDocuments(docIdTerm.CreateTerm(documentId.ToLowerInvariant()));

						return doc;
					})
						.Where(x => x is FilteredDocument == false)
						.ToList();

					var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();
					var allReferenceEtags = new ConcurrentQueue<IDictionary<string, Etag>>();
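					// Each parallel partition publishes its reference bookkeeping into these thread-safe
					// queues, which are merged after the parallel phase by UpdateDocumentReferences.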

					BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, (partition) =>
					{
						var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(context.Database, indexDefinition, viewGenerator, logIndexing);
						var luceneDoc = new Document();
						var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES,
														Field.Index.NOT_ANALYZED_NO_NORMS);

						using (CurrentIndexingScope.Current = new CurrentIndexingScope(context.Database, PublicName))
						{
							string currentDocId = null;
							int outputPerDocId = 0;
							Action<Exception, object> onErrorFunc;
							bool skipDocument = false;
							foreach (var doc in RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, stats, out onErrorFunc))
							{
								float boost;
								IndexingResult indexingResult;
								try
								{
									indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);
								}
								catch (Exception e)
								{
									onErrorFunc(e, doc);
									continue;
								}

								// ReSharper disable once RedundantBoolCompare --> code clarity
								if (indexingResult.NewDocId == null || indexingResult.ShouldSkip != false)
								{
									continue;
								}
								if (currentDocId != indexingResult.NewDocId)
								{
									currentDocId = indexingResult.NewDocId;
									outputPerDocId = 0;
									skipDocument = false;
								}
								if (skipDocument)
									continue;
								outputPerDocId++;
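								// Cap how many index entries one document may emit; an over-producing document is skipped entirely.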
								if (EnsureValidNumberOfOutputsForDocument(currentDocId, outputPerDocId) == false)
								{
									skipDocument = true;
									continue;
								}
								Interlocked.Increment(ref count);
								luceneDoc.GetFields().Clear();
								luceneDoc.Boost = boost;
								documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
								luceneDoc.Add(documentIdField);
								foreach (var field in indexingResult.Fields)
								{
									luceneDoc.Add(field);
								}
								batchers.ApplyAndIgnoreAllErrors(
									exception =>
									{
										logIndexing.WarnException(
										string.Format(
											"Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
											indexId, indexingResult.NewDocId),
											exception);
										context.AddError(indexId,
															 indexingResult.NewDocId,
															 exception.Message,
															 "OnIndexEntryCreated Trigger"
												);
									},
									trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc));
								LogIndexedDocument(indexingResult.NewDocId, luceneDoc);
								AddDocumentToIndex(indexWriter, luceneDoc, analyzer);

								Interlocked.Increment(ref stats.IndexingSuccesses);
							}
							allReferenceEtags.Enqueue(CurrentIndexingScope.Current.ReferencesEtags);
							allReferencedDocs.Enqueue(CurrentIndexingScope.Current.ReferencedDocuments);

							Interlocked.Add(ref loadDocumentCount, CurrentIndexingScope.Current.LoadDocumentCount);
							Interlocked.Add(ref loadDocumentDuration, CurrentIndexingScope.Current.LoadDocumentDuration.ElapsedMilliseconds);
						}
					});
					UpdateDocumentReferences(actions, allReferencedDocs, allReferenceEtags);
				}
				catch (Exception e)
				{
					batchers.ApplyAndIgnoreAllErrors(
						ex =>
						{
							logIndexing.WarnException("Failed to notify index update trigger batcher about an error", ex);
							context.AddError(indexId, null, ex.Message, "AnErrorOccured Trigger");
						},
						x => x.AnErrorOccured(e));
					throw;
				}
				finally
				{
					batchers.ApplyAndIgnoreAllErrors(
						e =>
						{
							logIndexing.WarnException("Failed to dispose on index update trigger", e);
							context.AddError(indexId, null, e.Message, "Dispose Trigger");
						},
						x => x.Dispose());
					BatchCompleted("Current");
				}
				return new IndexedItemsInfo(batch.HighestEtagBeforeFiltering)
				{
					ChangedDocs = sourceCount
				};
			});

			AddindexingPerformanceStat(new IndexingPerformanceStats
			{
				OutputCount = count,
				ItemsCount = sourceCount,
				InputCount = batch.Docs.Count,
				Duration = sw.Elapsed,
				Operation = "Index",
				Started = start,
				LoadDocumentCount = loadDocumentCount,
				LoadDocumentDurationMs = loadDocumentDuration 
			});
			logIndexing.Debug("Indexed {0} documents for {1}", count, indexId);
		}
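A pattern worth noting above: the parallel section takes no locks. Each partition fills private collections, publishes them through a ConcurrentQueue, and everything shared is merged on a single thread after the parallel phase (UpdateDocumentReferences and the Interlocked.Add calls). A minimal sketch of that shape, with Parallel.ForEach standing in for RavenDB's BackgroundTaskExecuter and a hypothetical loadReferencedIds delegate:

using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;

static class PartitionAggregationSketch
{
	// Parallel.ForEach stands in for BackgroundTaskExecuter.ExecuteAllBuffered;
	// loadReferencedIds is a hypothetical loader, not a RavenDB API.
	public static Dictionary<string, HashSet<string>> CollectReferences(
		IEnumerable<IReadOnlyList<string>> partitions,
		Func<string, IEnumerable<string>> loadReferencedIds)
	{
		var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();

		Parallel.ForEach(partitions, partition =>
		{
			// Each worker fills a private map, so the hot loop needs no locks.
			var local = new Dictionary<string, HashSet<string>>();
			foreach (var docId in partition)
				local[docId] = new HashSet<string>(loadReferencedIds(docId));

			// Publishing the finished partition is the only shared write.
			allReferencedDocs.Enqueue(local);
		});

		// Merge once, on a single thread, after the parallel phase completes.
		return allReferencedDocs
			.SelectMany(map => map)
			.GroupBy(kv => kv.Key)
			.ToDictionary(g => g.Key,
			              g => new HashSet<string>(g.SelectMany(kv => kv.Value)));
	}
}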
Code Example #2
File: MapReduceIndex.cs Project: krigsman/ravendb
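The map/reduce variant follows the same outline but adds reduce bookkeeping: previously mapped results are deleted for each incoming document, map output is grouped per source document and written to the transactional store from multiple threads in a streaming fashion, and every reduce key touched by mapping or deletion is counted and scheduled for the reduce phase.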
		public override void IndexDocuments(
			AbstractViewGenerator viewGenerator,
			IndexingBatch batch,
			IStorageActionsAccessor actions,
			DateTime minimumTimestamp)
		{
			var count = 0;
			var sourceCount = 0;
			var sw = Stopwatch.StartNew();
			var start = SystemTime.UtcNow;
			var deleted = new Dictionary<ReduceKeyAndBucket, int>();
			var indexPerfStats = RecordCurrentBatch("Current Map", batch.Docs.Count);
			batch.SetIndexingPerformance(indexPerfStats);

			var documentsWrapped = batch.Docs.Select(doc =>
			{
				sourceCount++;
				var documentId = doc.__document_id;
				actions.MapReduce.DeleteMappedResultsForDocumentId((string)documentId, indexId, deleted);
				return doc;
			})
				.Where(x => x is FilteredDocument == false)
				.ToList();
			var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();
			var allReferenceEtags = new ConcurrentQueue<IDictionary<string, Etag>>();
			var allState = new ConcurrentQueue<Tuple<HashSet<ReduceKeyAndBucket>, IndexingWorkStats, Dictionary<string, int>>>();
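			// Per-partition state (changed reduce keys, work stats, per-key output counts), merged after the parallel phase.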

			int loadDocumentCount = 0;
			long loadDocumentDuration = 0;
			BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, partition =>
			{
				var localStats = new IndexingWorkStats();
				var localChanges = new HashSet<ReduceKeyAndBucket>();
				var statsPerKey = new Dictionary<string, int>();
				allState.Enqueue(Tuple.Create(localChanges, localStats, statsPerKey));

				using (CurrentIndexingScope.Current = new CurrentIndexingScope(context.Database, PublicName))
				{
					// we are writing to the transactional store from multiple threads here, and in a streaming fashion
					// should result in less memory and better perf
					context.TransactionalStorage.Batch(accessor =>
					{
						var mapResults = RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, localStats);
						var currentDocumentResults = new List<object>();
						string currentKey = null;
						bool skipDocument = false;
						foreach (var currentDoc in mapResults)
						{
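							// Map results are ordered by source document; an id change means the previous document's results are complete.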
							var documentId = GetDocumentId(currentDoc);
							if (documentId != currentKey)
							{
								count += ProcessBatch(viewGenerator, currentDocumentResults, currentKey, localChanges, accessor, statsPerKey);
								currentDocumentResults.Clear();
								currentKey = documentId;
								skipDocument = false; // reset the fan-out guard for the new document (mirrors the map-only index)
							}
							else if (skipDocument)
							{
								continue;
							}
							currentDocumentResults.Add(new DynamicJsonObject(RavenJObject.FromObject(currentDoc, jsonSerializer)));

							if (EnsureValidNumberOfOutputsForDocument(documentId, currentDocumentResults.Count) == false)
							{
								skipDocument = true;
								currentDocumentResults.Clear();
								continue;
							}

							Interlocked.Increment(ref localStats.IndexingSuccesses);
						}
						count += ProcessBatch(viewGenerator, currentDocumentResults, currentKey, localChanges, accessor, statsPerKey);
					});
					allReferenceEtags.Enqueue(CurrentIndexingScope.Current.ReferencesEtags);
					allReferencedDocs.Enqueue(CurrentIndexingScope.Current.ReferencedDocuments);
					Interlocked.Add(ref loadDocumentCount, CurrentIndexingScope.Current.LoadDocumentCount);
					Interlocked.Add(ref loadDocumentDuration, CurrentIndexingScope.Current.LoadDocumentDuration.ElapsedMilliseconds);
				}
			});



			UpdateDocumentReferences(actions, allReferencedDocs, allReferenceEtags);

			var changed = allState.SelectMany(x => x.Item1).Concat(deleted.Keys)
					.Distinct()
					.ToList();

			var stats = new IndexingWorkStats(allState.Select(x => x.Item2));
			var reduceKeyStats = allState.SelectMany(x => x.Item3)
										 .GroupBy(x => x.Key)
										 .Select(g => new { g.Key, Count = g.Sum(x => x.Value) })
										 .ToList();
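			// Persist per-reduce-key output counts, then schedule reductions for every changed key and bucket.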

			BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, reduceKeyStats, enumerator => context.TransactionalStorage.Batch(accessor =>
			{
				while (enumerator.MoveNext())
				{
					var reduceKeyStat = enumerator.Current;
					accessor.MapReduce.IncrementReduceKeyCounter(indexId, reduceKeyStat.Key, reduceKeyStat.Count);
				}
			}));

			BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, changed, enumerator => context.TransactionalStorage.Batch(accessor =>
			{
				while (enumerator.MoveNext())
				{
					accessor.MapReduce.ScheduleReductions(indexId, 0, enumerator.Current);
				}
			}));


			UpdateIndexingStats(context, stats);
			AddindexingPerformanceStat(new IndexingPerformanceStats
			{
				OutputCount = count,
				ItemsCount = sourceCount,
				InputCount = documentsWrapped.Count,
				Operation = "Map",
				Duration = sw.Elapsed,
				Started = start,
				LoadDocumentCount = loadDocumentCount,
				LoadDocumentDurationMs = loadDocumentDuration 
			});
			BatchCompleted("Current Map");
			logIndexing.Debug("Mapped {0} documents for {1}", count, indexId);
		}
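The heart of the map phase is the grouping loop: map results arrive ordered by source document id, get buffered per document, are flushed through ProcessBatch when the id changes, and a document that exceeds the output limit is dropped wholesale rather than indexed partially. A condensed sketch of that control flow, where processBatch and maxOutputsPerDocument are hypothetical stand-ins for ProcessBatch and the limit enforced by EnsureValidNumberOfOutputsForDocument:

using System;
using System.Collections.Generic;

static class MapOutputGroupingSketch
{
	public static int ProcessGrouped(
		IEnumerable<(string DocId, object MapResult)> mapResults,
		int maxOutputsPerDocument,
		Action<string, List<object>> processBatch)
	{
		var currentResults = new List<object>();
		string currentKey = null;
		var skipDocument = false;
		var count = 0;

		foreach (var (docId, result) in mapResults)
		{
			if (docId != currentKey)
			{
				// An id change means the previous document's outputs are complete.
				if (currentKey != null)
				{
					processBatch(currentKey, currentResults);
					count += currentResults.Count;
				}
				currentResults.Clear();
				currentKey = docId;
				skipDocument = false; // the fan-out guard applies per document
			}
			else if (skipDocument)
			{
				continue;
			}

			currentResults.Add(result);

			// Fan-out guard: a document exceeding the limit is dropped entirely.
			if (currentResults.Count > maxOutputsPerDocument)
			{
				skipDocument = true;
				currentResults.Clear();
			}
		}

		// Flush the final group.
		if (currentKey != null)
		{
			processBatch(currentKey, currentResults);
			count += currentResults.Count;
		}
		return count;
	}
}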