Пример #1
0
        /// <summary>
        /// Maps <paramref name="documents"/> for this index: previously stored mapped results of each
        /// source document are deleted as the document is enumerated, every map output is stored as a
        /// mapped result, and a single <c>ReduceTask</c> covering all affected reduce keys is queued.
        /// </summary>
        /// <param name="viewGenerator">Supplies the map definition and the group-by extraction.</param>
        /// <param name="documents">Source documents; each must expose <c>__document_id</c>.</param>
        /// <param name="context">Work context handed to the robust enumeration.</param>
        /// <param name="actions">Storage accessor used for stats, mapped results and tasks.</param>
        /// <param name="minimumTimestamp">Passed along with the queued reduce task.</param>
        /// <exception cref="InvalidOperationException">A map output has no document id.</exception>
        public override void IndexDocuments(
            AbstractViewGenerator viewGenerator,
            IEnumerable<dynamic> documents,
            WorkContext context,
            IStorageActionsAccessor actions,
            DateTime minimumTimestamp)
        {
            actions.Indexing.SetCurrentIndexStatsTo(name);

            var mappedCount = 0;
            Func<object, object> idFetcher = null;
            // Reduce keys are collected case-insensitively (invariant culture).
            var affectedReduceKeys = new HashSet<string>(StringComparer.InvariantCultureIgnoreCase);

            // Deferred projection: the old mapped results of a document are removed at the moment
            // the document is pulled by the enumeration below, and the reduce keys they belonged to
            // are remembered so they get re-reduced as well.
            var documentsWithCleanup = documents.Select(source =>
            {
                var sourceId = source.__document_id;
                var staleKeys = actions.MappedResults.DeleteMappedResultsForDocumentId((string)sourceId, name);
                foreach (var staleKey in staleKeys)
                {
                    affectedReduceKeys.Add(staleKey);
                }
                return source;
            });

            foreach (var mapped in RobustEnumeration(documentsWithCleanup, viewGenerator.MapDefinition, actions, context))
            {
                // Counted before any skip below, so skipped outputs are included in the total.
                mappedCount++;

                idFetcher = CreateDocumentIdFetcherIfNeeded(idFetcher, mapped);

                var rawDocId = idFetcher(mapped);
                if (rawDocId == null)
                    throw new InvalidOperationException("Could not find document id for this document");

                var groupValue = viewGenerator.GroupByExtraction(mapped);
                if (groupValue == null)
                {
                    logIndexing.DebugFormat("Field {0} is used as the reduce key and cannot be null, skipping document {1}", viewGenerator.GroupByExtraction, rawDocId);
                    continue;
                }

                var key = ReduceKeyToString(groupValue);
                var docId = rawDocId.ToString();
                affectedReduceKeys.Add(key);

                var data = GetMapedData(mapped);
                logIndexing.DebugFormat("Mapped result for '{0}': '{1}'", name, data);

                var hash = ComputeHash(name, key);
                actions.MappedResults.PutMappedResult(name, docId, key, data, hash);
                actions.Indexing.IncrementSuccessIndexing();
            }

            // One task for the whole batch, carrying every reduce key that was removed or produced.
            var reduceTask = new ReduceTask
            {
                Index = name,
                ReduceKeys = affectedReduceKeys.ToArray()
            };
            actions.Tasks.AddTask(reduceTask, minimumTimestamp);

            logIndexing.DebugFormat("Mapped {0} documents for {1}", mappedCount, name);
        }
Пример #2
0
        /// <summary>
        /// Runs the map definition over <paramref name="documents"/>, stores each output as a mapped
        /// result and queues one <c>ReduceTask</c> per distinct reduce key that was produced.
        /// </summary>
        /// <param name="viewGenerator">Supplies the map definition and the group-by extraction.</param>
        /// <param name="documents">Source documents to map.</param>
        /// <param name="context">Work context handed to the robust enumeration.</param>
        /// <param name="actions">Storage actions used for stats, mapped results and tasks.</param>
        /// <exception cref="InvalidOperationException">A map output has no document id.</exception>
        public override void IndexDocuments(
            AbstractViewGenerator viewGenerator,
            IEnumerable<object> documents,
            WorkContext context,
            DocumentStorageActions actions)
        {
            actions.SetCurrentIndexStatsTo(name);

            var processed = 0;
            Func<object, object> idFetcher = null;
            var pendingReduceKeys = new HashSet<string>();

            foreach (var mapped in RobustEnumeration(documents, viewGenerator.MapDefinition, actions, context))
            {
                // Incremented before the null-key skip, so skipped outputs are part of the total.
                processed++;

                idFetcher = CreateDocumentIdFetcherIfNeeded(idFetcher, mapped);

                var rawDocId = idFetcher(mapped);
                if (rawDocId == null)
                    throw new InvalidOperationException("Could not find document id for this document");

                var groupValue = viewGenerator.GroupByExtraction(mapped);
                if (groupValue == null)
                {
                    log.DebugFormat("Field {0} is used as the reduce key and cannot be null, skipping document {1}", viewGenerator.GroupByExtraction, rawDocId);
                    continue;
                }

                var key = ReduceKeyToString(groupValue);
                var docId = rawDocId.ToString();
                pendingReduceKeys.Add(key);

                string data = GetMapedData(mapped);
                log.DebugFormat("Mapped result for '{0}': '{1}'", name, data);

                var hash = ComputeHash(name, key);
                actions.PutMappedResult(name, docId, key, data, hash);
                actions.IncrementSuccessIndexing();
            }

            // The set guarantees each reduce key is scheduled exactly once for this batch.
            foreach (var pendingKey in pendingReduceKeys)
            {
                var task = new ReduceTask
                {
                    Index = name,
                    ReduceKey = pendingKey
                };
                actions.AddTask(task);
            }

            log.DebugFormat("Mapped {0} documents for {1}", processed, name);
        }
Пример #3
0
			/// <summary>
			/// Applies the generator's group-by extraction to <paramref name="doc"/> and returns the
			/// result converted with <c>ReduceKeyToString</c>.
			/// </summary>
			/// <exception cref="InvalidOperationException">
			/// Thrown for any failure. Every exception raised inside the try block, including the
			/// "could not find reduce key" one for a null extraction result, is wrapped as the inner
			/// exception of a new <see cref="InvalidOperationException"/>.
			/// </exception>
			private string ExtractReduceKey(AbstractViewGenerator viewGenerator, object doc)
			{
				try
				{
					object groupValue = viewGenerator.GroupByExtraction(doc);
					if (groupValue != null)
					{
						return ReduceKeyToString(groupValue);
					}

					throw new InvalidOperationException("Could not find reduce key for " + indexId + " in the result: " + doc);
				}
				catch (Exception inner)
				{
					throw new InvalidOperationException("Could not extract reduce key from reduce result!", inner);
				}
			}
Пример #4
0
		/// <summary>
		/// Runs the reduce definition over the map outputs of a single document and stores every
		/// result as a mapped result, updating per-key statistics and the set of changed
		/// reduce-key/bucket pairs.
		/// </summary>
		/// <param name="viewGenerator">Supplies the reduce definition and the group-by extraction.</param>
		/// <param name="currentDocumentResults">Map outputs produced for <paramref name="currentKey"/>.</param>
		/// <param name="currentKey">Key of the source document; a null key makes this a no-op.</param>
		/// <param name="changes">Receives one entry per stored result.</param>
		/// <param name="actions">Storage accessor used to persist results and pulse the transaction.</param>
		/// <param name="statsPerKey">Per reduce key counter, incremented for each stored result.</param>
		/// <returns>Number of results enumerated, including those skipped for a null reduce key.</returns>
		private int ProcessBatch(AbstractViewGenerator viewGenerator, List<object> currentDocumentResults, string currentKey, HashSet<ReduceKeyAndBucket> changes,
			IStorageActionsAccessor actions,
			IDictionary<string, int> statsPerKey)
		{
			var hasWork = currentKey != null && currentDocumentResults.Count != 0;
			if (!hasWork)
				return 0;

			// The ambient indexing scope is cleared for the duration of this call and put back in
			// the finally block, whatever happens in between.
			var previousScope = CurrentIndexingScope.Current;
			try
			{
				CurrentIndexingScope.Current = null;

				if (logIndexing.IsDebugEnabled)
				{
					// Serialization of every map output is only paid for when debug logging is on.
					var trace = new StringBuilder();
					trace.AppendFormat("Index {0} for document {1} resulted in:", PublicName, currentKey);
					trace.AppendLine();
					foreach (var mapOutput in currentDocumentResults)
					{
						trace.AppendLine(JsonConvert.SerializeObject(mapOutput));
					}
					logIndexing.Debug(trace.ToString());
				}

				int enumerated = 0;
				foreach (var reduced in RobustEnumerationReduceDuringMapPhase(currentDocumentResults.GetEnumerator(), viewGenerator.ReduceDefinition))
				{
					enumerated++;

					var groupValue = viewGenerator.GroupByExtraction(reduced);
					if (groupValue == null)
					{
						logIndexing.Debug("Field {0} is used as the reduce key and cannot be null, skipping document {1}",
										  viewGenerator.GroupByExtraction, currentKey);
						continue;
					}

					string reduceKey = ReduceKeyToString(groupValue);
					var data = GetMappedData(reduced);

					logIndexing.Debug("Index {0} for document {1} resulted in ({2}): {3}", PublicName, currentKey, reduceKey, data);
					actions.MapReduce.PutMappedResult(indexId, currentKey, reduceKey, data);

					var seenSoFar = statsPerKey.GetOrDefault(reduceKey);
					statsPerKey[reduceKey] = seenSoFar + 1;

					actions.General.MaybePulseTransaction();

					var changedEntry = new ReduceKeyAndBucket(IndexingUtil.MapBucket(currentKey), reduceKey);
					changes.Add(changedEntry);
				}

				return enumerated;
			}
			finally
			{
				CurrentIndexingScope.Current = previousScope;
			}
		}
Пример #5
0
		/// <summary>
		/// Runs the reduce definition over the map outputs of one document, persists each result
		/// as a mapped result and records the touched reduce keys in <paramref name="statsPerKey"/>
		/// and <paramref name="changes"/>.
		/// </summary>
		/// <param name="viewGenerator">Supplies the reduce definition and the group-by extraction.</param>
		/// <param name="currentDocumentResults">Map outputs produced for <paramref name="currentKey"/>.</param>
		/// <param name="currentKey">Key of the source document; a null key makes this a no-op.</param>
		/// <param name="changes">Receives one reduce-key/bucket entry per stored result.</param>
		/// <param name="actions">Storage accessor used to persist results and pulse the transaction.</param>
		/// <param name="statsPerKey">Per reduce key counter, incremented for each stored result.</param>
		/// <returns>Number of results enumerated, including those skipped for a null reduce key.</returns>
		private int ProcessBatch(AbstractViewGenerator viewGenerator, List<object> currentDocumentResults, string currentKey, HashSet<ReduceKeyAndBucket> changes,
			IStorageActionsAccessor actions,
			IDictionary<string, int> statsPerKey)
		{
			if (currentKey == null)
				return 0;
			if (currentDocumentResults.Count == 0)
				return 0;

			int enumerated = 0;
			foreach (var reduced in RobustEnumerationReduceDuringMapPhase(currentDocumentResults.GetEnumerator(), viewGenerator.ReduceDefinition))
			{
				// Counted up front: results dropped below still contribute to the return value.
				enumerated++;

				var groupValue = viewGenerator.GroupByExtraction(reduced);
				if (groupValue == null)
				{
					logIndexing.Debug("Field {0} is used as the reduce key and cannot be null, skipping document {1}",
									  viewGenerator.GroupByExtraction, currentKey);
					continue;
				}

				string reduceKey = ReduceKeyToString(groupValue);
				var data = GetMappedData(reduced);

				actions.MapReduce.PutMappedResult(name, currentKey, reduceKey, data);

				var previousCount = statsPerKey.GetOrDefault(reduceKey);
				statsPerKey[reduceKey] = previousCount + 1;

				actions.General.MaybePulseTransaction();

				var bucket = IndexingUtil.MapBucket(currentKey);
				changes.Add(new ReduceKeyAndBucket(bucket, reduceKey));
			}

			return enumerated;
		}
Пример #6
0
        /// <summary>
        /// Rebuilds the index entries for <paramref name="reduceKeys"/>: the existing entries of
        /// those keys are deleted, the reduce definition is run over <paramref name="mappedResults"/>,
        /// and one Lucene document is written per reduce result. Index update triggers are notified
        /// of every deletion and creation through their batchers.
        /// </summary>
        /// <param name="viewGenerator">Supplies the reduce definition and the group-by extraction.</param>
        /// <param name="mappedResults">Mapped results to feed into the reduce definition.</param>
        /// <param name="context">Work context providing the index update triggers and error sink.</param>
        /// <param name="actions">Storage accessor used for index statistics.</param>
        /// <param name="reduceKeys">Reduce keys whose current index entries are replaced.</param>
        /// <exception cref="InvalidOperationException">A reduce result has a null reduce key.</exception>
        public void ReduceDocuments(AbstractViewGenerator viewGenerator,
                                    IEnumerable<object> mappedResults,
                                    WorkContext context,
                                    IStorageActionsAccessor actions,
                                    string[] reduceKeys)
        {
            actions.Indexing.SetCurrentIndexStatsTo(name);
            var count = 0;
            Write(context, indexWriter =>
            {
                // Triggers may decline to take part by returning a null batcher.
                var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(name))
                    .Where(x => x != null)
                    .ToList();
                try
                {
                    foreach (var reduceKey in reduceKeys)
                    {
                        // Copy for the lambdas below so they do not capture the loop variable.
                        var entryKey = reduceKey;
                        // The key is lower-cased here and when written below, so both sides agree.
                        indexWriter.DeleteDocuments(new Term("__reduce_key", entryKey.ToLowerInvariant()));
                        batchers.ApplyAndIgnoreAllErrors(
                            exception =>
                            {
                                logIndexing.WarnFormat(exception,
                                                       "Error when executed OnIndexEntryDeleted trigger for index '{0}', key: '{1}'",
                                                       name, entryKey);
                                context.AddError(name,
                                               entryKey,
                                               exception.Message
                                  );
                            },
                            trigger => trigger.OnIndexEntryDeleted(name, entryKey));
                    }
                    PropertyDescriptorCollection properties = null;
                    foreach (var doc in RobustEnumeration(mappedResults, viewGenerator.ReduceDefinition, actions, context))
                    {
                        count++;
                        var fields = GetFields(doc, ref properties);
                        dynamic reduceKey = viewGenerator.GroupByExtraction(doc);
                        if (reduceKey == null)
                        {
                            throw new InvalidOperationException("Could not find reduce key for " + name + " in the result: " + doc);
                        }
                        string reduceKeyAsString = ReduceKeyToString(reduceKey);

                        var luceneDoc = new Document();
                        luceneDoc.Add(new Field("__reduce_key", reduceKeyAsString.ToLowerInvariant(), Field.Store.NO, Field.Index.NOT_ANALYZED));
                        foreach (var field in fields)
                        {
                            luceneDoc.Add(field);
                        }
                        batchers.ApplyAndIgnoreAllErrors(
                            exception =>
                            {
                                logIndexing.WarnFormat(exception,
                                                       "Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
                                                       name, reduceKeyAsString);
                                context.AddError(name,
                                               reduceKeyAsString,
                                               exception.Message
                                  );
                            },
                            trigger => trigger.OnIndexEntryCreated(name, reduceKeyAsString, luceneDoc));
                        logIndexing.DebugFormat("Reduce key {0} result in index {1} gave document: {2}", reduceKeyAsString, name, luceneDoc);
                        indexWriter.AddDocument(luceneDoc);
                        actions.Indexing.IncrementSuccessIndexing();
                    }
                }
                finally
                {
                    // Dispose the batchers even when the reduce fails (for example on the null
                    // reduce key thrown above); previously they were only released on success.
                    batchers.ApplyAndIgnoreAllErrors(
                        e =>
                        {
                            logIndexing.Warn("Failed to dispose on index update trigger", e);
                            context.AddError(name, null, e.Message);
                        },
                        x => x.Dispose());
                }
                return true;
            });
            if (logIndexing.IsDebugEnabled)
            {
                logIndexing.DebugFormat("Reduce resulted in {0} entries for {1} for reduce keys: {2}", count, name, string.Join(", ", reduceKeys));
            }
        }
Пример #7
0
		/// <summary>
		/// Runs the reduce definition over the map outputs of one document and enqueues a
		/// <c>MapResultItem</c> for every result that has a non-null reduce key.
		/// </summary>
		/// <param name="viewGenerator">Supplies the reduce definition and the group-by extraction.</param>
		/// <param name="currentDocumentResults">Map outputs produced for <paramref name="currentKey"/>.</param>
		/// <param name="currentKey">Key of the source document; a null key makes this a no-op.</param>
		/// <param name="items">Queue that receives the produced items.</param>
		/// <returns>Number of results enumerated, including those skipped for a null reduce key.</returns>
		private int ProcessBatch(
			AbstractViewGenerator viewGenerator,
			List<object> currentDocumentResults,
			string currentKey,
			ConcurrentQueue<MapResultItem> items)
		{
			var nothingToDo = currentKey == null || currentDocumentResults.Count == 0;
			if (nothingToDo)
				return 0;

			int enumerated = 0;
			var reducedResults = RobustEnumerationReduceDuringMapPhase(currentDocumentResults.GetEnumerator(), viewGenerator.ReduceDefinition);
			foreach (var reduced in reducedResults)
			{
				enumerated++;

				var groupValue = viewGenerator.GroupByExtraction(reduced);
				if (groupValue == null)
				{
					logIndexing.Debug("Field {0} is used as the reduce key and cannot be null, skipping document {1}",
					                  viewGenerator.GroupByExtraction, currentKey);
					continue;
				}

				var key = ReduceKeyToString(groupValue);
				var payload = GetMappedData(reduced);

				// The bucket is derived from the source document key, not from the reduce key.
				var item = new MapResultItem
				{
					Data = payload,
					DocId = currentKey,
					ReduceKey = key,
					Bucket = IndexingUtil.MapBucket(currentKey)
				};
				items.Enqueue(item);
			}

			return enumerated;
		}
Пример #8
0
        /// <summary>
        /// Replaces the index entries of <paramref name="reduceKeys"/>: existing entries for those
        /// keys are deleted, then the reduce definition is run over <paramref name="mappedResults"/>
        /// and each result is written as a Lucene document tagged with its reduce key. Index update
        /// triggers are told about every deletion and creation.
        /// </summary>
        /// <param name="viewGenerator">Supplies the reduce definition and the group-by extraction.</param>
        /// <param name="mappedResults">Mapped results to feed into the reduce definition.</param>
        /// <param name="context">Work context providing the index update triggers.</param>
        /// <param name="actions">Storage accessor used for index statistics.</param>
        /// <param name="reduceKeys">Reduce keys whose current index entries are replaced.</param>
        /// <exception cref="InvalidOperationException">A reduce result has a null reduce key.</exception>
        public void ReduceDocuments(AbstractViewGenerator viewGenerator,
                                    IEnumerable<object> mappedResults,
                                    WorkContext context,
                                    IStorageActionsAccessor actions,
                                    string[] reduceKeys)
        {
            actions.Indexing.SetCurrentIndexStatsTo(name);
            var entriesWritten = 0;

            Write(writer =>
            {
                // Phase 1: drop whatever is currently indexed under the affected reduce keys.
                foreach (var staleKey in reduceKeys)
                {
                    var staleTerm = new Term("__reduce_key", staleKey);
                    writer.DeleteDocuments(staleTerm);
                    context.IndexUpdateTriggers.Apply(trigger => trigger.OnIndexEntryDeleted(name, staleKey));
                }

                // Phase 2: write one document per reduce result.
                PropertyDescriptorCollection cachedProperties = null;
                foreach (var reduced in RobustEnumeration(mappedResults, viewGenerator.ReduceDefinition, actions, context))
                {
                    entriesWritten++;
                    var fields = GetFields(reduced, ref cachedProperties);

                    dynamic groupValue = viewGenerator.GroupByExtraction(reduced);
                    if (groupValue == null)
                    {
                        throw new InvalidOperationException("Could not find reduce key for " + name + " in the result: " + reduced);
                    }
                    string reduceKeyAsString = ReduceKeyToString(groupValue);

                    // The reduce key is indexed (not stored, not analyzed) so phase 1 can find it.
                    var luceneDoc = new Document();
                    var keyField = new Field("__reduce_key", reduceKeyAsString, Field.Store.NO, Field.Index.NOT_ANALYZED);
                    luceneDoc.Add(keyField);
                    foreach (var field in fields)
                    {
                        luceneDoc.Add(field);
                    }

                    context.IndexUpdateTriggers.Apply(trigger => trigger.OnIndexEntryCreated(name, reduceKeyAsString, luceneDoc));
                    log.DebugFormat("Reduce key {0} result in index {1} gave document: {2}", reduceKeyAsString, name, luceneDoc);
                    writer.AddDocument(luceneDoc);
                    actions.Indexing.IncrementSuccessIndexing();
                }

                return true;
            });

            if (log.IsDebugEnabled)
            {
                var joinedKeys = string.Join(", ", reduceKeys);
                log.DebugFormat("Reduce resulted in {0} entries for {1} for reduce keys: {2}", entriesWritten, name, joinedKeys);
            }
        }
Пример #9
0
		/// <summary>
		/// Maps the documents of <paramref name="batch"/> for this index, runs the reduce definition
		/// over each document's map outputs, stores the results as mapped results, records document
		/// references, and schedules reductions for every changed reduce-key/bucket pair.
		/// </summary>
		/// <param name="viewGenerator">Supplies the map definitions, reduce definition and group-by extraction.</param>
		/// <param name="batch">Batch whose <c>Docs</c> are indexed.</param>
		/// <param name="actions">Storage accessor used for map/reduce storage, references and transaction pulsing.</param>
		/// <param name="minimumTimestamp">Not read anywhere in this method body.</param>
		public override void IndexDocuments(
			AbstractViewGenerator viewGenerator,
			IndexingBatch batch,
			IStorageActionsAccessor actions,
			DateTime minimumTimestamp)
		{
			// NOTE(review): `name`, `context` and `jsonSerializer` are neither parameters nor locals
			// here - presumably members of the enclosing class; confirm.
			var count = 0;
			var sourceCount = 0;
			var sw = Stopwatch.StartNew();
			var start = SystemTime.UtcNow;
			var changed = new HashSet<ReduceKeyAndBucket>();
			// Deferred pipeline: nothing below runs until the sequence is enumerated. For every
			// source document it bumps sourceCount and deletes the document's previous mapped
			// results (passing `changed` to the delete call). Because the Where comes after the
			// Select, this also happens for FilteredDocument instances, which are then dropped.
			var documentsWrapped = batch.Docs.Select(doc =>
			{
				sourceCount++;
				var documentId = doc.__document_id;
				actions.MapReduce.DeleteMappedResultsForDocumentId((string)documentId, name, changed);
				return doc;
			})
				.Where(x => x is FilteredDocument == false);
			var items = new List<MapResultItem>();
			var stats = new IndexingWorkStats();
			var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();
			// The indexing scope is installed as the current one for the map phase and disposed
			// when the using block ends; it is given LoadDocument and the queue's Enqueue as callbacks.
			using (CurrentIndexingScope.Current = new CurrentIndexingScope(LoadDocument, allReferencedDocs.Enqueue))
			{
				// ToList() forces the whole pipeline to run here, so every delete above has been
				// issued before UpdateRemovedMapReduceStats is called.
				var mapResults = RobustEnumerationIndex(
						documentsWrapped.GetEnumerator(), 
						viewGenerator.MapDefinitions, 
						actions, 
						stats)
					.ToList();
				actions.MapReduce.UpdateRemovedMapReduceStats(name, changed);

				// Map outputs are grouped per source document, and the reduce definition is applied
				// to each document's outputs separately.
				foreach (var mappedResultFromDocument in mapResults.GroupBy(GetDocumentId))
				{
					var dynamicResults = mappedResultFromDocument.Select(x => (object)new DynamicJsonObject(RavenJObject.FromObject(x, jsonSerializer))).ToList();
					foreach (
						var doc in
							RobustEnumerationReduceDuringMapPhase(dynamicResults.GetEnumerator(), viewGenerator.ReduceDefinition, actions, context))
					{
						// Incremented before the null check, so skipped results are still counted.
						count++;

						var reduceValue = viewGenerator.GroupByExtraction(doc);
						if (reduceValue == null)
						{
							logIndexing.Debug("Field {0} is used as the reduce key and cannot be null, skipping document {1}",
											  viewGenerator.GroupByExtraction, mappedResultFromDocument.Key);
							continue;
						}
						var reduceKey = ReduceKeyToString(reduceValue);
						var docId = mappedResultFromDocument.Key.ToString();

						var data = GetMappedData(doc);

						// Results are buffered and only written to storage after the scope is closed.
						items.Add(new MapResultItem
						{
							Data = data,
							DocId = docId,
							ReduceKey = reduceKey
						});

						changed.Add(new ReduceKeyAndBucket(IndexingUtil.MapBucket(docId), reduceKey));
					}
				}
			}

			// Drain the reference dictionaries that were enqueued through the scope callback and
			// record them for this index.
			IDictionary<string, HashSet<string>> result;
			while (allReferencedDocs.TryDequeue(out result))
			{
				foreach (var referencedDocument in result)
				{
					actions.Indexing.UpdateDocumentReferences(name, referencedDocument.Key, referencedDocument.Value);
					actions.General.MaybePulseTransaction();
				}
			}

			// Persist the buffered map results one by one.
			foreach (var mapResultItem in items)
			{
				actions.MapReduce.PutMappedResult(name, mapResultItem.DocId, mapResultItem.ReduceKey, mapResultItem.Data);
				actions.General.MaybePulseTransaction();
			}

			UpdateIndexingStats(context, stats);
			// NOTE(review): the literal 0 is presumably the reduce level to schedule - confirm
			// against ScheduleReductions.
			actions.MapReduce.ScheduleReductions(name, 0, changed);
			// InputCount is the number of source documents pulled through the pipeline;
			// OutputCount is the number of reduce-during-map results enumerated.
			AddindexingPerformanceStat(new IndexingPerformanceStats
			{
				OutputCount = count,
				InputCount = sourceCount,
				Operation = "Map",
				Duration = sw.Elapsed,
				Started = start
			});
			logIndexing.Debug("Mapped {0} documents for {1}", count, name);
		}