public AnonymousObjectToLuceneDocumentConverter(DocumentDatabase database, IndexDefinition indexDefinition, AbstractViewGenerator viewGenerator, ILog log)
		{
			this.database = database;
			this.indexDefinition = indexDefinition;
			this.viewGenerator = viewGenerator;
			this.log = log;
		}
Example #2
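// MapReduceIndex constructor (id-based): creates the default JSON serializer and swaps in the map/reduce converters.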
 public MapReduceIndex(Directory directory, int id, IndexDefinition indexDefinition,
                       AbstractViewGenerator viewGenerator, WorkContext context)
     : base(directory, id, indexDefinition, viewGenerator, context)
 {
     jsonSerializer = JsonExtensions.CreateDefaultJsonSerializer();
     jsonSerializer.Converters = MapReduceConverters;
 }
Example #3
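// IndexPropertyBatcher constructor: stores the database, setup document, index name, and view generator for later use.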
			public IndexPropertyBatcher(DocumentDatabase database, IndexedPropertiesSetupDoc setupDoc, string index, AbstractViewGenerator viewGenerator)
			{
				this.database = database;
				this.setupDoc = setupDoc;
				this.index = index;
				this.viewGenerator = viewGenerator;
			}
Example #4
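// Map phase of a map/reduce index: deletes each document's stale mapped results (collecting the affected reduce keys),
// maps the batch, persists each mapped result, and schedules one ReduceTask covering all touched reduce keys.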
        public override void IndexDocuments(
			AbstractViewGenerator viewGenerator, 
			IEnumerable<dynamic> documents, 
			WorkContext context, 
			IStorageActionsAccessor actions, 
			DateTime minimumTimestamp)
        {
            actions.Indexing.SetCurrentIndexStatsTo(name);
            var count = 0;
            Func<object, object> documentIdFetcher = null;
            var reduceKeys = new HashSet<string>(StringComparer.InvariantCultureIgnoreCase);
            var documentsWrapped = documents.Select(doc =>
            {
                var documentId = doc.__document_id;
                foreach (var reduceKey in actions.MappedResults.DeleteMappedResultsForDocumentId((string)documentId, name))
                {
                    reduceKeys.Add(reduceKey);
                }
                return doc;
            });
            foreach (var doc in RobustEnumeration(documentsWrapped, viewGenerator.MapDefinition, actions, context))
            {
                count++;

                documentIdFetcher = CreateDocumentIdFetcherIfNeeded(documentIdFetcher, doc);

                var docIdValue = documentIdFetcher(doc);
                if (docIdValue == null)
                    throw new InvalidOperationException("Could not find document id for this document");

                var reduceValue = viewGenerator.GroupByExtraction(doc);
                if (reduceValue == null)
                {
                    logIndexing.DebugFormat("Field {0} is used as the reduce key and cannot be null, skipping document {1}", viewGenerator.GroupByExtraction, docIdValue);
                    continue;
                }
                var reduceKey = ReduceKeyToString(reduceValue);
                var docId = docIdValue.ToString();

                reduceKeys.Add(reduceKey);

                var data = GetMapedData(doc);

                logIndexing.DebugFormat("Mapped result for '{0}': '{1}'", name, data);

                var hash = ComputeHash(name, reduceKey);

                actions.MappedResults.PutMappedResult(name, docId, reduceKey, data, hash);

                actions.Indexing.IncrementSuccessIndexing();
            }

            actions.Tasks.AddTask(new ReduceTask
            {
                Index = name,
                ReduceKeys = reduceKeys.ToArray()
            }, minimumTimestamp);

            logIndexing.DebugFormat("Mapped {0} documents for {1}", count, name);
        }
Example #5
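// MapReduceIndex constructor (name-based): builds a JsonSerializer and copies over the default converters.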
		public MapReduceIndex(Directory directory, string name, IndexDefinition indexDefinition,
							  AbstractViewGenerator viewGenerator, WorkContext context)
			: base(directory, name, indexDefinition, viewGenerator, context)
		{
			jsonSerializer = new JsonSerializer();
			foreach (var jsonConverter in Default.Converters)
			{
				jsonSerializer.Converters.Add(jsonConverter);
			}
		}
Example #6
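// As above, but id-based and with an additional IgnoreFieldable converter.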
		public MapReduceIndex(Directory directory, int id, IndexDefinition indexDefinition,
							  AbstractViewGenerator viewGenerator, WorkContext context)
			: base(directory, id, indexDefinition, viewGenerator, context)
		{
			jsonSerializer = new JsonSerializer();
			foreach (var jsonConverter in Default.Converters)
			{
				jsonSerializer.Converters.Add(jsonConverter);
			}
			jsonSerializer.Converters.Add(new IgnoreFieldable());
		}
Example #7
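// An earlier variant of the map phase: persists mapped results, then schedules a separate ReduceTask per reduce key.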
        public override void IndexDocuments(
            AbstractViewGenerator viewGenerator,
            IEnumerable<object> documents,
            WorkContext context,
            DocumentStorageActions actions)
        {
            actions.SetCurrentIndexStatsTo(name);
            var count = 0;
            Func<object, object> documentIdFetcher = null;
            var reduceKeys = new HashSet<string>();
            foreach (var doc in RobustEnumeration(documents, viewGenerator.MapDefinition, actions, context))
            {
                count++;

                documentIdFetcher = CreateDocumentIdFetcherIfNeeded(documentIdFetcher, doc);

                var docIdValue = documentIdFetcher(doc);
                if (docIdValue == null)
                    throw new InvalidOperationException("Could not find document id for this document");

                var reduceValue = viewGenerator.GroupByExtraction(doc);
                if (reduceValue == null)
                {
                    log.DebugFormat("Field {0} is used as the reduce key and cannot be null, skipping document {1}", viewGenerator.GroupByExtraction, docIdValue);
                    continue;
                }
                var reduceKey = ReduceKeyToString(reduceValue);
                var docId = docIdValue.ToString();

                reduceKeys.Add(reduceKey);

                string data = GetMapedData(doc);

                log.DebugFormat("Mapped result for '{0}': '{1}'", name, data);

                var hash = ComputeHash(name, reduceKey);

                actions.PutMappedResult(name, docId, reduceKey, data, hash);

                actions.IncrementSuccessIndexing();
            }

            foreach (var reduceKey in reduceKeys)
            {
                actions.AddTask(new ReduceTask
                {
                    Index = name,
                    ReduceKey = reduceKey
                });
            }

            log.DebugFormat("Mapped {0} documents for {1}", count, name);
        }
Example #8
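// An older map phase that reads the group-by field and __document_id via PropertyDescriptors and stores each mapped result as raw JSON.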
        public override void IndexDocuments(
            AbstractViewGenerator viewGenerator,
            IEnumerable<object> documents,
            WorkContext context,
            DocumentStorageActions actions)
        {
            actions.SetCurrentIndexStatsTo(name);
            var count = 0;
            PropertyDescriptor groupByPropertyDescriptor = null;
            PropertyDescriptor documentIdPropertyDescriptor = null;
            var reduceKeys = new HashSet<string>();
            foreach (var doc in RobustEnumeration(documents, viewGenerator.MapDefinition, actions, context))
            {
                count++;

                if (groupByPropertyDescriptor == null)
                {
                    var props = TypeDescriptor.GetProperties(doc);
                    groupByPropertyDescriptor = props.Find(viewGenerator.GroupByField, false);
                    documentIdPropertyDescriptor = props.Find("__document_id", false);
                }

                var docIdValue = documentIdPropertyDescriptor.GetValue(doc);
                if (docIdValue == null)
                    throw new InvalidOperationException("Could not find document id for this document");

                var reduceValue = groupByPropertyDescriptor.GetValue(doc);
                if (reduceValue == null)
                {
                    log.DebugFormat("Field {0} is used as the reduce key and cannot be null, skipping document {1}", viewGenerator.GroupByField, docIdValue);
                    continue;
                }
                var reduceKey = reduceValue.ToString();
                var docId = docIdValue.ToString();

                reduceKeys.Add(reduceKey);

                actions.PutMappedResult(name, docId, reduceKey, JObject.FromObject(doc).ToString(Formatting.None));

                actions.IncrementSuccessIndexing();
            }

            foreach (var reduceKey in reduceKeys)
            {
                actions.AddTask(new ReduceTask
                {
                    Index = name,
                    ReduceKey = reduceKey
                });
            }

            log.DebugFormat("Mapped {0} documents for {1}", count, name);
        }
Example #9
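// Map-only indexing: deduplicates the batch by document id, deletes old index entries (firing delete triggers),
// extracts fields from each mapped result, and adds the new Lucene documents.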
		public override void IndexDocuments(
			AbstractViewGenerator viewGenerator, 
			IEnumerable<object> documents,
			WorkContext context,
			IStorageActionsAccessor actions)
		{
			actions.Indexing.SetCurrentIndexStatsTo(name);
			var count = 0;
			Write(indexWriter =>
			{
				bool madeChanges = false;
				PropertyDescriptorCollection properties = null;
				var processedKeys = new HashSet<string>();
				var documentsWrapped = documents.Select((dynamic doc) =>
				{
					var documentId = doc.__document_id.ToString();
					if (processedKeys.Add(documentId) == false)
						return doc;
					madeChanges = true;
					context.IndexUpdateTriggers.Apply(trigger => trigger.OnIndexEntryDeleted(name, documentId));
					indexWriter.DeleteDocuments(new Term("__document_id", documentId));
					return doc;
				});
				foreach (var doc in RobustEnumeration(documentsWrapped, viewGenerator.MapDefinition, actions, context))
				{
					count++;

				    string newDocId;
				    IEnumerable<AbstractField> fields;
                    if (doc is DynamicJsonObject)
                        fields = ExtractIndexDataFromDocument((DynamicJsonObject) doc, out newDocId);
                    else
                        fields = ExtractIndexDataFromDocument(properties, doc, out newDocId);
				   
                    if (newDocId != null)
                    {
                        var luceneDoc = new Document();
                        luceneDoc.Add(new Field("__document_id", newDocId, Field.Store.YES, Field.Index.NOT_ANALYZED));

                    	madeChanges = true;
                        CopyFieldsToDocument(luceneDoc, fields);
                        context.IndexUpdateTriggers.Apply(trigger => trigger.OnIndexEntryCreated(name, newDocId, luceneDoc));
                        log.DebugFormat("Index '{0}' resulted in: {1}", name, luceneDoc);
                        indexWriter.AddDocument(luceneDoc);
                    }

					actions.Indexing.IncrementSuccessIndexing();
				}

				return madeChanges;
			});
			log.DebugFormat("Indexed {0} documents for {1}", count, name);
		}
Example #10
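// Cutoff optimization: for a map-only index limited to specific entity collections, returns the highest
// last-collection etag as the cutoff when none was supplied.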
 public Etag OptimizeCutoffForIndex(AbstractViewGenerator viewGenerator, Etag cutoffEtag)
 {
     if (cutoffEtag != null) return cutoffEtag;
     if (viewGenerator.ReduceDefinition == null && viewGenerator.ForEntityNames.Count > 0)
     {
         var etags = viewGenerator.ForEntityNames.Select(GetLastEtagForCollection)
                                 .Where(x=> x != null);
         if (etags.Any())
             return etags.Max();
     }
     return null;
 }
Example #11
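// Index base constructor: validates its arguments, stores them, and recreates the searcher.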
		protected Index(Directory directory, string name, IndexDefinition indexDefinition, AbstractViewGenerator viewGenerator)
		{
			if (directory == null) throw new ArgumentNullException("directory");
			if (name == null) throw new ArgumentNullException("name");
			if (indexDefinition == null) throw new ArgumentNullException("indexDefinition");
			if (viewGenerator == null) throw new ArgumentNullException("viewGenerator");

			this.name = name;
			this.indexDefinition = indexDefinition;
			this.viewGenerator = viewGenerator;
			logIndexing.Debug("Creating index for {0}", name);
			this.directory = directory;

			RecreateSearcher();
		}
Example #12
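// Map-only indexing that tracks the current document id, deleting all previous index entries for a document before re-adding it.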
        public override void IndexDocuments(
            AbstractViewGenerator viewGenerator,
            IEnumerable<object> documents,
            WorkContext context,
            DocumentStorageActions actions)
        {
            actions.SetCurrentIndexStatsTo(name);
            var count = 0;
            Write(indexWriter =>
            {
                string currentId = null;
                PropertyDescriptorCollection properties = null;
                foreach (var doc in RobustEnumeration(documents, viewGenerator.MapDefinition, actions, context))
                {
                    count++;

                    string newDocId;
                    IEnumerable<AbstractField> fields;
                    if (doc is DynamicJsonObject)
                        fields = ExtractIndexDataFromDocument((DynamicJsonObject) doc, out newDocId);
                    else
                        fields = ExtractIndexDataFromDocument(properties, doc, out newDocId);
                    if (currentId != newDocId) // new document id, so delete all old values matching it
                    {
                        context.IndexUpdateTriggers.Apply(trigger => trigger.OnIndexEntryDeleted(name, newDocId));
                        indexWriter.DeleteDocuments(new Term("__document_id", newDocId));
                    }

                    if (newDocId != null)
                    {
                        var luceneDoc = new Document();
                        luceneDoc.Add(new Field("__document_id", newDocId, Field.Store.YES, Field.Index.NOT_ANALYZED));

                        currentId = newDocId;
                        CopyFieldsToDocumentButRemoveDuplicateValues(luceneDoc, fields);
                        context.IndexUpdateTriggers.Apply(trigger => trigger.OnIndexEntryCreated(name, newDocId, luceneDoc));
                        log.DebugFormat("Index '{0}' resulted in: {1}", name, luceneDoc);
                        indexWriter.AddDocument(luceneDoc);
                    }

                    actions.IncrementSuccessIndexing();
                }

                return currentId != null;
            });
            log.DebugFormat("Indexed {0} documents for {1}", count, name);
        }
Example #13
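// An older map-only variant that builds Lucene fields through AnonymousObjectToLuceneDocumentConverter and stores document ids UN_TOKENIZED.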
        public override void IndexDocuments(
            AbstractViewGenerator viewGenerator,
            IEnumerable<object> documents,
            WorkContext context,
            DocumentStorageActions actions)
        {
            actions.SetCurrentIndexStatsTo(name);
            var count = 0;
            Write(indexWriter =>
            {
                string currentId = null;
                var converter = new AnonymousObjectToLuceneDocumentConverter();
                PropertyDescriptorCollection properties = null;
                foreach (var doc in RobustEnumeration(documents, viewGenerator.MapDefinition, actions, context))
                {
                    count++;

                    if (properties == null)
                    {
                        properties = TypeDescriptor.GetProperties(doc);
                    }
                    var newDocId = properties.Find("__document_id", false).GetValue(doc) as string;
                    var fields = converter.Index(doc, properties, indexDefinition);
                    if (currentId != newDocId) // new document id, so delete all old values matching it
                    {
                        indexWriter.DeleteDocuments(new Term("__document_id", newDocId));
                    }

                    if (newDocId != null)
                    {
                        var luceneDoc = new Document();
                        luceneDoc.Add(new Field("__document_id", newDocId, Field.Store.YES, Field.Index.UN_TOKENIZED));

                        currentId = newDocId;
                        CopyFieldsToDocumentButRemoveDuplicateValues(luceneDoc, fields);
                        log.DebugFormat("Indexing document {0}", luceneDoc);
                        indexWriter.AddDocument(luceneDoc);
                    }

                    actions.IncrementSuccessIndexing();
                }

                return currentId != null;
            });
            log.DebugFormat("Indexed {0} documents for {1}", count, name);
        }
Example #14
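// Index constructor that clears a write.lock potentially left behind by a server crash, then opens an IndexSearcher.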
        protected Index(Directory directory, string name, IndexDefinition indexDefinition, AbstractViewGenerator viewGenerator)
        {
            this.name = name;
            this.indexDefinition = indexDefinition;
            this.viewGenerator = viewGenerator;
            logIndexing.DebugFormat("Creating index for {0}", name);
            this.directory = directory;

            // clear any locks that are currently held
            // this may happen if the server crashed while
            // writing to the index
            this.directory.ClearLock("write.lock");

            searcher = new CurrentIndexSearcher
            {
                Searcher = new IndexSearcher(directory, true)
            };
        }
Example #15
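// Aggregator constructor: loads the last aggregated etag from a persisted status document, falling back to Etag.Empty when it is missing.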
		public Aggregator(AggregationEngine aggregationEngine, string name, AbstractViewGenerator generator)
		{
			_aggregationEngine = aggregationEngine;
			_name = name;
			_generator = generator;

		    _aggStat = "status/" + _name;

	        using (var stream = aggregationEngine.Storage.Reader.Read(_aggStat))
	        {
	            if (stream == null)
	            {
                    _lastAggregatedEtag = Etag.Empty;
	                return;
	            }
	            var status = RavenJObject.Load(new JsonTextReader(new StreamReader(stream)));
	            _lastAggregatedEtag = Etag.Parse(status.Value<string>("@etag"));
	        }
		}
Example #16
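// Index constructor combining argument validation, stale write.lock clearing, and searcher recreation.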
		protected Index(Directory directory, string name, IndexDefinition indexDefinition, AbstractViewGenerator viewGenerator)
		{
			if (directory == null) throw new ArgumentNullException("directory");
			if (name == null) throw new ArgumentNullException("name");
			if (indexDefinition == null) throw new ArgumentNullException("indexDefinition");
			if (viewGenerator == null) throw new ArgumentNullException("viewGenerator");

			this.name = name;
			this.indexDefinition = indexDefinition;
			this.viewGenerator = viewGenerator;
			logIndexing.Debug("Creating index for {0}", name);
			this.directory = directory;

			// clear any locks that are currently held
			// this may happen if the server crashed while
			// writing to the index
			this.directory.ClearLock("write.lock");

			RecreateSearcher();
		}
Example #17
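// Translates an IndexQuery's sorted fields into a Lucene Sort, handling score, random, spatial-distance, and typed (e.g. numeric) sort fields.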
		public static Sort GetSort(this IndexQuery self, IndexDefinition indexDefinition, AbstractViewGenerator viewGenerator)
		{
			var spatialQuery = self as SpatialIndexQuery;
			var sortedFields = self.SortedFields;
			if (sortedFields == null || sortedFields.Length <= 0)
			{
				if (spatialQuery == null || string.IsNullOrEmpty(self.Query) == false)
					return null;
				sortedFields = new[] { new SortedField(Constants.DistanceFieldName), };
			}

			return new Sort(sortedFields
							.Select(sortedField =>
							{
								if (sortedField.Field == Constants.TemporaryScoreValue)
								{
									return SortField.FIELD_SCORE;
								}
								if (sortedField.Field.StartsWith(Constants.RandomFieldName))
								{
									var parts = sortedField.Field.Split(new[] { ';' }, StringSplitOptions.RemoveEmptyEntries);
									if (parts.Length < 2) // truly random
										return new RandomSortField(Guid.NewGuid().ToString());
									return new RandomSortField(parts[1]);
								}
								if (spatialQuery != null && sortedField.Field == Constants.DistanceFieldName)
								{
									var spatialField = viewGenerator.GetSpatialField(spatialQuery.SpatialFieldName);
									var shape = spatialField.ReadShape(spatialQuery.QueryShape);
									var dsort = new SpatialDistanceFieldComparatorSource(spatialField, shape.GetCenter());
									return new SortField(Constants.DistanceFieldName, dsort, sortedField.Descending);
								}
								var sortOptions = GetSortOption(indexDefinition, sortedField.Field);
                                
								if (sortOptions == null || sortOptions == SortOptions.None)
									return new SortField(sortedField.Field, CultureInfo.InvariantCulture, sortedField.Descending);
							    
                                if (sortOptions.Value == SortOptions.Short)
							        sortOptions = SortOptions.Int;
							    return new SortField(sortedField.Field, (int)sortOptions.Value, sortedField.Descending);

							})
							.ToArray());
		}
Example #18
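// Multi-step reduce: for keys previously reduced in a single step, re-schedules level-0 reductions for all their
// mapped buckets, then runs reduction levels 0 through 2 in throttled batches.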
		private void MultiStepReduce(IndexToWorkOn index, string[] keysToReduce, AbstractViewGenerator viewGenerator, ConcurrentSet<object> itemsToDelete)
		{
			var needToMoveToMultiStep = new HashSet<string>();
			transactionalStorage.Batch(actions =>
			{
				foreach (var localReduceKey in keysToReduce)
				{
					var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexName, localReduceKey);

					if (lastPerformedReduceType != ReduceType.MultiStep)
						needToMoveToMultiStep.Add(localReduceKey);

					if (lastPerformedReduceType != ReduceType.SingleStep)
						continue;
					// we exceeded the limit of items to reduce in single step
					// now we need to schedule reductions at level 0 for all map results with given reduce key
					var mappedItems = actions.MapReduce.GetMappedBuckets(index.IndexName, localReduceKey).ToList();
					foreach (var result in mappedItems.Select(x => new ReduceKeyAndBucket(x, localReduceKey)))
					{
						actions.MapReduce.ScheduleReductions(index.IndexName, 0, result);
					}
				}
			});

			for (int i = 0; i < 3; i++)
			{
				var level = i;

				var reduceParams = new GetItemsToReduceParams(
					index.IndexName,
					keysToReduce,
					level,
					true,
					itemsToDelete);

				bool retry = true;
				while (retry && reduceParams.ReduceKeys.Count > 0)
				{
					var reduceBatchAutoThrottlerId = Guid.NewGuid();
					try
					{
						transactionalStorage.Batch(actions =>
						{
							context.CancellationToken.ThrowIfCancellationRequested();

							var batchTimeWatcher = Stopwatch.StartNew();

							reduceParams.Take = context.CurrentNumberOfItemsToReduceInSingleBatch;
							var persistedResults = actions.MapReduce.GetItemsToReduce(reduceParams).ToList();
							if (persistedResults.Count == 0)
							{
								retry = false;
								return;
							}

							var count = persistedResults.Count;
							var size = persistedResults.Sum(x => x.Size);
							autoTuner.CurrentlyUsedBatchSizes.GetOrAdd(reduceBatchAutoThrottlerId, size);

							if (Log.IsDebugEnabled)
							{
								if (persistedResults.Count > 0)
									Log.Debug(() => string.Format("Found {0} results for keys [{1}] for index {2} at level {3} in {4}",
										persistedResults.Count,
										string.Join(", ", persistedResults.Select(x => x.ReduceKey).Distinct()),
										index.IndexName, level, batchTimeWatcher.Elapsed));
								else
									Log.Debug("No reduce keys found for {0}", index.IndexName);
							}

							context.CancellationToken.ThrowIfCancellationRequested();

							var requiredReduceNextTime = persistedResults.Select(x => new ReduceKeyAndBucket(x.Bucket, x.ReduceKey))
								.OrderBy(x => x.Bucket)
								.Distinct()
								.ToArray();
							foreach (var mappedResultInfo in requiredReduceNextTime)
							{
								actions.MapReduce.RemoveReduceResults(index.IndexName, level + 1, mappedResultInfo.ReduceKey,
									mappedResultInfo.Bucket);
							}

							if (level != 2)
							{
								var reduceKeysAndBuckets = requiredReduceNextTime
									.Select(x => new ReduceKeyAndBucket(x.Bucket / 1024, x.ReduceKey))
									.Distinct()
									.ToArray();
								foreach (var reduceKeysAndBucket in reduceKeysAndBuckets)
								{
									actions.MapReduce.ScheduleReductions(index.IndexName, level + 1, reduceKeysAndBucket);
								}
							}

							var results = persistedResults
								.Where(x => x.Data != null)
								.GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data))
								.ToArray();
							var reduceKeys = new HashSet<string>(persistedResults.Select(x => x.ReduceKey),
								StringComparer.InvariantCultureIgnoreCase);
							context.PerformanceCounters.ReducedPerSecond.IncrementBy(results.Length);

							context.CancellationToken.ThrowIfCancellationRequested();
							var reduceTimeWatcher = Stopwatch.StartNew();

							context.IndexStorage.Reduce(index.IndexName, viewGenerator, results, level, context, actions, reduceKeys, persistedResults.Count);

							var batchDuration = batchTimeWatcher.Elapsed;
							Log.Debug("Indexed {0} reduce keys in {1} with {2} results for index {3} in {4} on level {5}", reduceKeys.Count, batchDuration,
								results.Length, index.IndexName, reduceTimeWatcher.Elapsed, level);

							autoTuner.AutoThrottleBatchSize(count, size, batchDuration);
						});
					}
					finally
					{
						long _;
						autoTuner.CurrentlyUsedBatchSizes.TryRemove(reduceBatchAutoThrottlerId, out _);
					}
				}
			}

			foreach (var reduceKey in needToMoveToMultiStep)
			{
				string localReduceKey = reduceKey;
				transactionalStorage.Batch(actions =>
										   actions.MapReduce.UpdatePerformedReduceType(index.IndexName, localReduceKey,
																					   ReduceType.MultiStep));
			}
		}
Example #19
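// Single-step reduce: in parallel batches, loads the scheduled items per key, cleans up leftover multi-step
// reduce results, then reduces all mapped results directly at level 2.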
		private void SingleStepReduce(IndexToWorkOn index, string[] keysToReduce, AbstractViewGenerator viewGenerator,
												ConcurrentSet<object> itemsToDelete)
		{
			var needToMoveToSingleStepQueue = new ConcurrentQueue<HashSet<string>>();

			Log.Debug(() => string.Format("Executing single step reducing for {0} keys [{1}]", keysToReduce.Length, string.Join(", ", keysToReduce)));
			var batchTimeWatcher = Stopwatch.StartNew();
			var count = 0;
			var size = 0;
			var state = new ConcurrentQueue<Tuple<HashSet<string>, List<MappedResultInfo>>>();
			var reducingBatchThrottlerId = Guid.NewGuid();
			try
			{
				BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, keysToReduce, enumerator =>
				{
					var localNeedToMoveToSingleStep = new HashSet<string>();
					needToMoveToSingleStepQueue.Enqueue(localNeedToMoveToSingleStep);
					var localKeys = new HashSet<string>();
					while (enumerator.MoveNext())
					{
						localKeys.Add(enumerator.Current);
					}

					transactionalStorage.Batch(actions =>
					{
						var getItemsToReduceParams = new GetItemsToReduceParams(index: index.IndexName, reduceKeys: localKeys, level: 0,
							loadData: false,
							itemsToDelete: itemsToDelete)
						{
							Take = int.MaxValue // just get all, we do the rate limit when we load the number of keys to reduce, anyway
						};

					
						var scheduledItems = actions.MapReduce.GetItemsToReduce(getItemsToReduceParams).ToList();
						autoTuner.CurrentlyUsedBatchSizes.GetOrAdd(reducingBatchThrottlerId, scheduledItems.Sum(x => x.Size));
						if (scheduledItems.Count == 0)
						{
							if (Log.IsWarnEnabled)
							{
								Log.Warn("Found single reduce items ({0}) that didn't have any items to reduce. Deleting level 1 & level 2 items for those keys. (If you can reproduce this, please contact [email protected])",
									string.Join(", ", keysToReduce));
							}
							// Here we have an interesting issue. We have scheduled reductions, because GetReduceTypesPerKeys() returned them
							// and at the same time, we don't have any at level 0. That probably means that we have them at level 1 or 2.
							// They shouldn't be here, and indeed, we remove them just a little down from here in this function.
							// That said, they might have smuggled in between versions, or something happened to cause them to be here.
							// In order to avoid that, we forcibly delete those extra items from the scheduled reductions, and move on
							foreach (var reduceKey in keysToReduce)
							{
								actions.MapReduce.DeleteScheduledReduction(index.IndexName, 1, reduceKey);
								actions.MapReduce.DeleteScheduledReduction(index.IndexName, 2, reduceKey);
							}
						}

						foreach (var reduceKey in localKeys)
						{
							var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexName, reduceKey);

							if (lastPerformedReduceType != ReduceType.SingleStep)
								localNeedToMoveToSingleStep.Add(reduceKey);

							if (lastPerformedReduceType != ReduceType.MultiStep)
								continue;

							Log.Debug("Key {0} was moved from multi step to single step reduce, removing existing reduce results records",
								reduceKey);

							// now we are in single step but previously multi step reduce was performed for the given key
							var mappedBuckets = actions.MapReduce.GetMappedBuckets(index.IndexName, reduceKey).ToList();

							// add scheduled items too to be sure we will delete reduce results of already deleted documents
							mappedBuckets.AddRange(scheduledItems.Select(x => x.Bucket));

							foreach (var mappedBucket in mappedBuckets.Distinct())
							{
								actions.MapReduce.RemoveReduceResults(index.IndexName, 1, reduceKey, mappedBucket);
								actions.MapReduce.RemoveReduceResults(index.IndexName, 2, reduceKey, mappedBucket / 1024);
							}
						}

						var mappedResults = actions.MapReduce.GetMappedResults(
							index.IndexName,
							localKeys,
							loadData: true
							).ToList();

						Interlocked.Add(ref count, mappedResults.Count);
						Interlocked.Add(ref size, mappedResults.Sum(x => x.Size));

						mappedResults.ApplyIfNotNull(x => x.Bucket = 0);

						state.Enqueue(Tuple.Create(localKeys, mappedResults));
					});
				});

				var reduceKeys = new HashSet<string>(state.SelectMany(x => x.Item1));

				var results = state.SelectMany(x => x.Item2)
					.Where(x => x.Data != null)
					.GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data))
					.ToArray();
				context.PerformanceCounters.ReducedPerSecond.IncrementBy(results.Length);

				context.TransactionalStorage.Batch(actions =>
					context.IndexStorage.Reduce(index.IndexName, viewGenerator, results, 2, context, actions, reduceKeys, state.Sum(x=>x.Item2.Count))
					);

				autoTuner.AutoThrottleBatchSize(count, size, batchTimeWatcher.Elapsed);

				var needToMoveToSingleStep = new HashSet<string>();
				HashSet<string> set;
				while (needToMoveToSingleStepQueue.TryDequeue(out set))
				{
					needToMoveToSingleStep.UnionWith(set);
				}

				foreach (var reduceKey in needToMoveToSingleStep)
				{
					string localReduceKey = reduceKey;
					transactionalStorage.Batch(actions =>
						actions.MapReduce.UpdatePerformedReduceType(index.IndexName, localReduceKey, ReduceType.SingleStep));
				}
			}
			finally
			{
				long _;
				autoTuner.CurrentlyUsedBatchSizes.TryRemove(reducingBatchThrottlerId, out _);
			}
		}
Example #20
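// The full SimpleIndex map phase: deduplicates and pre-deletes documents, runs index-update trigger batchers,
// indexes partitions in parallel while tracking LoadDocument references, and records indexing performance statistics.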
		public override void IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp)
		{
			var count = 0;
			var sourceCount = 0;
			var sw = Stopwatch.StartNew();
			var start = SystemTime.UtcNow;
			int loadDocumentCount = 0;
			long loadDocumentDuration = 0;
			Write((indexWriter, analyzer, stats) =>
			{
				var processedKeys = new HashSet<string>();
				var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(indexId))
					.Where(x => x != null)
					.ToList();
				try
				{
					var indexingPerfStats = RecordCurrentBatch("Current", batch.Docs.Count);
					batch.SetIndexingPerformance(indexingPerfStats);

					var docIdTerm = new Term(Constants.DocumentIdFieldName);
					var documentsWrapped = batch.Docs.Select((doc, i) =>
					{
						Interlocked.Increment(ref sourceCount);
						if (doc.__document_id == null)
							throw new ArgumentException(
								string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));

						string documentId = doc.__document_id.ToString();
						if (processedKeys.Add(documentId) == false)
							return doc;

						InvokeOnIndexEntryDeletedOnAllBatchers(batchers, docIdTerm.CreateTerm(documentId.ToLowerInvariant()));

						if (batch.SkipDeleteFromIndex[i] == false ||
							context.ShouldRemoveFromIndex(documentId)) // maybe it is recently deleted?
							indexWriter.DeleteDocuments(docIdTerm.CreateTerm(documentId.ToLowerInvariant()));

						return doc;
					})
						.Where(x => x is FilteredDocument == false)
						.ToList();

					var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();
					var allReferenceEtags = new ConcurrentQueue<IDictionary<string, Etag>>();

					BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, (partition) =>
					{
						var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(context.Database, indexDefinition, viewGenerator, logIndexing);
						var luceneDoc = new Document();
						var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES,
														Field.Index.NOT_ANALYZED_NO_NORMS);

						using (CurrentIndexingScope.Current = new CurrentIndexingScope(context.Database, PublicName))
						{
							string currentDocId = null;
							int outputPerDocId = 0;
							Action<Exception, object> onErrorFunc;
							bool skipDocument = false;
							foreach (var doc in RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, stats, out onErrorFunc))
							{
								float boost;
								IndexingResult indexingResult;
								try
								{
									indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);
								}
								catch (Exception e)
								{
									onErrorFunc(e, doc);
									continue;
								}

								// ReSharper disable once RedundantBoolCompare --> code clarity
								if (indexingResult.NewDocId == null || indexingResult.ShouldSkip != false)
								{
									continue;
								}
								if (currentDocId != indexingResult.NewDocId)
								{
									currentDocId = indexingResult.NewDocId;
									outputPerDocId = 0;
									skipDocument = false;
								}
								if (skipDocument)
									continue;
								outputPerDocId++;
								if (EnsureValidNumberOfOutputsForDocument(currentDocId, outputPerDocId) == false)
								{
									skipDocument = true;
									continue;
								}
								Interlocked.Increment(ref count);
								luceneDoc.GetFields().Clear();
								luceneDoc.Boost = boost;
								documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
								luceneDoc.Add(documentIdField);
								foreach (var field in indexingResult.Fields)
								{
									luceneDoc.Add(field);
								}
								batchers.ApplyAndIgnoreAllErrors(
									exception =>
									{
										logIndexing.WarnException(
										string.Format(
											"Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
											indexId, indexingResult.NewDocId),
											exception);
										context.AddError(indexId,
															 indexingResult.NewDocId,
															 exception.Message,
															 "OnIndexEntryCreated Trigger"
												);
									},
									trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc));
								LogIndexedDocument(indexingResult.NewDocId, luceneDoc);
								AddDocumentToIndex(indexWriter, luceneDoc, analyzer);

								Interlocked.Increment(ref stats.IndexingSuccesses);
							}
							allReferenceEtags.Enqueue(CurrentIndexingScope.Current.ReferencesEtags);
							allReferencedDocs.Enqueue(CurrentIndexingScope.Current.ReferencedDocuments);

							Interlocked.Add(ref loadDocumentCount, CurrentIndexingScope.Current.LoadDocumentCount);
							Interlocked.Add(ref loadDocumentDuration, CurrentIndexingScope.Current.LoadDocumentDuration.ElapsedMilliseconds);
						}
					});
					UpdateDocumentReferences(actions, allReferencedDocs, allReferenceEtags);
				}
				catch (Exception e)
				{
					batchers.ApplyAndIgnoreAllErrors(
						ex =>
						{
							logIndexing.WarnException("Failed to notify index update trigger batcher about an error", ex);
							context.AddError(indexId, null, ex.Message, "AnErrorOccured Trigger");
						},
						x => x.AnErrorOccured(e));
					throw;
				}
				finally
				{
					batchers.ApplyAndIgnoreAllErrors(
						e =>
						{
							logIndexing.WarnException("Failed to dispose on index update trigger", e);
							context.AddError(indexId, null, e.Message, "Dispose Trigger");
						},
						x => x.Dispose());
					BatchCompleted("Current");
				}
				return new IndexedItemsInfo(batch.HighestEtagBeforeFiltering)
				{
					ChangedDocs = sourceCount
				};
			});

			AddindexingPerformanceStat(new IndexingPerformanceStats
			{
				OutputCount = count,
				ItemsCount = sourceCount,
				InputCount = batch.Docs.Count,
				Duration = sw.Elapsed,
				Operation = "Index",
				Started = start,
				LoadDocumentCount = loadDocumentCount,
				LoadDocumentDurationMs = loadDocumentDuration 
			});
			logIndexing.Debug("Indexed {0} documents for {1}", count, indexId);
		}
Example #21
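// SimpleIndex constructor: forwards straight to the Index base class.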
		public SimpleIndex(Directory directory, int id, IndexDefinition indexDefinition, AbstractViewGenerator viewGenerator, WorkContext context)
			: base(directory, id, indexDefinition, viewGenerator, context)
		{
		}
Example #22
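// For a newly created index, attempts a precomputed indexing batch by querying Raven/DocumentsByEntityName
// for the covered collections, bailing out when the result set exceeds the single-batch limit.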
        private void ApplyPrecomputedBatchForNewIndex(Index index, AbstractViewGenerator generator)
        {
            const string DocumentsByEntityNameIndex = "Raven/DocumentsByEntityName";

            PrecomputedIndexingBatch result = null;

            var docsToIndex = new List<JsonDocument>();
            TransactionalStorage.Batch(actions =>
            {
                var tags = generator.ForEntityNames.Select(entityName => "Tag:[[" + entityName + "]]").ToList();

                var query = string.Join(" OR ", tags);

	            JsonDocument highestByEtag = null;

                var cts = new CancellationTokenSource();
                using (var linked = CancellationTokenSource.CreateLinkedTokenSource(cts.Token, WorkContext.CancellationToken))
                using (var op = new QueryActions.DatabaseQueryOperation(Database, DocumentsByEntityNameIndex, new IndexQuery
                {
                    Query = query,
					PageSize = Database.Configuration.MaxNumberOfItemsToProcessInSingleBatch
                }, actions, linked)
                {
                    ShouldSkipDuplicateChecking = true
                })
                {
                    op.Init();
                    if (op.Header.TotalResults == 0 ||
                        (op.Header.TotalResults > Database.Configuration.MaxNumberOfItemsToProcessInSingleBatch))
                    {
                        // we don't apply this optimization if the total number of results 
						// to index is more than the max numbers to index in a single batch. 
						// The idea here is that we need to keep the amount
                        // of memory we use to a manageable level even when introducing a new index to a BIG 
                        // database
                        try
                        {
                            cts.Cancel();
                            // we have to run just a little bit of the query to properly setup the disposal
                            op.Execute(o => { });
                        }
                        catch (OperationCanceledException)
                        {
                        }
                        return;
                    }

                    Log.Debug("For new index {0}, using precomputed indexing batch optimization for {1} docs", index,
                              op.Header.TotalResults);
                    op.Execute(document =>
                    {
                        var metadata = document.Value<RavenJObject>(Constants.Metadata);
                        var key = metadata.Value<string>("@id");
                        var etag = Etag.Parse(metadata.Value<string>("@etag"));
                        var lastModified = DateTime.Parse(metadata.Value<string>(Constants.LastModified));
                        document.Remove(Constants.Metadata);

	                    var doc = new JsonDocument
	                    {
		                    DataAsJson = document,
		                    Etag = etag,
		                    Key = key,
		                    LastModified = lastModified,
		                    SkipDeleteFromIndex = true,
		                    Metadata = metadata
	                    };

	                    docsToIndex.Add(doc);

	                    if (highestByEtag == null || doc.Etag.CompareTo(highestByEtag.Etag) > 0)
		                    highestByEtag = doc;
                    });
                }

	            result = new PrecomputedIndexingBatch
                {
                    LastIndexed = highestByEtag.Etag,
                    LastModified = highestByEtag.LastModified.Value,
                    Documents = docsToIndex,
                    Index = index
                };
            });

            if (result != null && result.Documents != null && result.Documents.Count > 0)
                Database.IndexingExecuter.IndexPrecomputedBatch(result);

        }
Example #23
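// Reduce dispatch: locates the target index, verifies it is a map/reduce index, executes the reduction, and raises a ReduceCompleted notification.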
		public IndexingPerformanceStats Reduce(
			int index,
			AbstractViewGenerator viewGenerator,
			IEnumerable<IGrouping<int, object>> mappedResults,
			int level,
			WorkContext context,
			IStorageActionsAccessor actions,
			HashSet<string> reduceKeys,
			int inputCount)
		{
			Index value = indexes[index];
			if (value == null)
			{
				log.Debug("Tried to index on a non existent index {0}, ignoring", index);
				return null;
			}
			var mapReduceIndex = value as MapReduceIndex;
			if (mapReduceIndex == null)
			{
				log.Warn("Tried to reduce on an index that is not a map/reduce index: {0}, ignoring", index);
				return null;
			}
			using (EnsureInvariantCulture())
			{
				var reduceDocuments = new MapReduceIndex.ReduceDocuments(mapReduceIndex, viewGenerator, mappedResults, level, context, actions, reduceKeys, inputCount);

				var performance = reduceDocuments.ExecuteReduction();

				context.RaiseIndexChangeNotification(new IndexChangeNotification
				{
					Name = value.PublicName,
					Type = IndexChangeTypes.ReduceCompleted
				});

				return performance;
			}
		}
Example #24
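// Map dispatch: locates the target index, runs IndexDocuments on the batch, and raises a MapCompleted notification.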
		public IndexingPerformanceStats Index(int index, AbstractViewGenerator viewGenerator, IndexingBatch batch, WorkContext context, IStorageActionsAccessor actions, DateTime minimumTimestamp, CancellationToken token)
		{
			Index value;
			if (indexes.TryGetValue(index, out value) == false)
			{
				log.Debug("Tried to index on a non existent index {0}, ignoring", index);
				return null;
			}
			using (EnsureInvariantCulture())
			using (DocumentCacher.SkipSettingDocumentsInDocumentCache())
			{
				var performance = value.IndexDocuments(viewGenerator, batch, actions, minimumTimestamp, token);
				context.RaiseIndexChangeNotification(new IndexChangeNotification
				{
					Name = value.PublicName,
					Type = IndexChangeTypes.MapCompleted
				});

				return performance;
			}
		}
Example #25
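// Index constructor that also creates the IndexSearcherHolder and derives the flush size from configuration.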
		protected Index(Directory directory, string name, IndexDefinition indexDefinition, AbstractViewGenerator viewGenerator, WorkContext context)
		{
		    currentIndexSearcherHolder = new IndexSearcherHolder(name ,context);
		    if (directory == null) throw new ArgumentNullException("directory");
			if (name == null) throw new ArgumentNullException("name");
			if (indexDefinition == null) throw new ArgumentNullException("indexDefinition");
			if (viewGenerator == null) throw new ArgumentNullException("viewGenerator");

			this.name = name;
			this.indexDefinition = indexDefinition;
			this.viewGenerator = viewGenerator;
			this.context = context;
		    logIndexing.Debug("Creating index for {0}", name);
			this.directory = directory;
			flushSize = context.Configuration.FlushIndexToDiskSizeInMb * 1024 * 1024;

			RecreateSearcher();
		}
Example #26
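// Query validation: rejects query or sort fields the index does not contain ('_' acts as a catch-all for
// dynamic field names; the score, random, and distance pseudo-fields are exempt from the sort check).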
	    public static void AssertQueryDoesNotContainFieldsThatAreNotIndexed(IndexQuery indexQuery, AbstractViewGenerator viewGenerator)
        {
            if (string.IsNullOrWhiteSpace(indexQuery.Query))
                return;
            HashSet<string> hashSet = SimpleQueryParser.GetFields(indexQuery);
            foreach (string field in hashSet)
            {
                string f = field;
                if (f.EndsWith("_Range"))
                {
                    f = f.Substring(0, f.Length - "_Range".Length);
                }
                if (viewGenerator.ContainsField(f) == false &&
                    viewGenerator.ContainsField("_") == false) // the catch all field name means that we have dynamic fields names
                    throw new ArgumentException("The field '" + f + "' is not indexed, cannot query on fields that are not indexed");
            }

            if (indexQuery.SortedFields == null)
                return;

            foreach (SortedField field in indexQuery.SortedFields)
            {
                string f = field.Field;
                if (f == Constants.TemporaryScoreValue)
                    continue;
                if (f.EndsWith("_Range"))
                {
                    f = f.Substring(0, f.Length - "_Range".Length);
                }
                if (f.StartsWith(Constants.RandomFieldName))
                    continue;
                if (viewGenerator.ContainsField(f) == false && f != Constants.DistanceFieldName
                    && viewGenerator.ContainsField("_") == false)// the catch all field name means that we have dynamic fields names
                    throw new ArgumentException("The field '" + f + "' is not indexed, cannot sort on fields that are not indexed");
            }
        }
Example #27
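// The abstract map-phase entry point that the IndexDocuments overrides above implement.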
		public abstract void IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp);
Example #28
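// A later single-step reduce implementation, instrumented with per-operation performance statistics and cancellation support.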
		private ReducingPerformanceStats SingleStepReduce(IndexToWorkOn index, List<string> keysToReduce, AbstractViewGenerator viewGenerator,
												          ConcurrentSet<object> itemsToDelete, CancellationToken token)
		{
			var needToMoveToSingleStepQueue = new ConcurrentQueue<HashSet<string>>();

            if ( Log.IsDebugEnabled )
			    Log.Debug(() => string.Format("Executing single step reducing for {0} keys [{1}]", keysToReduce.Count, string.Join(", ", keysToReduce)));

			var batchTimeWatcher = Stopwatch.StartNew();

			var reducingBatchThrottlerId = Guid.NewGuid();
			var reducePerformanceStats = new ReducingPerformanceStats(ReduceType.SingleStep);
			var reduceLevelStats = new ReduceLevelPeformanceStats
			{
				Started = SystemTime.UtcNow,
				Level = 2
			};

			try
			{
				var parallelOperations = new ConcurrentQueue<ParallelBatchStats>();

				var parallelProcessingStart = SystemTime.UtcNow;

				BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, keysToReduce, enumerator =>
				{
					var parallelStats = new ParallelBatchStats
					{
						StartDelay = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds
					};

					var localNeedToMoveToSingleStep = new HashSet<string>();
					needToMoveToSingleStepQueue.Enqueue(localNeedToMoveToSingleStep);
					var localKeys = new HashSet<string>();
					while (enumerator.MoveNext())
					{
						token.ThrowIfCancellationRequested();

						localKeys.Add(enumerator.Current);
					}

					transactionalStorage.Batch(actions =>
					{
						var getItemsToReduceParams = new GetItemsToReduceParams(index: index.IndexId, reduceKeys: localKeys, level: 0, loadData: false, itemsToDelete: itemsToDelete)
						{
							Take = int.MaxValue // just get all, we do the rate limit when we load the number of keys to reduce, anyway
						};

						var getItemsToReduceDuration = Stopwatch.StartNew();

                        int scheduledItemsSum = 0;
                        int scheduledItemsCount = 0;
                        List<int> scheduledItemsMappedBuckets = new List<int>();
						using (StopwatchScope.For(getItemsToReduceDuration))
						{
                            foreach (var item in actions.MapReduce.GetItemsToReduce(getItemsToReduceParams, token))
                            {
                                scheduledItemsMappedBuckets.Add(item.Bucket);
                                scheduledItemsSum += item.Size;
                                scheduledItemsCount++;
                            }
						}

						parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_GetItemsToReduce, getItemsToReduceDuration.ElapsedMilliseconds));

						autoTuner.CurrentlyUsedBatchSizesInBytes.GetOrAdd(reducingBatchThrottlerId, scheduledItemsSum);

                        if (scheduledItemsCount == 0)
						{						    
							// Here we have an interesting issue. We have scheduled reductions, because GetReduceTypesPerKeys() returned them
							// and at the same time, we don't have any at level 0. That probably means that we have them at level 1 or 2.
							// They shouldn't be here, and indeed, we remove them just a little down from here in this function.
							// That said, they might have smuggled in between versions, or something happened to cause them to be here.
							// In order to avoid that, we forcibly delete those extra items from the scheduled reductions, and move on

                            Log.Warn("Found single reduce items ({0}) that didn't have any items to reduce. Deleting level 1 & level 2 items for those keys. (If you can reproduce this, please contact [email protected])", string.Join(", ", keysToReduce));

							var deletingScheduledReductionsDuration = Stopwatch.StartNew();

							using (StopwatchScope.For(deletingScheduledReductionsDuration))
							{
								foreach (var reduceKey in keysToReduce)
								{
									token.ThrowIfCancellationRequested();

									actions.MapReduce.DeleteScheduledReduction(index.IndexId, 1, reduceKey);
									actions.MapReduce.DeleteScheduledReduction(index.IndexId, 2, reduceKey);
								}
							}

							parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_DeleteScheduledReductions, deletingScheduledReductionsDuration.ElapsedMilliseconds));
						}

						var removeReduceResultsDuration = new Stopwatch();

						foreach (var reduceKey in localKeys)
						{
							token.ThrowIfCancellationRequested();

							var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexId, reduceKey);

							if (lastPerformedReduceType != ReduceType.SingleStep)
								localNeedToMoveToSingleStep.Add(reduceKey);

							if (lastPerformedReduceType != ReduceType.MultiStep)
								continue;

                            if ( Log.IsDebugEnabled )
                            {
                                Log.Debug("Key {0} was moved from multi step to single step reduce, removing existing reduce results records", reduceKey);
                            }

							using (StopwatchScope.For(removeReduceResultsDuration))
							{
                                // now we are in single step but previously multi step reduce was performed for the given key
                                var mappedBuckets = actions.MapReduce.GetMappedBuckets(index.IndexId, reduceKey, token);        

                                // add scheduled items too to be sure we will delete reduce results of already deleted documents
								foreach (var mappedBucket in mappedBuckets.Union(scheduledItemsMappedBuckets))
								{
									actions.MapReduce.RemoveReduceResults(index.IndexId, 1, reduceKey, mappedBucket);
									actions.MapReduce.RemoveReduceResults(index.IndexId, 2, reduceKey, mappedBucket / 1024);
								}
							}
						}

						parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_RemoveReduceResults, removeReduceResultsDuration.ElapsedMilliseconds));

						parallelOperations.Enqueue(parallelStats);
					});
				});

				reduceLevelStats.Operations.Add(new ParallelPerformanceStats
				{
					NumberOfThreads = parallelOperations.Count,
					DurationMs = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds,
					BatchedOperations = parallelOperations.ToList()
				});

				var getMappedResultsDuration = new Stopwatch();				

				var reductionPerformanceStats = new List<IndexingPerformanceStats>();
                
                var keysLeftToReduce = new HashSet<string>(keysToReduce);                                              
				while (keysLeftToReduce.Count > 0)
				{
                    var keysReturned = new HashSet<string>();       

                    // Try to diminish the allocations happening because of .Resize()
                    var mappedResults = new List<MappedResultInfo>(keysLeftToReduce.Count);                             
                    
                    context.TransactionalStorage.Batch(actions =>
					{
						var take = context.CurrentNumberOfItemsToReduceInSingleBatch;

						using (StopwatchScope.For(getMappedResultsDuration))
						{
                            mappedResults = actions.MapReduce.GetMappedResults(index.IndexId, keysLeftToReduce, true, take, keysReturned, token, mappedResults);
						}
					});

					var count = mappedResults.Count;

                    int size = 0;
                    foreach ( var item in mappedResults )
                    {
                        item.Bucket = 0;
                        size += item.Size;
                    }
                        
                    var results = mappedResults.GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data)).ToArray();

					context.MetricsCounters.ReducedPerSecond.Mark(results.Length);

					token.ThrowIfCancellationRequested();

                    var performance = context.IndexStorage.Reduce(index.IndexId, viewGenerator, results, 2, context, null, keysReturned, count);

					reductionPerformanceStats.Add(performance);

					autoTuner.AutoThrottleBatchSize(count, size, batchTimeWatcher.Elapsed);
				}

				var needToMoveToSingleStep = new HashSet<string>();

				HashSet<string> set;
				while (needToMoveToSingleStepQueue.TryDequeue(out set))
				{
					needToMoveToSingleStep.UnionWith(set);
				}

				foreach (var reduceKey in needToMoveToSingleStep)
				{
					string localReduceKey = reduceKey;
					transactionalStorage.Batch(actions =>
						actions.MapReduce.UpdatePerformedReduceType(index.IndexId, localReduceKey, ReduceType.SingleStep));
				}

				reduceLevelStats.Completed = SystemTime.UtcNow;
				reduceLevelStats.Duration = reduceLevelStats.Completed - reduceLevelStats.Started;
				reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_GetMappedResults, getMappedResultsDuration.ElapsedMilliseconds));
				reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.StorageCommit, 0)); // in single step we write directly to Lucene index

				foreach (var stats in reductionPerformanceStats)
				{
					reduceLevelStats.Add(stats);
				}

				reducePerformanceStats.LevelStats.Add(reduceLevelStats);
			}
			finally
			{
				long _;
				autoTuner.CurrentlyUsedBatchSizesInBytes.TryRemove(reducingBatchThrottlerId, out _);
			}

			return reducePerformanceStats;
		}
Example #29
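// A later multi-step reduce implementation with per-level performance statistics.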
		private ReducingPerformanceStats MultiStepReduce(IndexToWorkOn index, List<string> keysToReduce, AbstractViewGenerator viewGenerator, ConcurrentSet<object> itemsToDelete, CancellationToken token)
		{
			var needToMoveToMultiStep = new HashSet<string>();
			transactionalStorage.Batch(actions =>
			{
				foreach (var localReduceKey in keysToReduce)
				{
					token.ThrowIfCancellationRequested();

					var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexId, localReduceKey);

					if (lastPerformedReduceType != ReduceType.MultiStep)
						needToMoveToMultiStep.Add(localReduceKey);

					if (lastPerformedReduceType != ReduceType.SingleStep)
						continue;

					// we exceeded the limit of items to reduce in single step
					// now we need to schedule reductions at level 0 for all map results with given reduce key
					var mappedItems = actions.MapReduce.GetMappedBuckets(index.IndexId, localReduceKey, token).ToList();
					foreach (var result in mappedItems.Select(x => new ReduceKeyAndBucket(x, localReduceKey)))
					{
						actions.MapReduce.ScheduleReductions(index.IndexId, 0, result);
					}
				}
			});

			var reducePerformance = new ReducingPerformanceStats(ReduceType.MultiStep);

            var keysToReduceSet = new HashSet<string>(keysToReduce);

			for (int i = 0; i < 3; i++)
			{
				var level = i;

				var reduceLevelStats = new ReduceLevelPeformanceStats()
				{
					Level = level,
					Started = SystemTime.UtcNow,
				};

				var reduceParams = new GetItemsToReduceParams(
					index.IndexId,
                    keysToReduceSet,
					level,
					true,
					itemsToDelete);

				var gettingItemsToReduceDuration = new Stopwatch();
				var scheduleReductionsDuration = new Stopwatch();
				var removeReduceResultsDuration = new Stopwatch();
				var storageCommitDuration = new Stopwatch();

				bool retry = true;
				while (retry && reduceParams.ReduceKeys.Count > 0)
				{
					var reduceBatchAutoThrottlerId = Guid.NewGuid();
					try
					{
						transactionalStorage.Batch(actions =>
						{
							token.ThrowIfCancellationRequested();

							actions.BeforeStorageCommit += storageCommitDuration.Start;
							actions.AfterStorageCommit += storageCommitDuration.Stop;

							var batchTimeWatcher = Stopwatch.StartNew();

							reduceParams.Take = context.CurrentNumberOfItemsToReduceInSingleBatch;

                            int size = 0;
                            IList<MappedResultInfo> persistedResults;
                            var reduceKeys = new HashSet<string>(StringComparer.InvariantCultureIgnoreCase);
							using (StopwatchScope.For(gettingItemsToReduceDuration))
							{
                                persistedResults = actions.MapReduce.GetItemsToReduce(reduceParams, token);                                

                                foreach (var item in persistedResults)
                                {
                                    reduceKeys.Add(item.ReduceKey);
                                    size += item.Size;
                                }
							}

                            if (persistedResults.Count == 0)
							{
								retry = false;
								return;
							}

                            var count = persistedResults.Count;

							autoTuner.CurrentlyUsedBatchSizesInBytes.GetOrAdd(reduceBatchAutoThrottlerId, size);

							if (Log.IsDebugEnabled)
							{
                                if (persistedResults.Count > 0)
                                {
                                    Log.Debug(() => string.Format("Found {0} results for keys [{1}] for index {2} at level {3} in {4}",
                                        persistedResults.Count,
                                        string.Join(", ", persistedResults.Select(x => x.ReduceKey).Distinct()),
                                        index.IndexId, level, batchTimeWatcher.Elapsed));
                                }
								else
                                {
                                    Log.Debug("No reduce keys found for {0}", index.IndexId);
                                }									
							}

							token.ThrowIfCancellationRequested();

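                            // Every (reduce key, bucket) pair consumed here invalidates its
                            // previously persisted result at the next level, so remove that
                            // result before rescheduling.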
                            var requiredReduceNextTimeSet = new HashSet<ReduceKeyAndBucket>(persistedResults.Select(x => new ReduceKeyAndBucket(x.Bucket, x.ReduceKey)), ReduceKeyAndBucketEqualityComparer.Instance);

							using (StopwatchScope.For(removeReduceResultsDuration))
							{
                                foreach (var mappedResultInfo in requiredReduceNextTimeSet)
								{
									token.ThrowIfCancellationRequested();

									actions.MapReduce.RemoveReduceResults(index.IndexId, level + 1, mappedResultInfo.ReduceKey, mappedResultInfo.Bucket);
								}
							}

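							// Levels 0 and 1 feed the level above, with 1024 buckets coalescing
							// into one parent bucket; level 2 is terminal, so nothing further
							// is scheduled.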
							if (level != 2)
							{
                                var reduceKeysAndBucketsSet = new HashSet<ReduceKeyAndBucket>(requiredReduceNextTimeSet.Select(x => new ReduceKeyAndBucket(x.Bucket / 1024, x.ReduceKey)), ReduceKeyAndBucketEqualityComparer.Instance);

								using (StopwatchScope.For(scheduleReductionsDuration))
								{
                                    foreach (var reduceKeysAndBucket in reduceKeysAndBucketsSet)
									{
										token.ThrowIfCancellationRequested();

										actions.MapReduce.ScheduleReductions(index.IndexId, level + 1, reduceKeysAndBucket);
									}
								}
							}

							token.ThrowIfCancellationRequested();

							var reduceTimeWatcher = Stopwatch.StartNew();

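                            // Group the surviving mapped results by bucket; each group is fed
                            // to the reduce function as a single unit.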
                            var results = persistedResults.Where(x => x.Data != null)
                                                          .GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data));                            

                            var performance = context.IndexStorage.Reduce(index.IndexId, viewGenerator, results, level, context, actions, reduceKeys, persistedResults.Count);

                            context.MetricsCounters.ReducedPerSecond.Mark(results.Count());

							reduceLevelStats.Add(performance);

							var batchDuration = batchTimeWatcher.Elapsed;

                            if (Log.IsDebugEnabled)
                            {
                                Log.Debug("Indexed {0} reduce keys in {1} with {2} results for index {3} in {4} on level {5}", reduceKeys.Count, batchDuration, performance.ItemsCount, index.IndexId, reduceTimeWatcher.Elapsed, level);
                            }

							autoTuner.AutoThrottleBatchSize(count, size, batchDuration);
						});
					}
					finally
					{
						long _;
						autoTuner.CurrentlyUsedBatchSizesInBytes.TryRemove(reduceBatchAutoThrottlerId, out _);
					}
				}

				reduceLevelStats.Completed = SystemTime.UtcNow;
				reduceLevelStats.Duration = reduceLevelStats.Completed - reduceLevelStats.Started;

				reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_GetItemsToReduce, gettingItemsToReduceDuration.ElapsedMilliseconds));
				reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_ScheduleReductions, scheduleReductionsDuration.ElapsedMilliseconds));
				reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_RemoveReduceResults, removeReduceResultsDuration.ElapsedMilliseconds));
				reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds));

				reducePerformance.LevelStats.Add(reduceLevelStats);
			}

			foreach (var reduceKey in needToMoveToMultiStep)
			{
				token.ThrowIfCancellationRequested();

				string localReduceKey = reduceKey;
				transactionalStorage.Batch(actions =>
										   actions.MapReduce.UpdatePerformedReduceType(index.IndexId, localReduceKey,
																					   ReduceType.MultiStep));
			}

			return reducePerformance;
		}
Example #30
		public override void IndexDocuments(
			AbstractViewGenerator viewGenerator,
			IndexingBatch batch,
			IStorageActionsAccessor actions,
			DateTime minimumTimestamp)
		{
			var count = 0;
			var sourceCount = 0;
			var sw = Stopwatch.StartNew();
			var start = SystemTime.UtcNow;
			var deleted = new Dictionary<ReduceKeyAndBucket, int>();
			var indexPerfStats = RecordCurrentBatch("Current Map", batch.Docs.Count);
			batch.SetIndexingPerformance(indexPerfStats);

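			// Remove stale mapped results for each incoming document; the affected
			// (reduce key, bucket) pairs accumulate in 'deleted' so their reductions
			// can be rescheduled below.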
			var documentsWrapped = batch.Docs.Select(doc =>
			{
				sourceCount++;
				var documentId = doc.__document_id;
				actions.MapReduce.DeleteMappedResultsForDocumentId((string)documentId, indexId, deleted);
				return doc;
			})
				.Where(x => x is FilteredDocument == false)
				.ToList();
			var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();
			var allReferenceEtags = new ConcurrentQueue<IDictionary<string, Etag>>();
			var allState = new ConcurrentQueue<Tuple<HashSet<ReduceKeyAndBucket>, IndexingWorkStats, Dictionary<string, int>>>();

			int loadDocumentCount = 0;
			long loadDocumentDuration = 0;
			BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, partition =>
			{
				var localStats = new IndexingWorkStats();
				var localChanges = new HashSet<ReduceKeyAndBucket>();
				var statsPerKey = new Dictionary<string, int>();
				allState.Enqueue(Tuple.Create(localChanges, localStats, statsPerKey));

				using (CurrentIndexingScope.Current = new CurrentIndexingScope(context.Database, PublicName))
				{
					// we are writing to the transactional store from multiple threads here, and in a streaming fashion
					// should result in less memory and better perf
					context.TransactionalStorage.Batch(accessor =>
					{
						var mapResults = RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, localStats);
						var currentDocumentResults = new List<object>();
						string currentKey = null;
						bool skipDocument = false;
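						// Map results arrive grouped by source document; flush the accumulated
						// results whenever the document id changes.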
						foreach (var currentDoc in mapResults)
						{
							var documentId = GetDocumentId(currentDoc);
							if (documentId != currentKey)
							{
								count += ProcessBatch(viewGenerator, currentDocumentResults, currentKey, localChanges, accessor, statsPerKey);
								currentDocumentResults.Clear();
								currentKey = documentId;
							}
							else if (skipDocument)
							{
								continue;
							}
							currentDocumentResults.Add(new DynamicJsonObject(RavenJObject.FromObject(currentDoc, jsonSerializer)));

							if (EnsureValidNumberOfOutputsForDocument(documentId, currentDocumentResults.Count) == false)
							{
								skipDocument = true;
								currentDocumentResults.Clear();
								continue;
							}

							Interlocked.Increment(ref localStats.IndexingSuccesses);
						}
						count += ProcessBatch(viewGenerator, currentDocumentResults, currentKey, localChanges, accessor, statsPerKey);
					});
					allReferenceEtags.Enqueue(CurrentIndexingScope.Current.ReferencesEtags);
					allReferencedDocs.Enqueue(CurrentIndexingScope.Current.ReferencedDocuments);
					Interlocked.Add(ref loadDocumentCount, CurrentIndexingScope.Current.LoadDocumentCount);
					Interlocked.Add(ref loadDocumentDuration, CurrentIndexingScope.Current.LoadDocumentDuration.ElapsedMilliseconds);
				}
			});

			UpdateDocumentReferences(actions, allReferencedDocs, allReferenceEtags);

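			// Every reduce key/bucket touched by new or deleted map results needs a
			// level-0 reduction scheduled.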
			var changed = allState.SelectMany(x => x.Item1).Concat(deleted.Keys)
					.Distinct()
					.ToList();

			var stats = new IndexingWorkStats(allState.Select(x => x.Item2));
			var reduceKeyStats = allState.SelectMany(x => x.Item3)
										 .GroupBy(x => x.Key)
										 .Select(g => new { g.Key, Count = g.Sum(x => x.Value) })
										 .ToList();

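			// Track how many map results exist per reduce key; these counters drive the
			// choice between single-step and multi-step reduction for each key.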
			BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, reduceKeyStats, enumerator => context.TransactionalStorage.Batch(accessor =>
			{
				while (enumerator.MoveNext())
				{
					var reduceKeyStat = enumerator.Current;
					accessor.MapReduce.IncrementReduceKeyCounter(indexId, reduceKeyStat.Key, reduceKeyStat.Count);
				}
			}));

			BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, changed, enumerator => context.TransactionalStorage.Batch(accessor =>
			{
				while (enumerator.MoveNext())
				{
					accessor.MapReduce.ScheduleReductions(indexId, 0, enumerator.Current);
				}
			}));

			UpdateIndexingStats(context, stats);
			AddindexingPerformanceStat(new IndexingPerformanceStats
			{
				OutputCount = count,
				ItemsCount = sourceCount,
				InputCount = documentsWrapped.Count,
				Operation = "Map",
				Duration = sw.Elapsed,
				Started = start,
				LoadDocumentCount = loadDocumentCount,
				LoadDocumentDurationMs = loadDocumentDuration 
			});
			BatchCompleted("Current Map");
			logIndexing.Debug("Mapped {0} documents for {1}", count, indexId);
		}