public AnonymousObjectToLuceneDocumentConverter(DocumentDatabase database, IndexDefinition indexDefinition, AbstractViewGenerator viewGenerator, ILog log) { this.database = database; this.indexDefinition = indexDefinition; this.viewGenerator = viewGenerator; this.log = log; }
public MapReduceIndex(Directory directory, int id, IndexDefinition indexDefinition, AbstractViewGenerator viewGenerator, WorkContext context) : base(directory, id, indexDefinition, viewGenerator, context) { jsonSerializer = JsonExtensions.CreateDefaultJsonSerializer(); jsonSerializer.Converters = MapReduceConverters; }
public IndexPropertyBatcher(DocumentDatabase database, IndexedPropertiesSetupDoc setupDoc, string index, AbstractViewGenerator viewGenerator) { this.database = database; this.setupDoc = setupDoc; this.index = index; this.viewGenerator = viewGenerator; }
public override void IndexDocuments( AbstractViewGenerator viewGenerator, IEnumerable<dynamic> documents, WorkContext context, IStorageActionsAccessor actions, DateTime minimumTimestamp) { actions.Indexing.SetCurrentIndexStatsTo(name); var count = 0; Func<object, object> documentIdFetcher = null; var reduceKeys = new HashSet<string>(StringComparer.InvariantCultureIgnoreCase); var documentsWrapped = documents.Select(doc => { var documentId = doc.__document_id; foreach (var reduceKey in actions.MappedResults.DeleteMappedResultsForDocumentId((string)documentId, name)) { reduceKeys.Add(reduceKey); } return doc; }); foreach (var doc in RobustEnumeration(documentsWrapped, viewGenerator.MapDefinition, actions, context)) { count++; documentIdFetcher = CreateDocumentIdFetcherIfNeeded(documentIdFetcher, doc); var docIdValue = documentIdFetcher(doc); if (docIdValue == null) throw new InvalidOperationException("Could not find document id for this document"); var reduceValue = viewGenerator.GroupByExtraction(doc); if (reduceValue == null) { logIndexing.DebugFormat("Field {0} is used as the reduce key and cannot be null, skipping document {1}", viewGenerator.GroupByExtraction, docIdValue); continue; } var reduceKey = ReduceKeyToString(reduceValue); var docId = docIdValue.ToString(); reduceKeys.Add(reduceKey); var data = GetMapedData(doc); logIndexing.DebugFormat("Mapped result for '{0}': '{1}'", name, data); var hash = ComputeHash(name, reduceKey); actions.MappedResults.PutMappedResult(name, docId, reduceKey, data, hash); actions.Indexing.IncrementSuccessIndexing(); } actions.Tasks.AddTask(new ReduceTask { Index = name, ReduceKeys = reduceKeys.ToArray() }, minimumTimestamp); logIndexing.DebugFormat("Mapped {0} documents for {1}", count, name); }
public MapReduceIndex(Directory directory, string name, IndexDefinition indexDefinition, AbstractViewGenerator viewGenerator, WorkContext context) : base(directory, name, indexDefinition, viewGenerator, context) { jsonSerializer = new JsonSerializer(); foreach (var jsonConverter in Default.Converters) { jsonSerializer.Converters.Add(jsonConverter); } }
public MapReduceIndex(Directory directory, int id, IndexDefinition indexDefinition, AbstractViewGenerator viewGenerator, WorkContext context) : base(directory, id, indexDefinition, viewGenerator, context) { jsonSerializer = new JsonSerializer(); foreach (var jsonConverter in Default.Converters) { jsonSerializer.Converters.Add(jsonConverter); } jsonSerializer.Converters.Add(new IgnoreFieldable()); }
public override void IndexDocuments( AbstractViewGenerator viewGenerator, IEnumerable<object> documents, WorkContext context, DocumentStorageActions actions) { actions.SetCurrentIndexStatsTo(name); var count = 0; Func<object, object> documentIdFetcher = null; var reduceKeys = new HashSet<string>(); foreach (var doc in RobustEnumeration(documents, viewGenerator.MapDefinition, actions, context)) { count++; documentIdFetcher = CreateDocumentIdFetcherIfNeeded(documentIdFetcher, doc); var docIdValue = documentIdFetcher(doc); if (docIdValue == null) throw new InvalidOperationException("Could not find document id for this document"); var reduceValue = viewGenerator.GroupByExtraction(doc); if (reduceValue == null) { log.DebugFormat("Field {0} is used as the reduce key and cannot be null, skipping document {1}", viewGenerator.GroupByExtraction, docIdValue); continue; } var reduceKey = ReduceKeyToString(reduceValue); var docId = docIdValue.ToString(); reduceKeys.Add(reduceKey); string data = GetMapedData(doc); log.DebugFormat("Mapped result for '{0}': '{1}'", name, data); var hash = ComputeHash(name, reduceKey); actions.PutMappedResult(name, docId, reduceKey, data, hash); actions.IncrementSuccessIndexing(); } foreach (var reduceKey in reduceKeys) { actions.AddTask(new ReduceTask { Index = name, ReduceKey = reduceKey }); } log.DebugFormat("Mapped {0} documents for {1}", count, name); }
public override void IndexDocuments( AbstractViewGenerator viewGenerator, IEnumerable<object> documents, WorkContext context, DocumentStorageActions actions) { actions.SetCurrentIndexStatsTo(name); var count = 0; PropertyDescriptor groupByPropertyDescriptor = null; PropertyDescriptor documentIdPropertyDescriptor = null; var reduceKeys = new HashSet<string>(); foreach (var doc in RobustEnumeration(documents, viewGenerator.MapDefinition, actions, context)) { count++; if (groupByPropertyDescriptor == null) { var props = TypeDescriptor.GetProperties(doc); groupByPropertyDescriptor = props.Find(viewGenerator.GroupByField, false); documentIdPropertyDescriptor = props.Find("__document_id", false); } var docIdValue = documentIdPropertyDescriptor.GetValue(doc); if (docIdValue == null) throw new InvalidOperationException("Could not find document id for this document"); var reduceValue = groupByPropertyDescriptor.GetValue(doc); if (reduceValue == null) { log.DebugFormat("Field {0} is used as the reduce key and cannot be null, skipping document {1}", viewGenerator.GroupByField, docIdValue); continue; } var reduceKey = reduceValue.ToString(); var docId = docIdValue.ToString(); reduceKeys.Add(reduceKey); actions.PutMappedResult(name, docId, reduceKey, JObject.FromObject(doc).ToString(Formatting.None)); actions.IncrementSuccessIndexing(); } foreach (var reduceKey in reduceKeys) { actions.AddTask(new ReduceTask { Index = name, ReduceKey = reduceKey }); } log.DebugFormat("Mapped {0} documents for {1}", count, name); }
public override void IndexDocuments( AbstractViewGenerator viewGenerator, IEnumerable<object> documents, WorkContext context, IStorageActionsAccessor actions) { actions.Indexing.SetCurrentIndexStatsTo(name); var count = 0; Write(indexWriter => { bool madeChanges = false; PropertyDescriptorCollection properties = null; var processedKeys = new HashSet<string>(); var documentsWrapped = documents.Select((dynamic doc) => { var documentId = doc.__document_id.ToString(); if (processedKeys.Add(documentId) == false) return doc; madeChanges = true; context.IndexUpdateTriggers.Apply(trigger => trigger.OnIndexEntryDeleted(name, documentId)); indexWriter.DeleteDocuments(new Term("__document_id", documentId)); return doc; }); foreach (var doc in RobustEnumeration(documentsWrapped, viewGenerator.MapDefinition, actions, context)) { count++; string newDocId; IEnumerable<AbstractField> fields; if (doc is DynamicJsonObject) fields = ExtractIndexDataFromDocument((DynamicJsonObject) doc, out newDocId); else fields = ExtractIndexDataFromDocument(properties, doc, out newDocId); if (newDocId != null) { var luceneDoc = new Document(); luceneDoc.Add(new Field("__document_id", newDocId, Field.Store.YES, Field.Index.NOT_ANALYZED)); madeChanges = true; CopyFieldsToDocument(luceneDoc, fields); context.IndexUpdateTriggers.Apply(trigger => trigger.OnIndexEntryCreated(name, newDocId, luceneDoc)); log.DebugFormat("Index '{0}' resulted in: {1}", name, luceneDoc); indexWriter.AddDocument(luceneDoc); } actions.Indexing.IncrementSuccessIndexing(); } return madeChanges; }); log.DebugFormat("Indexed {0} documents for {1}", count, name); }
public Etag OptimizeCutoffForIndex(AbstractViewGenerator viewGenerator, Etag cutoffEtag) { if (cutoffEtag != null) return cutoffEtag; if (viewGenerator.ReduceDefinition == null && viewGenerator.ForEntityNames.Count > 0) { var etags = viewGenerator.ForEntityNames.Select(GetLastEtagForCollection) .Where(x=> x != null); if (etags.Any()) return etags.Max(); } return null; }
protected Index(Directory directory, string name, IndexDefinition indexDefinition, AbstractViewGenerator viewGenerator) { if (directory == null) throw new ArgumentNullException("directory"); if (name == null) throw new ArgumentNullException("name"); if (indexDefinition == null) throw new ArgumentNullException("indexDefinition"); if (viewGenerator == null) throw new ArgumentNullException("viewGenerator"); this.name = name; this.indexDefinition = indexDefinition; this.viewGenerator = viewGenerator; logIndexing.Debug("Creating index for {0}", name); this.directory = directory; RecreateSearcher(); }
public override void IndexDocuments(AbstractViewGenerator viewGenerator, IEnumerable<object> documents, WorkContext context, DocumentStorageActions actions)
{
    actions.SetCurrentIndexStatsTo(name);
    var count = 0;
    Write(indexWriter =>
    {
        string currentId = null;
        PropertyDescriptorCollection properties = null;
        foreach (var doc in RobustEnumeration(documents, viewGenerator.MapDefinition, actions, context))
        {
            count++;
            string newDocId;
            IEnumerable<AbstractField> fields;
            if (doc is DynamicJsonObject)
                fields = ExtractIndexDataFromDocument((DynamicJsonObject)doc, out newDocId);
            else
                fields = ExtractIndexDataFromDocument(properties, doc, out newDocId);

            if (currentId != newDocId) // new document id, so delete all old values matching it
            {
                context.IndexUpdateTriggers.Apply(trigger => trigger.OnIndexEntryDeleted(name, newDocId));
                indexWriter.DeleteDocuments(new Term("__document_id", newDocId));
            }

            if (newDocId != null)
            {
                var luceneDoc = new Document();
                luceneDoc.Add(new Field("__document_id", newDocId, Field.Store.YES, Field.Index.NOT_ANALYZED));
                currentId = newDocId;
                CopyFieldsToDocumentButRemoveDuplicateValues(luceneDoc, fields);
                context.IndexUpdateTriggers.Apply(trigger => trigger.OnIndexEntryCreated(name, newDocId, luceneDoc));
                log.DebugFormat("Index '{0}' resulted in: {1}", name, luceneDoc);
                indexWriter.AddDocument(luceneDoc);
            }
            actions.IncrementSuccessIndexing();
        }
        return currentId != null;
    });
    log.DebugFormat("Indexed {0} documents for {1}", count, name);
}
public override void IndexDocuments(AbstractViewGenerator viewGenerator, IEnumerable<object> documents, WorkContext context, DocumentStorageActions actions)
{
    actions.SetCurrentIndexStatsTo(name);
    var count = 0;
    Write(indexWriter =>
    {
        string currentId = null;
        var converter = new AnonymousObjectToLuceneDocumentConverter();
        PropertyDescriptorCollection properties = null;
        foreach (var doc in RobustEnumeration(documents, viewGenerator.MapDefinition, actions, context))
        {
            count++;
            if (properties == null)
            {
                properties = TypeDescriptor.GetProperties(doc);
            }
            var newDocId = properties.Find("__document_id", false).GetValue(doc) as string;
            var fields = converter.Index(doc, properties, indexDefinition);

            if (currentId != newDocId) // new document id, so delete all old values matching it
            {
                indexWriter.DeleteDocuments(new Term("__document_id", newDocId));
            }

            if (newDocId != null)
            {
                var luceneDoc = new Document();
                luceneDoc.Add(new Field("__document_id", newDocId, Field.Store.YES, Field.Index.UN_TOKENIZED));
                currentId = newDocId;
                CopyFieldsToDocumentButRemoveDuplicateValues(luceneDoc, fields);
                log.DebugFormat("Indexing document {0}", luceneDoc);
                indexWriter.AddDocument(luceneDoc);
            }
            actions.IncrementSuccessIndexing();
        }
        return currentId != null;
    });
    log.DebugFormat("Indexed {0} documents for {1}", count, name);
}
protected Index(Directory directory, string name, IndexDefinition indexDefinition, AbstractViewGenerator viewGenerator)
{
    this.name = name;
    this.indexDefinition = indexDefinition;
    this.viewGenerator = viewGenerator;
    logIndexing.DebugFormat("Creating index for {0}", name);
    this.directory = directory;

    // clear any locks that are currently held
    // this may happen if the server crashed while
    // writing to the index
    this.directory.ClearLock("write.lock");

    searcher = new CurrentIndexSearcher
    {
        Searcher = new IndexSearcher(directory, true)
    };
}
public Aggregator(AggregationEngine aggregationEngine, string name, AbstractViewGenerator generator) { _aggregationEngine = aggregationEngine; _name = name; _generator = generator; _aggStat = "status/" + _name; using (var stream = aggregationEngine.Storage.Reader.Read(_aggStat)) { if (stream == null) { _lastAggregatedEtag = Etag.Empty; return; } var status = RavenJObject.Load(new JsonTextReader(new StreamReader(stream))); _lastAggregatedEtag = Etag.Parse(status.Value<string>("@etag")); } }
protected Index(Directory directory, string name, IndexDefinition indexDefinition, AbstractViewGenerator viewGenerator)
{
    if (directory == null) throw new ArgumentNullException("directory");
    if (name == null) throw new ArgumentNullException("name");
    if (indexDefinition == null) throw new ArgumentNullException("indexDefinition");
    if (viewGenerator == null) throw new ArgumentNullException("viewGenerator");

    this.name = name;
    this.indexDefinition = indexDefinition;
    this.viewGenerator = viewGenerator;
    logIndexing.Debug("Creating index for {0}", name);
    this.directory = directory;

    // clear any locks that are currently held
    // this may happen if the server crashed while
    // writing to the index
    this.directory.ClearLock("write.lock");

    RecreateSearcher();
}
public static Sort GetSort(this IndexQuery self, IndexDefinition indexDefinition, AbstractViewGenerator viewGenerator) { var spatialQuery = self as SpatialIndexQuery; var sortedFields = self.SortedFields; if (sortedFields == null || sortedFields.Length <= 0) { if (spatialQuery == null || string.IsNullOrEmpty(self.Query) == false) return null; sortedFields = new[] { new SortedField(Constants.DistanceFieldName), }; } return new Sort(sortedFields .Select(sortedField => { if (sortedField.Field == Constants.TemporaryScoreValue) { return SortField.FIELD_SCORE; } if (sortedField.Field.StartsWith(Constants.RandomFieldName)) { var parts = sortedField.Field.Split(new[] { ';' }, StringSplitOptions.RemoveEmptyEntries); if (parts.Length < 2) // truly random return new RandomSortField(Guid.NewGuid().ToString()); return new RandomSortField(parts[1]); } if (spatialQuery != null && sortedField.Field == Constants.DistanceFieldName) { var spatialField = viewGenerator.GetSpatialField(spatialQuery.SpatialFieldName); var shape = spatialField.ReadShape(spatialQuery.QueryShape); var dsort = new SpatialDistanceFieldComparatorSource(spatialField, shape.GetCenter()); return new SortField(Constants.DistanceFieldName, dsort, sortedField.Descending); } var sortOptions = GetSortOption(indexDefinition, sortedField.Field); if (sortOptions == null || sortOptions == SortOptions.None) return new SortField(sortedField.Field, CultureInfo.InvariantCulture, sortedField.Descending); if (sortOptions.Value == SortOptions.Short) sortOptions = SortOptions.Int; return new SortField(sortedField.Field, (int)sortOptions.Value, sortedField.Descending); }) .ToArray()); }
private void MultiStepReduce(IndexToWorkOn index, string[] keysToReduce, AbstractViewGenerator viewGenerator, ConcurrentSet<object> itemsToDelete)
{
    var needToMoveToMultiStep = new HashSet<string>();
    transactionalStorage.Batch(actions =>
    {
        foreach (var localReduceKey in keysToReduce)
        {
            var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexName, localReduceKey);
            if (lastPerformedReduceType != ReduceType.MultiStep)
                needToMoveToMultiStep.Add(localReduceKey);
            if (lastPerformedReduceType != ReduceType.SingleStep)
                continue;
            // we exceeded the limit of items to reduce in single step
            // now we need to schedule reductions at level 0 for all map results with given reduce key
            var mappedItems = actions.MapReduce.GetMappedBuckets(index.IndexName, localReduceKey).ToList();
            foreach (var result in mappedItems.Select(x => new ReduceKeyAndBucket(x, localReduceKey)))
            {
                actions.MapReduce.ScheduleReductions(index.IndexName, 0, result);
            }
        }
    });
    for (int i = 0; i < 3; i++)
    {
        var level = i;
        var reduceParams = new GetItemsToReduceParams(index.IndexName, keysToReduce, level, true, itemsToDelete);
        bool retry = true;
        while (retry && reduceParams.ReduceKeys.Count > 0)
        {
            var reduceBatchAutoThrottlerId = Guid.NewGuid();
            try
            {
                transactionalStorage.Batch(actions =>
                {
                    context.CancellationToken.ThrowIfCancellationRequested();
                    var batchTimeWatcher = Stopwatch.StartNew();
                    reduceParams.Take = context.CurrentNumberOfItemsToReduceInSingleBatch;
                    var persistedResults = actions.MapReduce.GetItemsToReduce(reduceParams).ToList();
                    if (persistedResults.Count == 0)
                    {
                        retry = false;
                        return;
                    }
                    var count = persistedResults.Count;
                    var size = persistedResults.Sum(x => x.Size);
                    autoTuner.CurrentlyUsedBatchSizes.GetOrAdd(reduceBatchAutoThrottlerId, size);
                    if (Log.IsDebugEnabled)
                    {
                        if (persistedResults.Count > 0)
                            Log.Debug(() => string.Format("Found {0} results for keys [{1}] for index {2} at level {3} in {4}",
                                persistedResults.Count,
                                string.Join(", ", persistedResults.Select(x => x.ReduceKey).Distinct()),
                                index.IndexName, level, batchTimeWatcher.Elapsed));
                        else
                            Log.Debug("No reduce keys found for {0}", index.IndexName);
                    }
                    context.CancellationToken.ThrowIfCancellationRequested();
                    var requiredReduceNextTime = persistedResults.Select(x => new ReduceKeyAndBucket(x.Bucket, x.ReduceKey))
                        .OrderBy(x => x.Bucket)
                        .Distinct()
                        .ToArray();
                    foreach (var mappedResultInfo in requiredReduceNextTime)
                    {
                        actions.MapReduce.RemoveReduceResults(index.IndexName, level + 1, mappedResultInfo.ReduceKey, mappedResultInfo.Bucket);
                    }
                    if (level != 2)
                    {
                        var reduceKeysAndBuckets = requiredReduceNextTime
                            .Select(x => new ReduceKeyAndBucket(x.Bucket / 1024, x.ReduceKey))
                            .Distinct()
                            .ToArray();
                        foreach (var reduceKeysAndBucket in reduceKeysAndBuckets)
                        {
                            actions.MapReduce.ScheduleReductions(index.IndexName, level + 1, reduceKeysAndBucket);
                        }
                    }
                    var results = persistedResults
                        .Where(x => x.Data != null)
                        .GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data))
                        .ToArray();
                    var reduceKeys = new HashSet<string>(persistedResults.Select(x => x.ReduceKey), StringComparer.InvariantCultureIgnoreCase);
                    context.PerformanceCounters.ReducedPerSecond.IncrementBy(results.Length);
                    context.CancellationToken.ThrowIfCancellationRequested();
                    var reduceTimeWatcher = Stopwatch.StartNew();
                    context.IndexStorage.Reduce(index.IndexName, viewGenerator, results, level, context, actions, reduceKeys, persistedResults.Count);
                    var batchDuration = batchTimeWatcher.Elapsed;
                    Log.Debug("Indexed {0} reduce keys in {1} with {2} results for index {3} in {4} on level {5}",
                        reduceKeys.Count, batchDuration, results.Length, index.IndexName, reduceTimeWatcher.Elapsed, level);
                    autoTuner.AutoThrottleBatchSize(count, size, batchDuration);
                });
            }
            finally
            {
                long _;
                autoTuner.CurrentlyUsedBatchSizes.TryRemove(reduceBatchAutoThrottlerId, out _);
            }
        }
    }
    foreach (var reduceKey in needToMoveToMultiStep)
    {
        string localReduceKey = reduceKey;
        transactionalStorage.Batch(actions =>
            actions.MapReduce.UpdatePerformedReduceType(index.IndexName, localReduceKey, ReduceType.MultiStep));
    }
}
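The multi-step path above walks a fixed three-level bucket tree: level-0 map buckets are rolled up 1024-to-1 whenever reductions are scheduled at the next level (the `x.Bucket / 1024` projection), and level 2 holds the final result per reduce key. A minimal standalone sketch of that addressing scheme, for illustration only (the class, method, and sample values below are not part of the RavenDB sources):

using System;

// Illustrative only: the three-level reduce tree implied by MultiStepReduce above.
// Level 0 holds raw map buckets; scheduling at level + 1 uses bucket / 1024,
// so 1024 level-0 buckets collapse into one level-1 bucket, and level 2 is final.
static class BucketTreeSketch
{
    public static int NextLevelBucket(int bucket) => bucket / 1024;

    public static void Main()
    {
        int mapBucket = 123456;                  // a level-0 bucket produced by the map phase
        int level1 = NextLevelBucket(mapBucket); // 120
        int level2 = NextLevelBucket(level1);    // 0 - single final bucket per reduce key
        Console.WriteLine("{0} -> {1} -> {2}", mapBucket, level1, level2);
    }
}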
private void SingleStepReduce(IndexToWorkOn index, string[] keysToReduce, AbstractViewGenerator viewGenerator, ConcurrentSet<object> itemsToDelete)
{
    var needToMoveToSingleStepQueue = new ConcurrentQueue<HashSet<string>>();
    Log.Debug(() => string.Format("Executing single step reducing for {0} keys [{1}]", keysToReduce.Length, string.Join(", ", keysToReduce)));
    var batchTimeWatcher = Stopwatch.StartNew();
    var count = 0;
    var size = 0;
    var state = new ConcurrentQueue<Tuple<HashSet<string>, List<MappedResultInfo>>>();
    var reducingBatchThrottlerId = Guid.NewGuid();
    try
    {
        BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, keysToReduce, enumerator =>
        {
            var localNeedToMoveToSingleStep = new HashSet<string>();
            needToMoveToSingleStepQueue.Enqueue(localNeedToMoveToSingleStep);
            var localKeys = new HashSet<string>();
            while (enumerator.MoveNext())
            {
                localKeys.Add(enumerator.Current);
            }
            transactionalStorage.Batch(actions =>
            {
                var getItemsToReduceParams = new GetItemsToReduceParams(index: index.IndexName, reduceKeys: localKeys, level: 0, loadData: false, itemsToDelete: itemsToDelete)
                {
                    Take = int.MaxValue // just get all, we do the rate limit when we load the number of keys to reduce, anyway
                };
                var scheduledItems = actions.MapReduce.GetItemsToReduce(getItemsToReduceParams).ToList();
                autoTuner.CurrentlyUsedBatchSizes.GetOrAdd(reducingBatchThrottlerId, scheduledItems.Sum(x => x.Size));
                if (scheduledItems.Count == 0)
                {
                    if (Log.IsWarnEnabled)
                    {
                        Log.Warn("Found single reduce items ({0}) that didn't have any items to reduce. Deleting level 1 & level 2 items for those keys. (If you can reproduce this, please contact [email protected])", string.Join(", ", keysToReduce));
                    }
                    // Here we have an interesting issue. We have scheduled reductions, because GetReduceTypesPerKeys() returned them
                    // and at the same time, we don't have any at level 0. That probably means that we have them at level 1 or 2.
                    // They shouldn't be here, and indeed, we remove them just a little down from here in this function.
                    // That said, they might have smuggled in between versions, or something happened to cause them to be here.
                    // In order to avoid that, we forcibly delete those extra items from the scheduled reductions, and move on
                    foreach (var reduceKey in keysToReduce)
                    {
                        actions.MapReduce.DeleteScheduledReduction(index.IndexName, 1, reduceKey);
                        actions.MapReduce.DeleteScheduledReduction(index.IndexName, 2, reduceKey);
                    }
                }
                foreach (var reduceKey in localKeys)
                {
                    var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexName, reduceKey);
                    if (lastPerformedReduceType != ReduceType.SingleStep)
                        localNeedToMoveToSingleStep.Add(reduceKey);
                    if (lastPerformedReduceType != ReduceType.MultiStep)
                        continue;
                    Log.Debug("Key {0} was moved from multi step to single step reduce, removing existing reduce results records", reduceKey);
                    // now we are in single step but previously multi step reduce was performed for the given key
                    var mappedBuckets = actions.MapReduce.GetMappedBuckets(index.IndexName, reduceKey).ToList();
                    // add scheduled items too to be sure we will delete reduce results of already deleted documents
                    mappedBuckets.AddRange(scheduledItems.Select(x => x.Bucket));
                    foreach (var mappedBucket in mappedBuckets.Distinct())
                    {
                        actions.MapReduce.RemoveReduceResults(index.IndexName, 1, reduceKey, mappedBucket);
                        actions.MapReduce.RemoveReduceResults(index.IndexName, 2, reduceKey, mappedBucket / 1024);
                    }
                }
                var mappedResults = actions.MapReduce.GetMappedResults(index.IndexName, localKeys, loadData: true).ToList();
                Interlocked.Add(ref count, mappedResults.Count);
                Interlocked.Add(ref size, mappedResults.Sum(x => x.Size));
                mappedResults.ApplyIfNotNull(x => x.Bucket = 0);
                state.Enqueue(Tuple.Create(localKeys, mappedResults));
            });
        });
        var reduceKeys = new HashSet<string>(state.SelectMany(x => x.Item1));
        var results = state.SelectMany(x => x.Item2)
            .Where(x => x.Data != null)
            .GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data))
            .ToArray();
        context.PerformanceCounters.ReducedPerSecond.IncrementBy(results.Length);
        context.TransactionalStorage.Batch(actions =>
            context.IndexStorage.Reduce(index.IndexName, viewGenerator, results, 2, context, actions, reduceKeys, state.Sum(x => x.Item2.Count))
        );
        autoTuner.AutoThrottleBatchSize(count, size, batchTimeWatcher.Elapsed);
        var needToMoveToSingleStep = new HashSet<string>();
        HashSet<string> set;
        while (needToMoveToSingleStepQueue.TryDequeue(out set))
        {
            needToMoveToSingleStep.UnionWith(set);
        }
        foreach (var reduceKey in needToMoveToSingleStep)
        {
            string localReduceKey = reduceKey;
            transactionalStorage.Batch(actions =>
                actions.MapReduce.UpdatePerformedReduceType(index.IndexName, localReduceKey, ReduceType.SingleStep));
        }
    }
    finally
    {
        long _;
        autoTuner.CurrentlyUsedBatchSizes.TryRemove(reducingBatchThrottlerId, out _);
    }
}
public override void IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp) { var count = 0; var sourceCount = 0; var sw = Stopwatch.StartNew(); var start = SystemTime.UtcNow; int loadDocumentCount = 0; long loadDocumentDuration = 0; Write((indexWriter, analyzer, stats) => { var processedKeys = new HashSet<string>(); var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(indexId)) .Where(x => x != null) .ToList(); try { var indexingPerfStats = RecordCurrentBatch("Current", batch.Docs.Count); batch.SetIndexingPerformance(indexingPerfStats); var docIdTerm = new Term(Constants.DocumentIdFieldName); var documentsWrapped = batch.Docs.Select((doc, i) => { Interlocked.Increment(ref sourceCount); if (doc.__document_id == null) throw new ArgumentException( string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc)); string documentId = doc.__document_id.ToString(); if (processedKeys.Add(documentId) == false) return doc; InvokeOnIndexEntryDeletedOnAllBatchers(batchers, docIdTerm.CreateTerm(documentId.ToLowerInvariant())); if (batch.SkipDeleteFromIndex[i] == false || context.ShouldRemoveFromIndex(documentId)) // maybe it is recently deleted? indexWriter.DeleteDocuments(docIdTerm.CreateTerm(documentId.ToLowerInvariant())); return doc; }) .Where(x => x is FilteredDocument == false) .ToList(); var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>(); var allReferenceEtags = new ConcurrentQueue<IDictionary<string, Etag>>(); BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, (partition) => { var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(context.Database, indexDefinition, viewGenerator, logIndexing); var luceneDoc = new Document(); var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); using (CurrentIndexingScope.Current = new CurrentIndexingScope(context.Database, PublicName)) { string currentDocId = null; int outputPerDocId = 0; Action<Exception, object> onErrorFunc; bool skipDocument = false; foreach (var doc in RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, stats, out onErrorFunc)) { float boost; IndexingResult indexingResult; try { indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost); } catch (Exception e) { onErrorFunc(e, doc); continue; } // ReSharper disable once RedundantBoolCompare --> code clarity if (indexingResult.NewDocId == null || indexingResult.ShouldSkip != false) { continue; } if (currentDocId != indexingResult.NewDocId) { currentDocId = indexingResult.NewDocId; outputPerDocId = 0; skipDocument = false; } if (skipDocument) continue; outputPerDocId++; if (EnsureValidNumberOfOutputsForDocument(currentDocId, outputPerDocId) == false) { skipDocument = true; continue; } Interlocked.Increment(ref count); luceneDoc.GetFields().Clear(); luceneDoc.Boost = boost; documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant()); luceneDoc.Add(documentIdField); foreach (var field in indexingResult.Fields) { luceneDoc.Add(field); } batchers.ApplyAndIgnoreAllErrors( exception => { logIndexing.WarnException( string.Format( "Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'", indexId, indexingResult.NewDocId), exception); context.AddError(indexId, indexingResult.NewDocId, exception.Message, "OnIndexEntryCreated Trigger" ); 
}, trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc)); LogIndexedDocument(indexingResult.NewDocId, luceneDoc); AddDocumentToIndex(indexWriter, luceneDoc, analyzer); Interlocked.Increment(ref stats.IndexingSuccesses); } allReferenceEtags.Enqueue(CurrentIndexingScope.Current.ReferencesEtags); allReferencedDocs.Enqueue(CurrentIndexingScope.Current.ReferencedDocuments); Interlocked.Add(ref loadDocumentCount, CurrentIndexingScope.Current.LoadDocumentCount); Interlocked.Add(ref loadDocumentDuration, CurrentIndexingScope.Current.LoadDocumentDuration.ElapsedMilliseconds); } }); UpdateDocumentReferences(actions, allReferencedDocs, allReferenceEtags); } catch (Exception e) { batchers.ApplyAndIgnoreAllErrors( ex => { logIndexing.WarnException("Failed to notify index update trigger batcher about an error", ex); context.AddError(indexId, null, ex.Message, "AnErrorOccured Trigger"); }, x => x.AnErrorOccured(e)); throw; } finally { batchers.ApplyAndIgnoreAllErrors( e => { logIndexing.WarnException("Failed to dispose on index update trigger", e); context.AddError(indexId, null, e.Message, "Dispose Trigger"); }, x => x.Dispose()); BatchCompleted("Current"); } return new IndexedItemsInfo(batch.HighestEtagBeforeFiltering) { ChangedDocs = sourceCount }; }); AddindexingPerformanceStat(new IndexingPerformanceStats { OutputCount = count, ItemsCount = sourceCount, InputCount = batch.Docs.Count, Duration = sw.Elapsed, Operation = "Index", Started = start, LoadDocumentCount = loadDocumentCount, LoadDocumentDurationMs = loadDocumentDuration }); logIndexing.Debug("Indexed {0} documents for {1}", count, indexId); }
public SimpleIndex(Directory directory, int id, IndexDefinition indexDefinition, AbstractViewGenerator viewGenerator, WorkContext context) : base(directory, id, indexDefinition, viewGenerator, context) { }
private void ApplyPrecomputedBatchForNewIndex(Index index, AbstractViewGenerator generator) { const string DocumentsByEntityNameIndex = "Raven/DocumentsByEntityName"; PrecomputedIndexingBatch result = null; var docsToIndex = new List<JsonDocument>(); TransactionalStorage.Batch(actions => { var tags = generator.ForEntityNames.Select(entityName => "Tag:[[" + entityName + "]]").ToList(); var query = string.Join(" OR ", tags); JsonDocument highestByEtag = null; var cts = new CancellationTokenSource(); using (var linked = CancellationTokenSource.CreateLinkedTokenSource(cts.Token, WorkContext.CancellationToken)) using (var op = new QueryActions.DatabaseQueryOperation(Database, DocumentsByEntityNameIndex, new IndexQuery { Query = query, PageSize = Database.Configuration.MaxNumberOfItemsToProcessInSingleBatch }, actions, linked) { ShouldSkipDuplicateChecking = true }) { op.Init(); if (op.Header.TotalResults == 0 || (op.Header.TotalResults > Database.Configuration.MaxNumberOfItemsToProcessInSingleBatch)) { // we don't apply this optimization if the total number of results // to index is more than the max numbers to index in a single batch. // The idea here is that we need to keep the amount // of memory we use to a manageable level even when introducing a new index to a BIG // database try { cts.Cancel(); // we have to run just a little bit of the query to properly setup the disposal op.Execute(o => { }); } catch (OperationCanceledException) { } return; } Log.Debug("For new index {0}, using precomputed indexing batch optimization for {1} docs", index, op.Header.TotalResults); op.Execute(document => { var metadata = document.Value<RavenJObject>(Constants.Metadata); var key = metadata.Value<string>("@id"); var etag = Etag.Parse(metadata.Value<string>("@etag")); var lastModified = DateTime.Parse(metadata.Value<string>(Constants.LastModified)); document.Remove(Constants.Metadata); var doc = new JsonDocument { DataAsJson = document, Etag = etag, Key = key, LastModified = lastModified, SkipDeleteFromIndex = true, Metadata = metadata }; docsToIndex.Add(doc); if (highestByEtag == null || doc.Etag.CompareTo(highestByEtag.Etag) > 0) highestByEtag = doc; }); } result = new PrecomputedIndexingBatch { LastIndexed = highestByEtag.Etag, LastModified = highestByEtag.LastModified.Value, Documents = docsToIndex, Index = index }; }); if (result != null && result.Documents != null && result.Documents.Count > 0) Database.IndexingExecuter.IndexPrecomputedBatch(result); }
public IndexingPerformanceStats Reduce( int index, AbstractViewGenerator viewGenerator, IEnumerable<IGrouping<int, object>> mappedResults, int level, WorkContext context, IStorageActionsAccessor actions, HashSet<string> reduceKeys, int inputCount) { Index value = indexes[index]; if (value == null) { log.Debug("Tried to index on a non existent index {0}, ignoring", index); return null; } var mapReduceIndex = value as MapReduceIndex; if (mapReduceIndex == null) { log.Warn("Tried to reduce on an index that is not a map/reduce index: {0}, ignoring", index); return null; } using (EnsureInvariantCulture()) { var reduceDocuments = new MapReduceIndex.ReduceDocuments(mapReduceIndex, viewGenerator, mappedResults, level, context, actions, reduceKeys, inputCount); var performance = reduceDocuments.ExecuteReduction(); context.RaiseIndexChangeNotification(new IndexChangeNotification { Name = value.PublicName, Type = IndexChangeTypes.ReduceCompleted }); return performance; } }
public IndexingPerformanceStats Index(int index, AbstractViewGenerator viewGenerator, IndexingBatch batch, WorkContext context, IStorageActionsAccessor actions, DateTime minimumTimestamp, CancellationToken token) { Index value; if (indexes.TryGetValue(index, out value) == false) { log.Debug("Tried to index on a non existent index {0}, ignoring", index); return null; } using (EnsureInvariantCulture()) using (DocumentCacher.SkipSettingDocumentsInDocumentCache()) { var performance = value.IndexDocuments(viewGenerator, batch, actions, minimumTimestamp, token); context.RaiseIndexChangeNotification(new IndexChangeNotification { Name = value.PublicName, Type = IndexChangeTypes.MapCompleted }); return performance; } }
protected Index(Directory directory, string name, IndexDefinition indexDefinition, AbstractViewGenerator viewGenerator, WorkContext context) { currentIndexSearcherHolder = new IndexSearcherHolder(name ,context); if (directory == null) throw new ArgumentNullException("directory"); if (name == null) throw new ArgumentNullException("name"); if (indexDefinition == null) throw new ArgumentNullException("indexDefinition"); if (viewGenerator == null) throw new ArgumentNullException("viewGenerator"); this.name = name; this.indexDefinition = indexDefinition; this.viewGenerator = viewGenerator; this.context = context; logIndexing.Debug("Creating index for {0}", name); this.directory = directory; flushSize = context.Configuration.FlushIndexToDiskSizeInMb * 1024 * 1024; RecreateSearcher(); }
public static void AssertQueryDoesNotContainFieldsThatAreNotIndexed(IndexQuery indexQuery, AbstractViewGenerator viewGenerator) { if (string.IsNullOrWhiteSpace(indexQuery.Query)) return; HashSet<string> hashSet = SimpleQueryParser.GetFields(indexQuery); foreach (string field in hashSet) { string f = field; if (f.EndsWith("_Range")) { f = f.Substring(0, f.Length - "_Range".Length); } if (viewGenerator.ContainsField(f) == false && viewGenerator.ContainsField("_") == false) // the catch all field name means that we have dynamic fields names throw new ArgumentException("The field '" + f + "' is not indexed, cannot query on fields that are not indexed"); } if (indexQuery.SortedFields == null) return; foreach (SortedField field in indexQuery.SortedFields) { string f = field.Field; if (f == Constants.TemporaryScoreValue) continue; if (f.EndsWith("_Range")) { f = f.Substring(0, f.Length - "_Range".Length); } if (f.StartsWith(Constants.RandomFieldName)) continue; if (viewGenerator.ContainsField(f) == false && f != Constants.DistanceFieldName && viewGenerator.ContainsField("_") == false)// the catch all field name means that we have dynamic fields names throw new ArgumentException("The field '" + f + "' is not indexed, cannot sort on fields that are not indexed"); } }
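The assertion above normalizes query and sort field names before checking them against the index, stripping the "_Range" suffix in two places. A minimal standalone sketch of that normalization, for illustration only (the class and method names are hypothetical, not part of the RavenDB API):

// Illustrative only: mirrors the "_Range" suffix stripping performed in
// AssertQueryDoesNotContainFieldsThatAreNotIndexed before ContainsField is consulted.
static class QueryFieldNameSketch
{
    public static string StripRangeSuffix(string field)
    {
        const string rangeSuffix = "_Range";
        return field.EndsWith(rangeSuffix)
            ? field.Substring(0, field.Length - rangeSuffix.Length)
            : field;
    }
}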
public abstract void IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp);
private ReducingPerformanceStats SingleStepReduce(IndexToWorkOn index, List<string> keysToReduce, AbstractViewGenerator viewGenerator, ConcurrentSet<object> itemsToDelete, CancellationToken token) { var needToMoveToSingleStepQueue = new ConcurrentQueue<HashSet<string>>(); if ( Log.IsDebugEnabled ) Log.Debug(() => string.Format("Executing single step reducing for {0} keys [{1}]", keysToReduce.Count, string.Join(", ", keysToReduce))); var batchTimeWatcher = Stopwatch.StartNew(); var reducingBatchThrottlerId = Guid.NewGuid(); var reducePerformanceStats = new ReducingPerformanceStats(ReduceType.SingleStep); var reduceLevelStats = new ReduceLevelPeformanceStats { Started = SystemTime.UtcNow, Level = 2 }; try { var parallelOperations = new ConcurrentQueue<ParallelBatchStats>(); var parallelProcessingStart = SystemTime.UtcNow; BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, keysToReduce, enumerator => { var parallelStats = new ParallelBatchStats { StartDelay = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds }; var localNeedToMoveToSingleStep = new HashSet<string>(); needToMoveToSingleStepQueue.Enqueue(localNeedToMoveToSingleStep); var localKeys = new HashSet<string>(); while (enumerator.MoveNext()) { token.ThrowIfCancellationRequested(); localKeys.Add(enumerator.Current); } transactionalStorage.Batch(actions => { var getItemsToReduceParams = new GetItemsToReduceParams(index: index.IndexId, reduceKeys: localKeys, level: 0, loadData: false, itemsToDelete: itemsToDelete) { Take = int.MaxValue // just get all, we do the rate limit when we load the number of keys to reduce, anyway }; var getItemsToReduceDuration = Stopwatch.StartNew(); int scheduledItemsSum = 0; int scheduledItemsCount = 0; List<int> scheduledItemsMappedBuckets = new List<int>(); using (StopwatchScope.For(getItemsToReduceDuration)) { foreach (var item in actions.MapReduce.GetItemsToReduce(getItemsToReduceParams, token)) { scheduledItemsMappedBuckets.Add(item.Bucket); scheduledItemsSum += item.Size; scheduledItemsCount++; } } parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_GetItemsToReduce, getItemsToReduceDuration.ElapsedMilliseconds)); autoTuner.CurrentlyUsedBatchSizesInBytes.GetOrAdd(reducingBatchThrottlerId, scheduledItemsSum); if (scheduledItemsCount == 0) { // Here we have an interesting issue. We have scheduled reductions, because GetReduceTypesPerKeys() returned them // and at the same time, we don't have any at level 0. That probably means that we have them at level 1 or 2. // They shouldn't be here, and indeed, we remove them just a little down from here in this function. // That said, they might have smuggled in between versions, or something happened to cause them to be here. // In order to avoid that, we forcibly delete those extra items from the scheduled reductions, and move on Log.Warn("Found single reduce items ({0}) that didn't have any items to reduce. Deleting level 1 & level 2 items for those keys. 
(If you can reproduce this, please contact [email protected])", string.Join(", ", keysToReduce)); var deletingScheduledReductionsDuration = Stopwatch.StartNew(); using (StopwatchScope.For(deletingScheduledReductionsDuration)) { foreach (var reduceKey in keysToReduce) { token.ThrowIfCancellationRequested(); actions.MapReduce.DeleteScheduledReduction(index.IndexId, 1, reduceKey); actions.MapReduce.DeleteScheduledReduction(index.IndexId, 2, reduceKey); } } parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_DeleteScheduledReductions, deletingScheduledReductionsDuration.ElapsedMilliseconds)); } var removeReduceResultsDuration = new Stopwatch(); foreach (var reduceKey in localKeys) { token.ThrowIfCancellationRequested(); var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexId, reduceKey); if (lastPerformedReduceType != ReduceType.SingleStep) localNeedToMoveToSingleStep.Add(reduceKey); if (lastPerformedReduceType != ReduceType.MultiStep) continue; if ( Log.IsDebugEnabled ) { Log.Debug("Key {0} was moved from multi step to single step reduce, removing existing reduce results records", reduceKey); } using (StopwatchScope.For(removeReduceResultsDuration)) { // now we are in single step but previously multi step reduce was performed for the given key var mappedBuckets = actions.MapReduce.GetMappedBuckets(index.IndexId, reduceKey, token); // add scheduled items too to be sure we will delete reduce results of already deleted documents foreach (var mappedBucket in mappedBuckets.Union(scheduledItemsMappedBuckets)) { actions.MapReduce.RemoveReduceResults(index.IndexId, 1, reduceKey, mappedBucket); actions.MapReduce.RemoveReduceResults(index.IndexId, 2, reduceKey, mappedBucket / 1024); } } } parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_RemoveReduceResults, removeReduceResultsDuration.ElapsedMilliseconds)); parallelOperations.Enqueue(parallelStats); }); }); reduceLevelStats.Operations.Add(new ParallelPerformanceStats { NumberOfThreads = parallelOperations.Count, DurationMs = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds, BatchedOperations = parallelOperations.ToList() }); var getMappedResultsDuration = new Stopwatch(); var reductionPerformanceStats = new List<IndexingPerformanceStats>(); var keysLeftToReduce = new HashSet<string>(keysToReduce); while (keysLeftToReduce.Count > 0) { var keysReturned = new HashSet<string>(); // Try to diminish the allocations happening because of .Resize() var mappedResults = new List<MappedResultInfo>(keysLeftToReduce.Count); context.TransactionalStorage.Batch(actions => { var take = context.CurrentNumberOfItemsToReduceInSingleBatch; using (StopwatchScope.For(getMappedResultsDuration)) { mappedResults = actions.MapReduce.GetMappedResults(index.IndexId, keysLeftToReduce, true, take, keysReturned, token, mappedResults); } }); var count = mappedResults.Count; int size = 0; foreach ( var item in mappedResults ) { item.Bucket = 0; size += item.Size; } var results = mappedResults.GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data)).ToArray(); context.MetricsCounters.ReducedPerSecond.Mark(results.Length); token.ThrowIfCancellationRequested(); var performance = context.IndexStorage.Reduce(index.IndexId, viewGenerator, results, 2, context, null, keysReturned, count); reductionPerformanceStats.Add(performance); autoTuner.AutoThrottleBatchSize(count, size, batchTimeWatcher.Elapsed); } var needToMoveToSingleStep = new HashSet<string>(); HashSet<string> set; while 
(needToMoveToSingleStepQueue.TryDequeue(out set)) { needToMoveToSingleStep.UnionWith(set); } foreach (var reduceKey in needToMoveToSingleStep) { string localReduceKey = reduceKey; transactionalStorage.Batch(actions => actions.MapReduce.UpdatePerformedReduceType(index.IndexId, localReduceKey, ReduceType.SingleStep)); } reduceLevelStats.Completed = SystemTime.UtcNow; reduceLevelStats.Duration = reduceLevelStats.Completed - reduceLevelStats.Started; reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_GetMappedResults, getMappedResultsDuration.ElapsedMilliseconds)); reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.StorageCommit, 0)); // in single step we write directly to Lucene index foreach (var stats in reductionPerformanceStats) { reduceLevelStats.Add(stats); } reducePerformanceStats.LevelStats.Add(reduceLevelStats); } finally { long _; autoTuner.CurrentlyUsedBatchSizesInBytes.TryRemove(reducingBatchThrottlerId, out _); } return reducePerformanceStats; }
private ReducingPerformanceStats MultiStepReduce(IndexToWorkOn index, List<string> keysToReduce, AbstractViewGenerator viewGenerator, ConcurrentSet<object> itemsToDelete, CancellationToken token) { var needToMoveToMultiStep = new HashSet<string>(); transactionalStorage.Batch(actions => { foreach (var localReduceKey in keysToReduce) { token.ThrowIfCancellationRequested(); var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexId, localReduceKey); if (lastPerformedReduceType != ReduceType.MultiStep) needToMoveToMultiStep.Add(localReduceKey); if (lastPerformedReduceType != ReduceType.SingleStep) continue; // we exceeded the limit of items to reduce in single step // now we need to schedule reductions at level 0 for all map results with given reduce key var mappedItems = actions.MapReduce.GetMappedBuckets(index.IndexId, localReduceKey, token).ToList(); foreach (var result in mappedItems.Select(x => new ReduceKeyAndBucket(x, localReduceKey))) { actions.MapReduce.ScheduleReductions(index.IndexId, 0, result); } } }); var reducePerformance = new ReducingPerformanceStats(ReduceType.MultiStep); var keysToReduceSet = new HashSet<string>(keysToReduce); for (int i = 0; i < 3; i++) { var level = i; var reduceLevelStats = new ReduceLevelPeformanceStats() { Level = level, Started = SystemTime.UtcNow, }; var reduceParams = new GetItemsToReduceParams( index.IndexId, keysToReduceSet, level, true, itemsToDelete); var gettingItemsToReduceDuration = new Stopwatch(); var scheduleReductionsDuration = new Stopwatch(); var removeReduceResultsDuration = new Stopwatch(); var storageCommitDuration = new Stopwatch(); bool retry = true; while (retry && reduceParams.ReduceKeys.Count > 0) { var reduceBatchAutoThrottlerId = Guid.NewGuid(); try { transactionalStorage.Batch(actions => { token.ThrowIfCancellationRequested(); actions.BeforeStorageCommit += storageCommitDuration.Start; actions.AfterStorageCommit += storageCommitDuration.Stop; var batchTimeWatcher = Stopwatch.StartNew(); reduceParams.Take = context.CurrentNumberOfItemsToReduceInSingleBatch; int size = 0; IList<MappedResultInfo> persistedResults; var reduceKeys = new HashSet<string>(StringComparer.InvariantCultureIgnoreCase); using (StopwatchScope.For(gettingItemsToReduceDuration)) { persistedResults = actions.MapReduce.GetItemsToReduce(reduceParams, token); foreach (var item in persistedResults) { reduceKeys.Add(item.ReduceKey); size += item.Size; } } if (persistedResults.Count == 0) { retry = false; return; } var count = persistedResults.Count; autoTuner.CurrentlyUsedBatchSizesInBytes.GetOrAdd(reduceBatchAutoThrottlerId, size); if (Log.IsDebugEnabled) { if (persistedResults.Count > 0) { Log.Debug(() => string.Format("Found {0} results for keys [{1}] for index {2} at level {3} in {4}", persistedResults.Count, string.Join(", ", persistedResults.Select(x => x.ReduceKey).Distinct()), index.IndexId, level, batchTimeWatcher.Elapsed)); } else { Log.Debug("No reduce keys found for {0}", index.IndexId); } } token.ThrowIfCancellationRequested(); var requiredReduceNextTimeSet = new HashSet<ReduceKeyAndBucket>(persistedResults.Select(x => new ReduceKeyAndBucket(x.Bucket, x.ReduceKey)), ReduceKeyAndBucketEqualityComparer.Instance); using (StopwatchScope.For(removeReduceResultsDuration)) { foreach (var mappedResultInfo in requiredReduceNextTimeSet) { token.ThrowIfCancellationRequested(); actions.MapReduce.RemoveReduceResults(index.IndexId, level + 1, mappedResultInfo.ReduceKey, mappedResultInfo.Bucket); } } if (level != 2) { var 
reduceKeysAndBucketsSet = new HashSet<ReduceKeyAndBucket>(requiredReduceNextTimeSet.Select(x => new ReduceKeyAndBucket(x.Bucket / 1024, x.ReduceKey)), ReduceKeyAndBucketEqualityComparer.Instance); using (StopwatchScope.For(scheduleReductionsDuration)) { foreach (var reduceKeysAndBucket in reduceKeysAndBucketsSet) { token.ThrowIfCancellationRequested(); actions.MapReduce.ScheduleReductions(index.IndexId, level + 1, reduceKeysAndBucket); } } } token.ThrowIfCancellationRequested(); var reduceTimeWatcher = Stopwatch.StartNew(); var results = persistedResults.Where(x => x.Data != null) .GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data)); var performance = context.IndexStorage.Reduce(index.IndexId, viewGenerator, results, level, context, actions, reduceKeys, persistedResults.Count); context.MetricsCounters.ReducedPerSecond.Mark(results.Count()); reduceLevelStats.Add(performance); var batchDuration = batchTimeWatcher.Elapsed; if ( Log.IsDebugEnabled ) { Log.Debug("Indexed {0} reduce keys in {1} with {2} results for index {3} in {4} on level {5}", reduceKeys.Count, batchDuration, performance.ItemsCount, index.IndexId, reduceTimeWatcher.Elapsed, level); } autoTuner.AutoThrottleBatchSize(count, size, batchDuration); }); } finally { long _; autoTuner.CurrentlyUsedBatchSizesInBytes.TryRemove(reduceBatchAutoThrottlerId, out _); } } reduceLevelStats.Completed = SystemTime.UtcNow; reduceLevelStats.Duration = reduceLevelStats.Completed - reduceLevelStats.Started; reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_GetItemsToReduce, gettingItemsToReduceDuration.ElapsedMilliseconds)); reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_ScheduleReductions, scheduleReductionsDuration.ElapsedMilliseconds)); reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_RemoveReduceResults, removeReduceResultsDuration.ElapsedMilliseconds)); reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds)); reducePerformance.LevelStats.Add(reduceLevelStats); } foreach (var reduceKey in needToMoveToMultiStep) { token.ThrowIfCancellationRequested(); string localReduceKey = reduceKey; transactionalStorage.Batch(actions => actions.MapReduce.UpdatePerformedReduceType(index.IndexId, localReduceKey, ReduceType.MultiStep)); } return reducePerformance; }
public override void IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp)
{
    var count = 0;
    var sourceCount = 0;
    var sw = Stopwatch.StartNew();
    var start = SystemTime.UtcNow;
    var deleted = new Dictionary<ReduceKeyAndBucket, int>();
    var indexPerfStats = RecordCurrentBatch("Current Map", batch.Docs.Count);
    batch.SetIndexingPerformance(indexPerfStats);

    var documentsWrapped = batch.Docs.Select(doc =>
    {
        sourceCount++;
        var documentId = doc.__document_id;
        actions.MapReduce.DeleteMappedResultsForDocumentId((string)documentId, indexId, deleted);
        return doc;
    })
    .Where(x => x is FilteredDocument == false)
    .ToList();

    var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();
    var allReferenceEtags = new ConcurrentQueue<IDictionary<string, Etag>>();
    var allState = new ConcurrentQueue<Tuple<HashSet<ReduceKeyAndBucket>, IndexingWorkStats, Dictionary<string, int>>>();

    int loadDocumentCount = 0;
    long loadDocumentDuration = 0;
    BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, partition =>
    {
        var localStats = new IndexingWorkStats();
        var localChanges = new HashSet<ReduceKeyAndBucket>();
        var statsPerKey = new Dictionary<string, int>();
        allState.Enqueue(Tuple.Create(localChanges, localStats, statsPerKey));

        using (CurrentIndexingScope.Current = new CurrentIndexingScope(context.Database, PublicName))
        {
            // we are writing to the transactional store from multiple threads here, and in a streaming fashion
            // should result in less memory and better perf
            context.TransactionalStorage.Batch(accessor =>
            {
                var mapResults = RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, localStats);
                var currentDocumentResults = new List<object>();
                string currentKey = null;
                bool skipDocument = false;
                foreach (var currentDoc in mapResults)
                {
                    var documentId = GetDocumentId(currentDoc);
                    if (documentId != currentKey)
                    {
                        count += ProcessBatch(viewGenerator, currentDocumentResults, currentKey, localChanges, accessor, statsPerKey);
                        currentDocumentResults.Clear();
                        currentKey = documentId;
                    }
                    else if (skipDocument)
                    {
                        continue;
                    }
                    currentDocumentResults.Add(new DynamicJsonObject(RavenJObject.FromObject(currentDoc, jsonSerializer)));
                    if (EnsureValidNumberOfOutputsForDocument(documentId, currentDocumentResults.Count) == false)
                    {
                        skipDocument = true;
                        currentDocumentResults.Clear();
                        continue;
                    }
                    Interlocked.Increment(ref localStats.IndexingSuccesses);
                }
                count += ProcessBatch(viewGenerator, currentDocumentResults, currentKey, localChanges, accessor, statsPerKey);
            });
            allReferenceEtags.Enqueue(CurrentIndexingScope.Current.ReferencesEtags);
            allReferencedDocs.Enqueue(CurrentIndexingScope.Current.ReferencedDocuments);
            Interlocked.Add(ref loadDocumentCount, CurrentIndexingScope.Current.LoadDocumentCount);
            Interlocked.Add(ref loadDocumentDuration, CurrentIndexingScope.Current.LoadDocumentDuration.ElapsedMilliseconds);
        }
    });

    UpdateDocumentReferences(actions, allReferencedDocs, allReferenceEtags);

    var changed = allState.SelectMany(x => x.Item1).Concat(deleted.Keys)
        .Distinct()
        .ToList();
    var stats = new IndexingWorkStats(allState.Select(x => x.Item2));
    var reduceKeyStats = allState.SelectMany(x => x.Item3)
        .GroupBy(x => x.Key)
        .Select(g => new { g.Key, Count = g.Sum(x => x.Value) })
        .ToList();

    BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, reduceKeyStats, enumerator => context.TransactionalStorage.Batch(accessor =>
    {
        while (enumerator.MoveNext())
        {
            var reduceKeyStat = enumerator.Current;
            accessor.MapReduce.IncrementReduceKeyCounter(indexId, reduceKeyStat.Key, reduceKeyStat.Count);
        }
    }));

    BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, changed, enumerator => context.TransactionalStorage.Batch(accessor =>
    {
        while (enumerator.MoveNext())
        {
            accessor.MapReduce.ScheduleReductions(indexId, 0, enumerator.Current);
        }
    }));

    UpdateIndexingStats(context, stats);
    AddindexingPerformanceStat(new IndexingPerformanceStats
    {
        OutputCount = count,
        ItemsCount = sourceCount,
        InputCount = documentsWrapped.Count,
        Operation = "Map",
        Duration = sw.Elapsed,
        Started = start,
        LoadDocumentCount = loadDocumentCount,
        LoadDocumentDurationMs = loadDocumentDuration
    });
    BatchCompleted("Current Map");
    logIndexing.Debug("Mapped {0} documents for {1}", count, indexId);
}