private void IndexDocuments(IStorageActionsAccessor actions, string index, JsonDocument[] jsonDocs)
{
    var viewGenerator = context.IndexDefinitionStorage.GetViewGenerator(index);
    if (viewGenerator == null)
        return; // index was deleted, probably

    var dateTime = jsonDocs.Min(x => x.LastModified);

    var documentRetriever = new DocumentRetriever(null, context.ReadTriggers);
    try
    {
        log.DebugFormat("Indexing {0} documents for index: {1}", jsonDocs.Length, index);
        context.IndexStorage.Index(index, viewGenerator,
            jsonDocs
                .Select(doc => documentRetriever.ExecuteReadTriggers(doc, null, ReadOperation.Index))
                .Where(doc => doc != null)
                .Select(x => JsonToExpando.Convert(x.ToJson())),
            context,
            actions,
            dateTime);
    }
    catch (Exception e)
    {
        if (actions.IsWriteConflict(e))
            return;
        log.WarnFormat(e, "Failed to index documents for index: {0}", index);
    }
}
public override void Execute(WorkContext context)
{
    var viewGenerator = context.IndexDefinitionStorage.GetViewGenerator(Index);
    if (viewGenerator == null)
        return; // index was deleted, probably

    context.TransactionaStorage.Batch(actions =>
    {
        var docsToIndex = actions.Documents.DocumentsById(FromId, ToId)
            .Select(d => d.Item1)
            .Where(x => x != null)
            .Select(s => JsonToExpando.Convert(s.ToJson()));
        context.IndexStorage.Index(Index, viewGenerator, docsToIndex, context, actions);
    });
}
public override void Execute(WorkContext context)
{
    context.TransactionaStorage.Batch(actions =>
    {
        var jsonDocuments = Keys.Select(key => actions.DocumentByKey(key, null))
            .Where(x => x != null)
            .Select(x => JsonToExpando.Convert(x.ToJson()))
            .ToArray();
        var keysAsString = string.Join(", ", Keys);
        var viewGenerator = context.IndexDefinitionStorage.GetViewGenerator(Index);
        if (viewGenerator == null)
            return; // index was deleted, probably
        try
        {
            logger.DebugFormat("Indexing documents: [{0}] for index: {1}", keysAsString, Index);

            var failureRate = actions.GetFailureRate(Index);
            if (failureRate.IsInvalidIndex)
            {
                logger.InfoFormat("Skipped indexing documents: [{0}] for index: {1} because failure rate is too high: {2}",
                    keysAsString, Index, failureRate.FailureRate);
                return;
            }

            context.IndexStorage.Index(Index, viewGenerator, jsonDocuments, context, actions);
        }
        catch (Exception e)
        {
            logger.WarnFormat(e, "Failed to index documents [{0}] for index: {1}", keysAsString, Index);
        }
    });
}
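// The failure-rate guard above skips indexing when an index has been erroring
// too often. This is a minimal, hypothetical sketch of the kind of structure
// GetFailureRate might return; the real IndexFailureInformation type and its
// thresholds live elsewhere in the codebase, so the 15% cutoff and the
// minimum-attempts figure here are assumptions for illustration only.
public class IndexFailureRateSketch
{
    public int Attempts { get; set; }
    public int Errors { get; set; }

    // Fraction of indexing attempts that ended in an error.
    public float FailureRate
    {
        get { return Attempts == 0 ? 0 : (float)Errors / Attempts; }
    }

    // Only declare the index invalid once there is enough data to judge it
    // (assumed here: at least 10 attempts with more than 15% of them failing).
    public bool IsInvalidIndex
    {
        get { return Attempts >= 10 && FailureRate > 0.15f; }
    }
}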
private IEnumerable<IndexingBatchForIndex> FilterIndexes(IList<IndexToWorkOn> indexesToWorkOn, List<JsonDocument> jsonDocs, Etag highestETagInBatch)
{
    var last = jsonDocs.Last();

    Debug.Assert(last.Etag != null);
    Debug.Assert(last.LastModified != null);

    var lastEtag = last.Etag;
    var lastModified = last.LastModified.Value;

    var documentRetriever = new DocumentRetriever(null, null, context.ReadTriggers, context.Database.InFlightTransactionalState);

    var filteredDocs =
        BackgroundTaskExecuter.Instance.Apply(context, jsonDocs, doc =>
        {
            var filteredDoc = documentRetriever.ExecuteReadTriggers(doc, null, ReadOperation.Index);
            return filteredDoc == null
                ? new { Doc = doc, Json = (object)new FilteredDocument(doc) }
                : new { Doc = filteredDoc, Json = JsonToExpando.Convert(doc.ToJson()) };
        });

    Log.Debug("After read triggers executed, {0} documents remained", filteredDocs.Count);

    var results = new IndexingBatchForIndex[indexesToWorkOn.Count];
    var actions = new Action<IStorageActionsAccessor>[indexesToWorkOn.Count];

    BackgroundTaskExecuter.Instance.ExecuteAll(context, indexesToWorkOn, (indexToWorkOn, i) =>
    {
        var indexName = indexToWorkOn.Index.PublicName;
        var viewGenerator = context.IndexDefinitionStorage.GetViewGenerator(indexName);
        if (viewGenerator == null)
            return; // probably deleted

        var batch = new IndexingBatch(highestETagInBatch);

        foreach (var item in filteredDocs)
        {
            if (defaultPrefetchingBehavior.FilterDocuments(item.Doc) == false)
                continue;

            // did we already index this document in this index?
            var etag = item.Doc.Etag;
            if (etag == null)
                continue;

            // is the Raven-Entity-Name a match for the things the index executes on?
            if (viewGenerator.ForEntityNames.Count != 0 &&
                viewGenerator.ForEntityNames.Contains(item.Doc.Metadata.Value<string>(Constants.RavenEntityName)) == false)
            {
                continue;
            }

            batch.Add(item.Doc, item.Json, defaultPrefetchingBehavior.ShouldSkipDeleteFromIndex(item.Doc));

            if (batch.DateTime == null)
                batch.DateTime = item.Doc.LastModified;
            else
                batch.DateTime = batch.DateTime > item.Doc.LastModified
                    ? item.Doc.LastModified
                    : batch.DateTime;
        }

        if (batch.Docs.Count == 0)
        {
            Log.Debug("All documents have been filtered for {0}, no indexing will be performed, updating to {1}, {2}",
                indexName, lastEtag, lastModified);
            // we use it this way to batch all the updates together
            actions[i] = accessor =>
            {
                accessor.Indexing.UpdateLastIndexed(indexToWorkOn.Index.indexId, lastEtag, lastModified);
                accessor.AfterStorageCommit += () =>
                {
                    indexToWorkOn.Index.EnsureIndexWriter();
                    indexToWorkOn.Index.Flush(lastEtag);
                };
            };
            return;
        }

        if (Log.IsDebugEnabled)
        {
            Log.Debug("Going to index {0} documents in {1}: ({2})", batch.Ids.Count, indexToWorkOn, string.Join(", ", batch.Ids));
        }

        results[i] = new IndexingBatchForIndex
        {
            Batch = batch,
            IndexId = indexToWorkOn.IndexId,
            Index = indexToWorkOn.Index,
            LastIndexedEtag = indexToWorkOn.LastIndexedEtag
        };
    });

    transactionalStorage.Batch(actionsAccessor =>
    {
        foreach (var action in actions)
        {
            if (action != null)
                action(actionsAccessor);
        }
    });

    return results.Where(x => x != null);
}
public static dynamic GetDocumentFromString(string json)
{
    return JsonToExpando.Convert(RavenJObject.Parse(json));
}
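// A quick usage sketch for GetDocumentFromString: the expando result supports
// dynamic member access against the parsed JSON. The document shape below is
// made up for illustration; only GetDocumentFromString itself comes from the
// code above.
public static void GetDocumentFromStringUsageSketch()
{
    dynamic doc = GetDocumentFromString(@"{ ""Name"": ""ayende"", ""Age"": 3 }");

    // dynamic dispatch resolves these member reads against the expando
    var name = doc.Name; // "ayende"
    var age = doc.Age;   // 3
}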
protected void HandleReduceForIndex(IndexToWorkOn indexToWorkOn)
{
    var viewGenerator = context.IndexDefinitionStorage.GetViewGenerator(indexToWorkOn.IndexName);
    if (viewGenerator == null)
        return;

    TimeSpan reduceDuration = TimeSpan.Zero;
    int totalCount = 0;
    int totalSize = 0;
    bool operationCanceled = false;
    var itemsToDelete = new List<object>();
    try
    {
        var sw = Stopwatch.StartNew();
        for (int i = 0; i < 3; i++)
        {
            var level = i;
            transactionalStorage.Batch(actions =>
            {
                context.CancellationToken.ThrowIfCancellationRequested();

                var sp = Stopwatch.StartNew();
                var persistedResults = actions.MapReduce.GetItemsToReduce
                    (
                        take: context.CurrentNumberOfItemsToReduceInSingleBatch,
                        level: level,
                        index: indexToWorkOn.IndexName,
                        itemsToDelete: itemsToDelete
                    )
                    .ToList();

                totalCount += persistedResults.Count;
                totalSize += persistedResults.Sum(x => x.Size);

                if (Log.IsDebugEnabled)
                {
                    if (persistedResults.Count > 0)
                        Log.Debug(() => string.Format("Found {0} results for keys [{1}] for index {2} at level {3} in {4}",
                            persistedResults.Count,
                            string.Join(", ", persistedResults.Select(x => x.ReduceKey).Distinct()),
                            indexToWorkOn.IndexName, level, sp.Elapsed));
                    else
                        Log.Debug("No reduce keys found for {0}", indexToWorkOn.IndexName);
                }

                context.CancellationToken.ThrowIfCancellationRequested();

                var requiredReduceNextTime = persistedResults.Select(x => new ReduceKeyAndBucket(x.Bucket, x.ReduceKey))
                    .Distinct()
                    .ToArray();
                foreach (var mappedResultInfo in requiredReduceNextTime)
                {
                    actions.MapReduce.RemoveReduceResults(indexToWorkOn.IndexName, level + 1, mappedResultInfo.ReduceKey, mappedResultInfo.Bucket);
                }

                if (level != 2)
                {
                    var reduceKeysAndBuckets = requiredReduceNextTime
                        .Select(x => new ReduceKeyAndBucket(x.Bucket / 1024, x.ReduceKey))
                        .Distinct()
                        .ToArray();
                    actions.MapReduce.ScheduleReductions(indexToWorkOn.IndexName, level + 1, reduceKeysAndBuckets);
                }

                var results = persistedResults
                    .Where(x => x.Data != null)
                    .GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data))
                    .ToArray();
                var reduceKeys = new HashSet<string>(persistedResults.Select(x => x.ReduceKey),
                    StringComparer.InvariantCultureIgnoreCase);
                context.ReducedPerSecIncreaseBy(results.Length);

                context.CancellationToken.ThrowIfCancellationRequested();
                sp = Stopwatch.StartNew();
                context.IndexStorage.Reduce(indexToWorkOn.IndexName, viewGenerator, results, level, context, actions, reduceKeys);
                Log.Debug("Indexed {0} reduce keys in {1} with {2} results for index {3} in {4}",
                    reduceKeys.Count, sp.Elapsed, results.Length, indexToWorkOn.IndexName, sp.Elapsed);
            });
        }
        reduceDuration = sw.Elapsed;
    }
    catch (OperationCanceledException)
    {
        operationCanceled = true;
    }
    finally
    {
        if (operationCanceled == false)
        {
            // whether we succeeded in indexing or not, we have to update this,
            // because otherwise we keep trying to re-index failed mapped results
            transactionalStorage.Batch(actions =>
            {
                var latest = actions.MapReduce.DeleteScheduledReduction(itemsToDelete);
                if (latest == null)
                    return;
                actions.Indexing.UpdateLastReduced(indexToWorkOn.IndexName, latest.Etag, latest.Timestamp);
            });
            autoTuner.AutoThrottleBatchSize(totalCount, totalSize, reduceDuration);
        }
    }
}
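// The level arithmetic above is easy to miss inside HandleReduceForIndex:
// when a reduction at one level is scheduled for the next, its bucket number
// is divided by 1024, so up to 1024 sibling buckets roll up into one parent
// bucket at each of the three levels. Below is a minimal, self-contained
// sketch of that roll-up; the helper and its names are illustrative only.
public static class ReduceBucketRollupSketch
{
    // Parent bucket at the next reduce level for a given bucket.
    public static int ParentBucket(int bucket)
    {
        return bucket / 1024;
    }

    public static void Demo()
    {
        // A map result in bucket 1,500,000 at level 0...
        var level0 = 1500000;
        var level1 = ParentBucket(level0); // 1464 -> reduced together with up to 1024 siblings
        var level2 = ParentBucket(level1); // 1    -> input to the final reduce level
        Console.WriteLine("{0} -> {1} -> {2}", level0, level1, level2);
    }
}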
private ReducingPerformanceStats SingleStepReduce(IndexToWorkOn index, List<string> keysToReduce, AbstractViewGenerator viewGenerator,
    ConcurrentSet<object> itemsToDelete, CancellationToken token)
{
    var needToMoveToSingleStepQueue = new ConcurrentQueue<HashSet<string>>();

    if (Log.IsDebugEnabled)
    {
        Log.Debug(() => string.Format("Executing single step reducing for {0} keys [{1}]", keysToReduce.Count, string.Join(", ", keysToReduce)));
    }

    var batchTimeWatcher = Stopwatch.StartNew();

    var reducingBatchThrottlerId = Guid.NewGuid();
    var reducePerformanceStats = new ReducingPerformanceStats(ReduceType.SingleStep);
    var reduceLevelStats = new ReduceLevelPeformanceStats
    {
        Started = SystemTime.UtcNow,
        Level = 2
    };

    try
    {
        var parallelOperations = new ConcurrentQueue<ParallelBatchStats>();

        var parallelProcessingStart = SystemTime.UtcNow;

        BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, keysToReduce, enumerator =>
        {
            var parallelStats = new ParallelBatchStats
            {
                StartDelay = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds
            };

            var localNeedToMoveToSingleStep = new HashSet<string>();
            needToMoveToSingleStepQueue.Enqueue(localNeedToMoveToSingleStep);
            var localKeys = new HashSet<string>();
            while (enumerator.MoveNext())
            {
                token.ThrowIfCancellationRequested();

                localKeys.Add(enumerator.Current);
            }

            transactionalStorage.Batch(actions =>
            {
                var getItemsToReduceParams = new GetItemsToReduceParams(index: index.IndexId, reduceKeys: new HashSet<string>(localKeys),
                    level: 0, loadData: false, itemsToDelete: itemsToDelete)
                {
                    Take = int.MaxValue // just get all, we do the rate limit when we load the number of keys to reduce, anyway
                };

                var getItemsToReduceDuration = Stopwatch.StartNew();

                int scheduledItemsSum = 0;
                int scheduledItemsCount = 0;
                List<int> scheduledItemsMappedBuckets = new List<int>();
                using (StopwatchScope.For(getItemsToReduceDuration))
                {
                    foreach (var item in actions.MapReduce.GetItemsToReduce(getItemsToReduceParams, token))
                    {
                        scheduledItemsMappedBuckets.Add(item.Bucket);
                        scheduledItemsSum += item.Size;
                        scheduledItemsCount++;
                    }
                }

                parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_GetItemsToReduce, getItemsToReduceDuration.ElapsedMilliseconds));

                autoTuner.CurrentlyUsedBatchSizesInBytes.GetOrAdd(reducingBatchThrottlerId, scheduledItemsSum);

                if (scheduledItemsCount == 0)
                {
                    // Here we have an interesting issue. We have scheduled reductions, because GetReduceTypesPerKeys() returned them
                    // and at the same time, we don't have any at level 0. That probably means that we have them at level 1 or 2.
                    // They shouldn't be here, and indeed, we remove them just a little down from here in this function.
                    // That said, they might have been smuggled in between versions, or something happened to cause them to be here.
                    // In order to avoid that, we forcibly delete those extra items from the scheduled reductions, and move on
                    Log.Warn("Found single reduce items ({0}) that didn't have any items to reduce. Deleting level 1 & level 2 items for those keys. (If you can reproduce this, please contact [email protected])", string.Join(", ", keysToReduce));

                    var deletingScheduledReductionsDuration = Stopwatch.StartNew();
                    using (StopwatchScope.For(deletingScheduledReductionsDuration))
                    {
                        foreach (var reduceKey in keysToReduce)
                        {
                            token.ThrowIfCancellationRequested();

                            actions.MapReduce.DeleteScheduledReduction(index.IndexId, 1, reduceKey);
                            actions.MapReduce.DeleteScheduledReduction(index.IndexId, 2, reduceKey);
                        }
                    }

                    parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_DeleteScheduledReductions, deletingScheduledReductionsDuration.ElapsedMilliseconds));
                }

                var removeReduceResultsDuration = new Stopwatch();

                foreach (var reduceKey in localKeys)
                {
                    token.ThrowIfCancellationRequested();

                    var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexId, reduceKey);

                    if (lastPerformedReduceType != ReduceType.SingleStep)
                        localNeedToMoveToSingleStep.Add(reduceKey);

                    if (lastPerformedReduceType != ReduceType.MultiStep)
                        continue;

                    if (Log.IsDebugEnabled)
                    {
                        Log.Debug("Key {0} was moved from multi step to single step reduce, removing existing reduce results records", reduceKey);
                    }

                    using (StopwatchScope.For(removeReduceResultsDuration))
                    {
                        // now we are in single step but previously multi step reduce was performed for the given key
                        var mappedBuckets = actions.MapReduce.GetMappedBuckets(index.IndexId, reduceKey, token);

                        // add scheduled items too to be sure we will delete reduce results of already deleted documents
                        foreach (var mappedBucket in mappedBuckets.Union(scheduledItemsMappedBuckets))
                        {
                            actions.MapReduce.RemoveReduceResults(index.IndexId, 1, reduceKey, mappedBucket);
                            actions.MapReduce.RemoveReduceResults(index.IndexId, 2, reduceKey, mappedBucket / 1024);
                        }
                    }
                }

                parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_RemoveReduceResults, removeReduceResultsDuration.ElapsedMilliseconds));

                parallelOperations.Enqueue(parallelStats);
            });
        });

        reduceLevelStats.Operations.Add(new ParallelPerformanceStats
        {
            NumberOfThreads = parallelOperations.Count,
            DurationMs = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds,
            BatchedOperations = parallelOperations.ToList()
        });

        var getMappedResultsDuration = new Stopwatch();

        var reductionPerformanceStats = new List<IndexingPerformanceStats>();

        var keysLeftToReduce = new HashSet<string>(keysToReduce);
        while (keysLeftToReduce.Count > 0)
        {
            var keysReturned = new HashSet<string>();

            // Try to diminish the allocations happening because of .Resize()
            var mappedResults = new List<MappedResultInfo>(keysLeftToReduce.Count);

            context.TransactionalStorage.Batch(actions =>
            {
                var take = context.CurrentNumberOfItemsToReduceInSingleBatch;

                using (StopwatchScope.For(getMappedResultsDuration))
                {
                    mappedResults = actions.MapReduce.GetMappedResults(index.IndexId, keysLeftToReduce, true, take, keysReturned, token, mappedResults);
                }
            });

            var count = mappedResults.Count;

            int size = 0;
            foreach (var item in mappedResults)
            {
                item.Bucket = 0;
                size += item.Size;
            }

            var results = mappedResults.GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data)).ToArray();

            context.MetricsCounters.ReducedPerSecond.Mark(results.Length);

            token.ThrowIfCancellationRequested();

            var performance = context.IndexStorage.Reduce(index.IndexId, viewGenerator, results, 2, context, null, keysReturned, count);

            reductionPerformanceStats.Add(performance);

            autoTuner.AutoThrottleBatchSize(count, size, batchTimeWatcher.Elapsed);
        }

        var needToMoveToSingleStep = new HashSet<string>();

        HashSet<string> set;
        while (needToMoveToSingleStepQueue.TryDequeue(out set))
        {
            needToMoveToSingleStep.UnionWith(set);
        }

        foreach (var reduceKey in needToMoveToSingleStep)
        {
            string localReduceKey = reduceKey;
            transactionalStorage.Batch(actions =>
                actions.MapReduce.UpdatePerformedReduceType(index.IndexId, localReduceKey, ReduceType.SingleStep));
        }

        reduceLevelStats.Completed = SystemTime.UtcNow;
        reduceLevelStats.Duration = reduceLevelStats.Completed - reduceLevelStats.Started;
        reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_GetMappedResults, getMappedResultsDuration.ElapsedMilliseconds));
        reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.StorageCommit, 0)); // in single step we write directly to Lucene index

        foreach (var stats in reductionPerformanceStats)
        {
            reduceLevelStats.Add(stats);
        }

        reducePerformanceStats.LevelStats.Add(reduceLevelStats);
    }
    finally
    {
        long _;
        autoTuner.CurrentlyUsedBatchSizesInBytes.TryRemove(reducingBatchThrottlerId, out _);
    }

    return reducePerformanceStats;
}
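// StopwatchScope.For(...) appears throughout the reduce paths above to
// accumulate elapsed time across several disjoint regions into a single
// Stopwatch. The real implementation lives elsewhere in the codebase; this is
// a minimal sketch of the shape such a helper could take, assuming it simply
// resumes the watch on creation and stops it on Dispose so time accumulates
// across successive using blocks.
public struct StopwatchScopeSketch : IDisposable
{
    private readonly Stopwatch stopwatch;

    private StopwatchScopeSketch(Stopwatch stopwatch)
    {
        this.stopwatch = stopwatch;
        this.stopwatch.Start(); // Start() resumes without resetting
    }

    public static StopwatchScopeSketch For(Stopwatch stopwatch)
    {
        return new StopwatchScopeSketch(stopwatch);
    }

    public void Dispose()
    {
        stopwatch.Stop();
    }
}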
private ReducingPerformanceStats MultiStepReduce(IndexToWorkOn index, List<string> keysToReduce, AbstractViewGenerator viewGenerator,
    ConcurrentSet<object> itemsToDelete, CancellationToken token)
{
    var needToMoveToMultiStep = new HashSet<string>();
    transactionalStorage.Batch(actions =>
    {
        foreach (var localReduceKey in keysToReduce)
        {
            token.ThrowIfCancellationRequested();

            var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexId, localReduceKey);

            if (lastPerformedReduceType != ReduceType.MultiStep)
                needToMoveToMultiStep.Add(localReduceKey);

            if (lastPerformedReduceType != ReduceType.SingleStep)
                continue;

            // we exceeded the limit of items to reduce in single step
            // now we need to schedule reductions at level 0 for all map results with given reduce key
            var mappedItems = actions.MapReduce.GetMappedBuckets(index.IndexId, localReduceKey, token).ToList();
            foreach (var result in mappedItems.Select(x => new ReduceKeyAndBucket(x, localReduceKey)))
            {
                actions.MapReduce.ScheduleReductions(index.IndexId, 0, result);
            }
        }
    });

    var reducePerformance = new ReducingPerformanceStats(ReduceType.MultiStep);

    for (int i = 0; i < 3; i++)
    {
        var level = i;

        var reduceLevelStats = new ReduceLevelPeformanceStats()
        {
            Level = level,
            Started = SystemTime.UtcNow,
        };

        var reduceParams = new GetItemsToReduceParams(
            index.IndexId,
            new HashSet<string>(keysToReduce),
            level,
            true,
            itemsToDelete);

        var gettingItemsToReduceDuration = new Stopwatch();
        var scheduleReductionsDuration = new Stopwatch();
        var removeReduceResultsDuration = new Stopwatch();
        var storageCommitDuration = new Stopwatch();

        bool retry = true;
        while (retry && reduceParams.ReduceKeys.Count > 0)
        {
            var reduceBatchAutoThrottlerId = Guid.NewGuid();
            try
            {
                transactionalStorage.Batch(actions =>
                {
                    token.ThrowIfCancellationRequested();

                    actions.BeforeStorageCommit += storageCommitDuration.Start;
                    actions.AfterStorageCommit += storageCommitDuration.Stop;

                    var batchTimeWatcher = Stopwatch.StartNew();

                    reduceParams.Take = context.CurrentNumberOfItemsToReduceInSingleBatch;

                    int size = 0;

                    IList<MappedResultInfo> persistedResults;
                    var reduceKeys = new HashSet<string>(StringComparer.InvariantCultureIgnoreCase);
                    using (StopwatchScope.For(gettingItemsToReduceDuration))
                    {
                        persistedResults = actions.MapReduce.GetItemsToReduce(reduceParams, token);

                        foreach (var item in persistedResults)
                        {
                            reduceKeys.Add(item.ReduceKey);
                            size += item.Size;
                        }
                    }

                    if (persistedResults.Count == 0)
                    {
                        retry = false;
                        return;
                    }

                    var count = persistedResults.Count;

                    autoTuner.CurrentlyUsedBatchSizesInBytes.GetOrAdd(reduceBatchAutoThrottlerId, size);

                    if (Log.IsDebugEnabled)
                    {
                        if (persistedResults.Count > 0)
                            Log.Debug(() => string.Format("Found {0} results for keys [{1}] for index {2} at level {3} in {4}",
                                persistedResults.Count,
                                string.Join(", ", persistedResults.Select(x => x.ReduceKey).Distinct()),
                                index.Index.PublicName, level, batchTimeWatcher.Elapsed));
                        else
                            Log.Debug("No reduce keys found for {0}", index.Index.PublicName);
                    }

                    token.ThrowIfCancellationRequested();

                    var requiredReduceNextTimeSet = new HashSet<ReduceKeyAndBucket>(
                        persistedResults.Select(x => new ReduceKeyAndBucket(x.Bucket, x.ReduceKey)),
                        ReduceKeyAndBucketEqualityComparer.Instance);

                    using (StopwatchScope.For(removeReduceResultsDuration))
                    {
                        foreach (var mappedResultInfo in requiredReduceNextTimeSet)
                        {
                            token.ThrowIfCancellationRequested();

                            actions.MapReduce.RemoveReduceResults(index.IndexId, level + 1, mappedResultInfo.ReduceKey, mappedResultInfo.Bucket);
                        }
                    }

                    if (level != 2)
                    {
                        var reduceKeysAndBucketsSet = new HashSet<ReduceKeyAndBucket>(
                            requiredReduceNextTimeSet.Select(x => new ReduceKeyAndBucket(x.Bucket / 1024, x.ReduceKey)),
                            ReduceKeyAndBucketEqualityComparer.Instance);

                        using (StopwatchScope.For(scheduleReductionsDuration))
                        {
                            foreach (var reduceKeysAndBucket in reduceKeysAndBucketsSet)
                            {
                                token.ThrowIfCancellationRequested();

                                actions.MapReduce.ScheduleReductions(index.IndexId, level + 1, reduceKeysAndBucket);
                            }
                        }
                    }

                    token.ThrowIfCancellationRequested();

                    var reduceTimeWatcher = Stopwatch.StartNew();

                    var results = persistedResults.Where(x => x.Data != null)
                        .GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data))
                        .ToList();

                    var performance = context.IndexStorage.Reduce(index.IndexId, viewGenerator, results, level, context, actions, reduceKeys, persistedResults.Count);

                    context.MetricsCounters.ReducedPerSecond.Mark(results.Count());

                    reduceLevelStats.Add(performance);

                    var batchDuration = batchTimeWatcher.Elapsed;

                    if (Log.IsDebugEnabled)
                    {
                        Log.Debug("Indexed {0} reduce keys in {1} with {2} results for index {3} in {4} on level {5}",
                            reduceKeys.Count, batchDuration, performance.ItemsCount, index.Index.PublicName, reduceTimeWatcher.Elapsed, level);
                    }

                    autoTuner.AutoThrottleBatchSize(count, size, batchDuration);
                });
            }
            finally
            {
                long _;
                autoTuner.CurrentlyUsedBatchSizesInBytes.TryRemove(reduceBatchAutoThrottlerId, out _);
            }
        }

        reduceLevelStats.Completed = SystemTime.UtcNow;
        reduceLevelStats.Duration = reduceLevelStats.Completed - reduceLevelStats.Started;

        reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_GetItemsToReduce, gettingItemsToReduceDuration.ElapsedMilliseconds));
        reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_ScheduleReductions, scheduleReductionsDuration.ElapsedMilliseconds));
        reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Reduce_RemoveReduceResults, removeReduceResultsDuration.ElapsedMilliseconds));
        reduceLevelStats.Operations.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds));

        reducePerformance.LevelStats.Add(reduceLevelStats);
    }

    foreach (var reduceKey in needToMoveToMultiStep)
    {
        token.ThrowIfCancellationRequested();

        string localReduceKey = reduceKey;
        transactionalStorage.Batch(actions =>
            actions.MapReduce.UpdatePerformedReduceType(index.IndexId, localReduceKey, ReduceType.MultiStep));
    }

    return reducePerformance;
}
private void MultiStepReduce(IndexToWorkOn index, string[] keysToReduce, AbstractViewGenerator viewGenerator, ConcurrentSet<object> itemsToDelete)
{
    var needToMoveToMultiStep = new HashSet<string>();
    transactionalStorage.Batch(actions =>
    {
        foreach (var localReduceKey in keysToReduce)
        {
            var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexName, localReduceKey);

            if (lastPerformedReduceType != ReduceType.MultiStep)
                needToMoveToMultiStep.Add(localReduceKey);

            if (lastPerformedReduceType != ReduceType.SingleStep)
                continue;

            // we exceeded the limit of items to reduce in single step
            // now we need to schedule reductions at level 0 for all map results with given reduce key
            var mappedItems = actions.MapReduce.GetMappedBuckets(index.IndexName, localReduceKey).ToList();
            foreach (var result in mappedItems.Select(x => new ReduceKeyAndBucket(x, localReduceKey)))
            {
                actions.MapReduce.ScheduleReductions(index.IndexName, 0, result);
            }
        }
    });

    for (int i = 0; i < 3; i++)
    {
        var level = i;

        var reduceParams = new GetItemsToReduceParams(
            index.IndexName,
            keysToReduce,
            level,
            true,
            itemsToDelete);

        bool retry = true;
        while (retry && reduceParams.ReduceKeys.Count > 0)
        {
            var reduceBatchAutoThrottlerId = Guid.NewGuid();
            try
            {
                transactionalStorage.Batch(actions =>
                {
                    context.CancellationToken.ThrowIfCancellationRequested();

                    var batchTimeWatcher = Stopwatch.StartNew();
                    reduceParams.Take = context.CurrentNumberOfItemsToReduceInSingleBatch;
                    var persistedResults = actions.MapReduce.GetItemsToReduce(reduceParams).ToList();
                    if (persistedResults.Count == 0)
                    {
                        retry = false;
                        return;
                    }

                    var count = persistedResults.Count;
                    var size = persistedResults.Sum(x => x.Size);
                    autoTuner.CurrentlyUsedBatchSizes.GetOrAdd(reduceBatchAutoThrottlerId, size);

                    if (Log.IsDebugEnabled)
                    {
                        if (persistedResults.Count > 0)
                            Log.Debug(() => string.Format("Found {0} results for keys [{1}] for index {2} at level {3} in {4}",
                                persistedResults.Count,
                                string.Join(", ", persistedResults.Select(x => x.ReduceKey).Distinct()),
                                index.IndexName, level, batchTimeWatcher.Elapsed));
                        else
                            Log.Debug("No reduce keys found for {0}", index.IndexName);
                    }

                    context.CancellationToken.ThrowIfCancellationRequested();

                    var requiredReduceNextTime = persistedResults.Select(x => new ReduceKeyAndBucket(x.Bucket, x.ReduceKey))
                        .OrderBy(x => x.Bucket)
                        .Distinct()
                        .ToArray();
                    foreach (var mappedResultInfo in requiredReduceNextTime)
                    {
                        actions.MapReduce.RemoveReduceResults(index.IndexName, level + 1, mappedResultInfo.ReduceKey, mappedResultInfo.Bucket);
                    }

                    if (level != 2)
                    {
                        var reduceKeysAndBuckets = requiredReduceNextTime
                            .Select(x => new ReduceKeyAndBucket(x.Bucket / 1024, x.ReduceKey))
                            .Distinct()
                            .ToArray();
                        foreach (var reduceKeysAndBucket in reduceKeysAndBuckets)
                        {
                            actions.MapReduce.ScheduleReductions(index.IndexName, level + 1, reduceKeysAndBucket);
                        }
                    }

                    var results = persistedResults
                        .Where(x => x.Data != null)
                        .GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data))
                        .ToArray();
                    var reduceKeys = new HashSet<string>(persistedResults.Select(x => x.ReduceKey),
                        StringComparer.InvariantCultureIgnoreCase);
                    context.PerformanceCounters.ReducedPerSecond.IncrementBy(results.Length);

                    context.CancellationToken.ThrowIfCancellationRequested();

                    var reduceTimeWatcher = Stopwatch.StartNew();
                    context.IndexStorage.Reduce(index.IndexName, viewGenerator, results, level, context, actions, reduceKeys, persistedResults.Count);

                    var batchDuration = batchTimeWatcher.Elapsed;
                    Log.Debug("Indexed {0} reduce keys in {1} with {2} results for index {3} in {4} on level {5}",
                        reduceKeys.Count, batchDuration, results.Length, index.IndexName, reduceTimeWatcher.Elapsed, level);

                    autoTuner.AutoThrottleBatchSize(count, size, batchDuration);
                });
            }
            finally
            {
                long _;
                autoTuner.CurrentlyUsedBatchSizes.TryRemove(reduceBatchAutoThrottlerId, out _);
            }
        }
    }

    foreach (var reduceKey in needToMoveToMultiStep)
    {
        string localReduceKey = reduceKey;
        transactionalStorage.Batch(actions =>
            actions.MapReduce.UpdatePerformedReduceType(index.IndexName, localReduceKey, ReduceType.MultiStep));
    }
}
private void SingleStepReduce(IndexToWorkOn index, string[] keysToReduce, AbstractViewGenerator viewGenerator, ConcurrentSet<object> itemsToDelete)
{
    var needToMoveToSingleStepQueue = new ConcurrentQueue<HashSet<string>>();

    Log.Debug(() => string.Format("Executing single step reducing for {0} keys [{1}]", keysToReduce.Length, string.Join(", ", keysToReduce)));
    var batchTimeWatcher = Stopwatch.StartNew();
    var count = 0;
    var size = 0;
    var state = new ConcurrentQueue<Tuple<HashSet<string>, List<MappedResultInfo>>>();
    var reducingBatchThrottlerId = Guid.NewGuid();
    try
    {
        BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, keysToReduce, enumerator =>
        {
            var localNeedToMoveToSingleStep = new HashSet<string>();
            needToMoveToSingleStepQueue.Enqueue(localNeedToMoveToSingleStep);
            var localKeys = new HashSet<string>();
            while (enumerator.MoveNext())
            {
                localKeys.Add(enumerator.Current);
            }
            transactionalStorage.Batch(actions =>
            {
                var getItemsToReduceParams = new GetItemsToReduceParams(index: index.IndexName, reduceKeys: localKeys, level: 0,
                    loadData: false, itemsToDelete: itemsToDelete)
                {
                    Take = int.MaxValue // just get all, we do the rate limit when we load the number of keys to reduce, anyway
                };
                var scheduledItems = actions.MapReduce.GetItemsToReduce(getItemsToReduceParams).ToList();

                autoTuner.CurrentlyUsedBatchSizes.GetOrAdd(reducingBatchThrottlerId, scheduledItems.Sum(x => x.Size));

                if (scheduledItems.Count == 0)
                {
                    if (Log.IsWarnEnabled)
                    {
                        Log.Warn("Found single reduce items ({0}) that didn't have any items to reduce. Deleting level 1 & level 2 items for those keys. (If you can reproduce this, please contact [email protected])",
                            string.Join(", ", keysToReduce));
                    }
                    // Here we have an interesting issue. We have scheduled reductions, because GetReduceTypesPerKeys() returned them
                    // and at the same time, we don't have any at level 0. That probably means that we have them at level 1 or 2.
                    // They shouldn't be here, and indeed, we remove them just a little down from here in this function.
                    // That said, they might have been smuggled in between versions, or something happened to cause them to be here.
                    // In order to avoid that, we forcibly delete those extra items from the scheduled reductions, and move on
                    foreach (var reduceKey in keysToReduce)
                    {
                        actions.MapReduce.DeleteScheduledReduction(index.IndexName, 1, reduceKey);
                        actions.MapReduce.DeleteScheduledReduction(index.IndexName, 2, reduceKey);
                    }
                }

                foreach (var reduceKey in localKeys)
                {
                    var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexName, reduceKey);

                    if (lastPerformedReduceType != ReduceType.SingleStep)
                        localNeedToMoveToSingleStep.Add(reduceKey);

                    if (lastPerformedReduceType != ReduceType.MultiStep)
                        continue;

                    Log.Debug("Key {0} was moved from multi step to single step reduce, removing existing reduce results records", reduceKey);

                    // now we are in single step but previously multi step reduce was performed for the given key
                    var mappedBuckets = actions.MapReduce.GetMappedBuckets(index.IndexName, reduceKey).ToList();

                    // add scheduled items too to be sure we will delete reduce results of already deleted documents
                    mappedBuckets.AddRange(scheduledItems.Select(x => x.Bucket));

                    foreach (var mappedBucket in mappedBuckets.Distinct())
                    {
                        actions.MapReduce.RemoveReduceResults(index.IndexName, 1, reduceKey, mappedBucket);
                        actions.MapReduce.RemoveReduceResults(index.IndexName, 2, reduceKey, mappedBucket / 1024);
                    }
                }

                var mappedResults = actions.MapReduce.GetMappedResults(
                    index.IndexName,
                    localKeys,
                    loadData: true
                ).ToList();

                Interlocked.Add(ref count, mappedResults.Count);
                Interlocked.Add(ref size, mappedResults.Sum(x => x.Size));

                mappedResults.ApplyIfNotNull(x => x.Bucket = 0);

                state.Enqueue(Tuple.Create(localKeys, mappedResults));
            });
        });

        var reduceKeys = new HashSet<string>(state.SelectMany(x => x.Item1));

        var results = state.SelectMany(x => x.Item2)
            .Where(x => x.Data != null)
            .GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data))
            .ToArray();
        context.PerformanceCounters.ReducedPerSecond.IncrementBy(results.Length);

        context.TransactionalStorage.Batch(actions =>
            context.IndexStorage.Reduce(index.IndexName, viewGenerator, results, 2, context, actions, reduceKeys, state.Sum(x => x.Item2.Count))
        );

        autoTuner.AutoThrottleBatchSize(count, size, batchTimeWatcher.Elapsed);

        var needToMoveToSingleStep = new HashSet<string>();

        HashSet<string> set;
        while (needToMoveToSingleStepQueue.TryDequeue(out set))
        {
            needToMoveToSingleStep.UnionWith(set);
        }

        foreach (var reduceKey in needToMoveToSingleStep)
        {
            string localReduceKey = reduceKey;
            transactionalStorage.Batch(actions =>
                actions.MapReduce.UpdatePerformedReduceType(index.IndexName, localReduceKey, ReduceType.SingleStep));
        }
    }
    finally
    {
        long _;
        autoTuner.CurrentlyUsedBatchSizes.TryRemove(reducingBatchThrottlerId, out _);
    }
}
private IEnumerable<IndexingBatchForIndex> FilterIndexes(IList<IndexToWorkOn> indexesToWorkOn, List<JsonDocument> jsonDocs,
    Etag highestETagInBatch, out List<IndexToWorkOn> filteredOutIndexes)
{
    var innerFilteredOutIndexes = new ConcurrentStack<IndexToWorkOn>();
    var last = jsonDocs.Last();

    Debug.Assert(last.Etag != null);
    Debug.Assert(last.LastModified != null);

    var lastEtag = last.Etag;
    var lastModified = last.LastModified.Value;

    var documentRetriever = new DocumentRetriever(null, null, context.ReadTriggers);

    var filteredDocs =
        BackgroundTaskExecuter.Instance.Apply(context, jsonDocs, doc =>
        {
            var filteredDoc = documentRetriever.ExecuteReadTriggers(doc, ReadOperation.Index);
            return filteredDoc == null
                ? new { Doc = doc, Json = (object)new FilteredDocument(doc) }
                : new { Doc = filteredDoc, Json = JsonToExpando.Convert(doc.ToJson()) };
        });

    if (Log.IsDebugEnabled)
        Log.Debug("After read triggers executed, {0} documents remained", filteredDocs.Count);

    var results = new ConcurrentQueue<IndexingBatchForIndex>();
    var actions = new ConcurrentQueue<Action<IStorageActionsAccessor>>();
    context.Database.MappingThreadPool.ExecuteBatch(indexesToWorkOn, indexToWorkOn =>
    {
        var indexName = indexToWorkOn.Index.PublicName;
        var viewGenerator = context.IndexDefinitionStorage.GetViewGenerator(indexName);
        if (viewGenerator == null)
            return; // probably deleted

        var batch = new IndexingBatch(highestETagInBatch);

        foreach (var filteredDoc in filteredDocs)
        {
            var doc = filteredDoc.Doc;
            var json = filteredDoc.Json;

            if (defaultPrefetchingBehavior.FilterDocuments(doc) == false ||
                doc.Etag.CompareTo(indexToWorkOn.LastIndexedEtag) <= 0)
                continue;

            // did we already index this document in this index?
            var etag = doc.Etag;
            if (etag == null)
                continue;

            // is the Raven-Entity-Name a match for the things the index executes on?
            if (viewGenerator.ForEntityNames.Count != 0 &&
                viewGenerator.ForEntityNames.Contains(doc.Metadata.Value<string>(Constants.RavenEntityName)) == false)
            {
                continue;
            }

            batch.Add(doc, json, defaultPrefetchingBehavior.ShouldSkipDeleteFromIndex(doc));

            if (batch.DateTime == null)
                batch.DateTime = doc.LastModified;
            else
                batch.DateTime = batch.DateTime > doc.LastModified
                    ? doc.LastModified
                    : batch.DateTime;
        }

        if (batch.Docs.Count == 0)
        {
            if (Log.IsDebugEnabled)
                Log.Debug("All documents have been filtered for {0}, no indexing will be performed, updating to {1}, {2}",
                    indexName, lastEtag, lastModified);

            // we use it this way to batch all the updates together
            if (indexToWorkOn.LastIndexedEtag.CompareTo(lastEtag) < 0)
                actions.Enqueue(accessor =>
                {
                    accessor.Indexing.UpdateLastIndexed(indexToWorkOn.Index.indexId, lastEtag, lastModified);
                    accessor.AfterStorageCommit += () =>
                    {
                        indexToWorkOn.Index.EnsureIndexWriter();
                        indexToWorkOn.Index.Flush(lastEtag);
                    };
                });

            innerFilteredOutIndexes.Push(indexToWorkOn);
            context.MarkIndexFilteredOut(indexName);
            return;
        }

        if (Log.IsDebugEnabled)
            Log.Debug("Going to index {0} documents in {1}: ({2})", batch.Ids.Count, indexToWorkOn, string.Join(", ", batch.Ids));

        results.Enqueue(new IndexingBatchForIndex
        {
            Batch = batch,
            IndexId = indexToWorkOn.IndexId,
            Index = indexToWorkOn.Index,
            LastIndexedEtag = indexToWorkOn.LastIndexedEtag
        });
    }, description: string.Format("Filtering documents for {0} indexes", indexesToWorkOn.Count));

    filteredOutIndexes = innerFilteredOutIndexes.ToList();
    foreach (var action in actions)
    {
        bool keepTrying = true;
        for (int i = 0; i < 10 && keepTrying; i++)
        {
            keepTrying = false;
            transactionalStorage.Batch(actionsAccessor =>
            {
                if (action != null)
                {
                    try
                    {
                        action(actionsAccessor);
                    }
                    catch (Exception e)
                    {
                        if (actionsAccessor.IsWriteConflict(e))
                        {
                            keepTrying = true;
                            return;
                        }
                        throw;
                    }
                }
            });

            if (keepTrying)
                Thread.Sleep(11);
        }
    }
    return results.Where(x => x != null);
}
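// The tail of FilterIndexes above retries each batched update up to ten times
// when it hits a write conflict, sleeping briefly between attempts. Below is
// that same pattern lifted into a standalone sketch; the RetryOnWriteConflict
// helper, its delegate parameters, and the default retry constants are
// illustrative assumptions (the real code inlines this loop and goes through
// its own storage accessor).
public static class WriteConflictRetrySketch
{
    public static void RetryOnWriteConflict(Action attempt, Func<Exception, bool> isWriteConflict,
        int maxAttempts = 10, int sleepMs = 11)
    {
        for (int i = 0; i < maxAttempts; i++)
        {
            try
            {
                attempt();
                return; // success, no retry needed
            }
            catch (Exception e)
            {
                if (isWriteConflict(e) == false)
                    throw; // only write conflicts are retriable here

                // back off briefly and let the competing writer finish
                Thread.Sleep(sleepMs);
            }
        }
    }
}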
private ReduceResultStats MultiStepReduce(IndexToWorkOn index, string[] keysToReduce, AbstractViewGenerator viewGenerator, List<object> itemsToDelete)
{
    var result = new ReduceResultStats();
    var needToMoveToMultiStep = new HashSet<string>();
    transactionalStorage.Batch(actions =>
    {
        foreach (var localReduceKey in keysToReduce)
        {
            var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexName, localReduceKey);

            if (lastPerformedReduceType != ReduceType.MultiStep)
                needToMoveToMultiStep.Add(localReduceKey);

            if (lastPerformedReduceType != ReduceType.SingleStep)
                continue;

            // we exceeded the limit of items to reduce in single step
            // now we need to schedule reductions at level 0 for all map results with given reduce key
            var mappedItems = actions.MapReduce.GetMappedBuckets(index.IndexName, localReduceKey).ToList();
            actions.MapReduce.ScheduleReductions(index.IndexName, 0,
                mappedItems.Select(x => new ReduceKeyAndBucket(x, localReduceKey)));
        }
    });

    for (int i = 0; i < 3; i++)
    {
        var level = i;

        bool retry = true;
        var itemsAlreadySeen = new HashSet<Tuple<string, int>>();
        while (retry)
        {
            transactionalStorage.Batch(actions =>
            {
                context.CancellationToken.ThrowIfCancellationRequested();

                var sp = Stopwatch.StartNew();
                var persistedResults = actions.MapReduce.GetItemsToReduce
                    (
                        level: level,
                        reduceKeys: keysToReduce,
                        index: index.IndexName,
                        itemsToDelete: itemsToDelete,
                        loadData: true,
                        take: context.CurrentNumberOfItemsToReduceInSingleBatch,
                        itemsAlreadySeen: itemsAlreadySeen
                    ).ToList();

                if (persistedResults.Count == 0)
                {
                    retry = false;
                    return;
                }

                result.count += persistedResults.Count;
                result.size += persistedResults.Sum(x => x.Size);

                if (Log.IsDebugEnabled)
                {
                    if (persistedResults.Count > 0)
                        Log.Debug(() => string.Format("Found {0} results for keys [{1}] for index {2} at level {3} in {4}",
                            persistedResults.Count,
                            string.Join(", ", persistedResults.Select(x => x.ReduceKey).Distinct()),
                            index.IndexName, level, sp.Elapsed));
                    else
                        Log.Debug("No reduce keys found for {0}", index.IndexName);
                }

                context.CancellationToken.ThrowIfCancellationRequested();

                var requiredReduceNextTime = persistedResults.Select(x => new ReduceKeyAndBucket(x.Bucket, x.ReduceKey))
                    .OrderBy(x => x.Bucket)
                    .Distinct()
                    .ToArray();
                foreach (var mappedResultInfo in requiredReduceNextTime)
                {
                    actions.MapReduce.RemoveReduceResults(index.IndexName, level + 1, mappedResultInfo.ReduceKey, mappedResultInfo.Bucket);
                }

                if (level != 2)
                {
                    var reduceKeysAndBuckets = requiredReduceNextTime
                        .Select(x => new ReduceKeyAndBucket(x.Bucket / 1024, x.ReduceKey))
                        .Distinct()
                        .ToArray();
                    actions.MapReduce.ScheduleReductions(index.IndexName, level + 1, reduceKeysAndBuckets);
                }

                var results = persistedResults
                    .Where(x => x.Data != null)
                    .GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data))
                    .ToArray();
                var reduceKeys = new HashSet<string>(persistedResults.Select(x => x.ReduceKey),
                    StringComparer.InvariantCultureIgnoreCase);
                context.ReducedPerSecIncreaseBy(results.Length);

                context.CancellationToken.ThrowIfCancellationRequested();
                sp = Stopwatch.StartNew();
                context.IndexStorage.Reduce(index.IndexName, viewGenerator, results, level, context, actions, reduceKeys);
                Log.Debug("Indexed {0} reduce keys in {1} with {2} results for index {3} in {4}",
                    reduceKeys.Count, sp.Elapsed, results.Length, index.IndexName, sp.Elapsed);
            });
        }
    }

    foreach (var reduceKey in needToMoveToMultiStep)
    {
        string localReduceKey = reduceKey;
        transactionalStorage.Batch(actions =>
            actions.MapReduce.UpdatePerformedReduceType(index.IndexName, localReduceKey, ReduceType.MultiStep));
    }

    return result;
}
private ReduceResultStats SingleStepReduce(IndexToWorkOn index, string[] keysToReduce, AbstractViewGenerator viewGenerator, List<object> itemsToDelete)
{
    var result = new ReduceResultStats();
    var needToMoveToSingleStep = new HashSet<string>();

    Log.Debug(() => string.Format("Executing single step reducing for {0} keys [{1}]", keysToReduce.Length, string.Join(", ", keysToReduce)));
    transactionalStorage.Batch(actions =>
    {
        var scheduledItems = actions.MapReduce.GetItemsToReduce
            (
                level: 0,
                reduceKeys: keysToReduce,
                index: index.IndexName,
                itemsToDelete: itemsToDelete,
                loadData: false,
                take: int.MaxValue, // just get all, we do the rate limit when we load the number of keys to reduce, anyway
                itemsAlreadySeen: new HashSet<Tuple<string, int>>()
            ).ToList();

        // Only look at the scheduled batch for this run, not the entire set of pending reductions.
        //var batchKeys = scheduledItems.Select(x => x.ReduceKey).ToArray();

        foreach (var reduceKey in keysToReduce)
        {
            var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexName, reduceKey);

            if (lastPerformedReduceType != ReduceType.SingleStep)
                needToMoveToSingleStep.Add(reduceKey);

            if (lastPerformedReduceType != ReduceType.MultiStep)
                continue;

            Log.Debug("Key {0} was moved from multi step to single step reduce, removing existing reduce results records", reduceKey);

            // now we are in single step but previously multi step reduce was performed for the given key
            var mappedBuckets = actions.MapReduce.GetMappedBuckets(index.IndexName, reduceKey).ToList();

            // add scheduled items too to be sure we will delete reduce results of already deleted documents
            mappedBuckets.AddRange(scheduledItems.Select(x => x.Bucket));

            foreach (var mappedBucket in mappedBuckets.Distinct())
            {
                actions.MapReduce.RemoveReduceResults(index.IndexName, 1, reduceKey, mappedBucket);
                actions.MapReduce.RemoveReduceResults(index.IndexName, 2, reduceKey, mappedBucket / 1024);
            }
        }

        var mappedResults = actions.MapReduce.GetMappedResults(
            index.IndexName,
            keysToReduce,
            loadData: true
        ).ToList();

        result.count += mappedResults.Count;
        result.size += mappedResults.Sum(x => x.Size);

        var reduceKeys = new HashSet<string>(keysToReduce);

        mappedResults.ApplyIfNotNull(x => x.Bucket = 0);

        var results = mappedResults
            .Where(x => x.Data != null)
            .GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data))
            .ToArray();

        context.ReducedPerSecIncreaseBy(results.Length);

        context.IndexStorage.Reduce(index.IndexName, viewGenerator, results, 2, context, actions, reduceKeys);
    });

    foreach (var reduceKey in needToMoveToSingleStep)
    {
        string localReduceKey = reduceKey;
        transactionalStorage.Batch(actions =>
            actions.MapReduce.UpdatePerformedReduceType(index.IndexName, localReduceKey, ReduceType.SingleStep));
    }

    return result;
}
private IEnumerable<Tuple<IndexToWorkOn, IndexingBatch>> FilterIndexes(IList<IndexToWorkOn> indexesToWorkOn, JsonDocument[] jsonDocs)
{
    var last = jsonDocs.Last();

    Debug.Assert(last.Etag != null);
    Debug.Assert(last.LastModified != null);

    var lastEtag = last.Etag.Value;
    var lastModified = last.LastModified.Value;

    var lastIndexedEtag = new ComparableByteArray(lastEtag.ToByteArray());

    var documentRetriever = new DocumentRetriever(null, context.ReadTriggers);

    var filteredDocs =
        BackgroundTaskExecuter.Instance.Apply(jsonDocs, doc =>
        {
            doc = documentRetriever.ExecuteReadTriggers(doc, null, ReadOperation.Index);
            return doc == null ? null : new { Doc = doc, Json = JsonToExpando.Convert(doc.ToJson()) };
        });

    log.Debug("After read triggers executed, {0} documents remained", filteredDocs.Count);

    var results = new Tuple<IndexToWorkOn, IndexingBatch>[indexesToWorkOn.Count];
    var actions = new Action<IStorageActionsAccessor>[indexesToWorkOn.Count];

    BackgroundTaskExecuter.Instance.ExecuteAll(context.Configuration, scheduler, indexesToWorkOn, (indexToWorkOn, i) =>
    {
        var indexLastIndexedEtag = new ComparableByteArray(indexToWorkOn.LastIndexedEtag.ToByteArray());
        if (indexLastIndexedEtag.CompareTo(lastIndexedEtag) >= 0)
            return;

        var indexName = indexToWorkOn.IndexName;
        var viewGenerator = context.IndexDefinitionStorage.GetViewGenerator(indexName);
        if (viewGenerator == null)
            return; // probably deleted

        var batch = new IndexingBatch();

        foreach (var item in filteredDocs)
        {
            // did we already index this document in this index?
            if (indexLastIndexedEtag.CompareTo(new ComparableByteArray(item.Doc.Etag.Value.ToByteArray())) >= 0)
                continue;

            // is the Raven-Entity-Name a match for the things the index executes on?
            if (viewGenerator.ForEntityNames.Count != 0 &&
                viewGenerator.ForEntityNames.Contains(item.Doc.Metadata.Value<string>(Constants.RavenEntityName)) == false)
            {
                continue;
            }

            batch.Add(item.Doc, item.Json);

            if (batch.DateTime == null)
                batch.DateTime = item.Doc.LastModified;
            else
                batch.DateTime = batch.DateTime > item.Doc.LastModified
                    ? item.Doc.LastModified
                    : batch.DateTime;
        }

        if (batch.Docs.Count == 0)
        {
            log.Debug("All documents have been filtered for {0}, no indexing will be performed, updating to {1}, {2}",
                indexName, lastEtag, lastModified);
            // we use it this way to batch all the updates together
            actions[i] = accessor => accessor.Indexing.UpdateLastIndexed(indexName, lastEtag, lastModified);
            return;
        }

        log.Debug("Going to index {0} documents in {1}", batch.Ids.Count, indexToWorkOn);
        results[i] = Tuple.Create(indexToWorkOn, batch);
    });

    transactionalStorage.Batch(actionsAccessor =>
    {
        foreach (var action in actions)
        {
            if (action != null)
                action(actionsAccessor);
        }
    });

    return results.Where(x => x != null);
}
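// The etag filtering above relies on ComparableByteArray giving a total
// ordering over the raw etag bytes. A minimal sketch of a lexicographic
// byte-array comparison of that kind is shown below; the real
// ComparableByteArray lives elsewhere in the codebase, so the details here
// (byte-by-byte comparison, length tie-break) are illustrative assumptions.
public struct ComparableByteArraySketch : IComparable<ComparableByteArraySketch>
{
    private readonly byte[] bytes;

    public ComparableByteArraySketch(byte[] bytes)
    {
        this.bytes = bytes;
    }

    public int CompareTo(ComparableByteArraySketch other)
    {
        // compare byte by byte, most significant first
        var shared = Math.Min(bytes.Length, other.bytes.Length);
        for (int i = 0; i < shared; i++)
        {
            if (bytes[i] != other.bytes[i])
                return bytes[i].CompareTo(other.bytes[i]);
        }
        // equal prefixes: the shorter array sorts first
        return bytes.Length.CompareTo(other.bytes.Length);
    }
}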