protected void UpdateIndexingStats(WorkContext context, IndexingWorkStats stats) { context.TransactionalStorage.Batch(accessor => { switch (stats.Operation) { case IndexingWorkStats.Status.Map: accessor.Indexing.UpdateIndexingStats(name, stats); break; case IndexingWorkStats.Status.Reduce: accessor.Indexing.UpdateReduceStats(name, stats); break; case IndexingWorkStats.Status.Ignore: break; default: throw new ArgumentOutOfRangeException(); } }); }
protected IEnumerable <object> RobustEnumerationReduce(IEnumerable <object> input, IndexingFunc func, IStorageActionsAccessor actions, WorkContext context, IndexingWorkStats stats) { // not strictly accurate, but if we get that many errors, probably an error anyway. return(new RobustEnumerator(context.Configuration.MaxNumberOfItemsToIndexInSingleBatch) { BeforeMoveNext = () => stats.ReduceAttempts++, CancelMoveNext = () => stats.ReduceAttempts--, OnError = (exception, o) => { context.AddError(name, TryGetDocKey(o), exception.Message ); logIndexing.WarnException( String.Format("Failed to execute indexing function on {0} on {1}", name, TryGetDocKey(o)), exception); stats.ReduceErrors++; } }.RobustEnumeration(input, func)); }
protected IEnumerable<object> RobustEnumerationIndex(IEnumerator<object> input, List<IndexingFunc> funcs, IndexingWorkStats stats,out Action<Exception, object> onError) { onError = (exception, o) => { string docId = null; var invalidSpatialException = exception as InvalidSpatialShapeException; if (invalidSpatialException != null) docId = invalidSpatialException.InvalidDocumentId; context.AddError(name, docId ?? TryGetDocKey(o), exception.Message, "Map" ); logIndexing.WarnException( String.Format("Failed to execute indexing function on {0} on {1}", name, TryGetDocKey(o)), exception); stats.IndexingErrors++; }; return new RobustEnumerator(context.CancellationToken, context.Configuration.MaxNumberOfItemsToIndexInSingleBatch) { BeforeMoveNext = () => Interlocked.Increment(ref stats.IndexingAttempts), CancelMoveNext = () => Interlocked.Decrement(ref stats.IndexingAttempts), OnError = onError }.RobustEnumeration(input, funcs); }
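// Illustrative sketch only (not the actual Raven.Database RobustEnumerator): a hypothetical wrapper
// with the same callback shape as the RobustEnumeration* methods above, showing why BeforeMoveNext
// increments the attempt counter and CancelMoveNext decrements it again when the indexing function
// throws, so a failed item ends up counted once as an error rather than as a successful attempt.
using System;
using System.Collections.Generic;
using System.Linq;

public class SimpleRobustEnumerator
{
    public Action BeforeMoveNext = () => { };
    public Action CancelMoveNext = () => { };
    public Action<Exception, object> OnError = (exception, item) => { };

    public IEnumerable<object> RobustEnumeration(IEnumerable<object> input, Func<object, IEnumerable<object>> func)
    {
        foreach (var item in input)
        {
            BeforeMoveNext(); // optimistically count the item as an attempt
            List<object> results;
            try
            {
                // materialize so exceptions thrown by the (lazy) indexing function surface here
                results = func(item).ToList();
            }
            catch (Exception e)
            {
                CancelMoveNext(); // the item failed, take it back out of the attempt count
                OnError(e, item); // callers log the failure and bump their error counter
                continue;
            }
            foreach (var result in results)
                yield return result;
        }
    }
}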
protected void UpdateIndexingStats(WorkContext workContext, IndexingWorkStats stats) { switch (stats.Operation) { case IndexingWorkStats.Status.Map: workContext.TransactionalStorage.Batch(accessor => accessor.Indexing.UpdateIndexingStats(name, stats)); break; case IndexingWorkStats.Status.Reduce: workContext.TransactionalStorage.Batch(accessor => accessor.Indexing.UpdateReduceStats(name, stats)); break; case IndexingWorkStats.Status.Ignore: break; default: throw new ArgumentOutOfRangeException(); } }
public override void IndexDocuments( AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp) { var count = 0; var sourceCount = 0; var sw = Stopwatch.StartNew(); var start = SystemTime.UtcNow; var deleted = new Dictionary<ReduceKeyAndBucket, int>(); var indexPerfStats = RecordCurrentBatch("Current Map", batch.Docs.Count); batch.SetIndexingPerformance(indexPerfStats); var documentsWrapped = batch.Docs.Select(doc => { sourceCount++; var documentId = doc.__document_id; actions.MapReduce.DeleteMappedResultsForDocumentId((string)documentId, indexId, deleted); return doc; }) .Where(x => x is FilteredDocument == false) .ToList(); var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>(); var allReferenceEtags = new ConcurrentQueue<IDictionary<string, Etag>>(); var allState = new ConcurrentQueue<Tuple<HashSet<ReduceKeyAndBucket>, IndexingWorkStats, Dictionary<string, int>>>(); int loadDocumentCount = 0; long loadDocumentDuration = 0; BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, partition => { var localStats = new IndexingWorkStats(); var localChanges = new HashSet<ReduceKeyAndBucket>(); var statsPerKey = new Dictionary<string, int>(); allState.Enqueue(Tuple.Create(localChanges, localStats, statsPerKey)); using (CurrentIndexingScope.Current = new CurrentIndexingScope(context.Database, PublicName)) { // we are writing to the transactional store from multiple threads here, and in a streaming fashion // should result in less memory and better perf context.TransactionalStorage.Batch(accessor => { var mapResults = RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, localStats); var currentDocumentResults = new List<object>(); string currentKey = null; bool skipDocument = false; foreach (var currentDoc in mapResults) { var documentId = GetDocumentId(currentDoc); if (documentId != currentKey) { count += ProcessBatch(viewGenerator, currentDocumentResults, currentKey, localChanges, accessor, statsPerKey); currentDocumentResults.Clear(); currentKey = documentId; } else if (skipDocument) { continue; } currentDocumentResults.Add(new DynamicJsonObject(RavenJObject.FromObject(currentDoc, jsonSerializer))); if (EnsureValidNumberOfOutputsForDocument(documentId, currentDocumentResults.Count) == false) { skipDocument = true; currentDocumentResults.Clear(); continue; } Interlocked.Increment(ref localStats.IndexingSuccesses); } count += ProcessBatch(viewGenerator, currentDocumentResults, currentKey, localChanges, accessor, statsPerKey); }); allReferenceEtags.Enqueue(CurrentIndexingScope.Current.ReferencesEtags); allReferencedDocs.Enqueue(CurrentIndexingScope.Current.ReferencedDocuments); Interlocked.Add(ref loadDocumentCount, CurrentIndexingScope.Current.LoadDocumentCount); Interlocked.Add(ref loadDocumentDuration, CurrentIndexingScope.Current.LoadDocumentDuration.ElapsedMilliseconds); } }); UpdateDocumentReferences(actions, allReferencedDocs, allReferenceEtags); var changed = allState.SelectMany(x => x.Item1).Concat(deleted.Keys) .Distinct() .ToList(); var stats = new IndexingWorkStats(allState.Select(x => x.Item2)); var reduceKeyStats = allState.SelectMany(x => x.Item3) .GroupBy(x => x.Key) .Select(g => new { g.Key, Count = g.Sum(x => x.Value) }) .ToList(); BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, reduceKeyStats, enumerator => context.TransactionalStorage.Batch(accessor => { while (enumerator.MoveNext()) { var reduceKeyStat = enumerator.Current; 
accessor.MapReduce.IncrementReduceKeyCounter(indexId, reduceKeyStat.Key, reduceKeyStat.Count); } })); BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, changed, enumerator => context.TransactionalStorage.Batch(accessor => { while (enumerator.MoveNext()) { accessor.MapReduce.ScheduleReductions(indexId, 0, enumerator.Current); } })); UpdateIndexingStats(context, stats); AddindexingPerformanceStat(new IndexingPerformanceStats { OutputCount = count, ItemsCount = sourceCount, InputCount = documentsWrapped.Count, Operation = "Map", Duration = sw.Elapsed, Started = start, LoadDocumentCount = loadDocumentCount, LoadDocumentDurationMs = loadDocumentDuration }); BatchCompleted("Current Map"); logIndexing.Debug("Mapped {0} documents for {1}", count, indexId); }
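// Hypothetical, simplified sketch of the "flush on key change" pattern used by the IndexDocuments
// overloads above: consecutive map results sharing a document id are buffered, the buffer is flushed
// whenever the id changes, and it is flushed once more after the loop so the final document's group
// is not lost. processBatch here stands in for the real ProcessBatch and is assumed to tolerate a
// null key with an empty buffer on the very first call.
using System;
using System.Collections.Generic;

static class KeyChangeBatching
{
    public static int GroupConsecutive(
        IEnumerable<KeyValuePair<string, object>> mapResults,   // (document id, map result)
        Func<string, List<object>, int> processBatch)           // returns the number of outputs written
    {
        var count = 0;
        var currentDocumentResults = new List<object>();
        string currentKey = null;

        foreach (var mapResult in mapResults)
        {
            if (mapResult.Key != currentKey)
            {
                // key changed: flush everything buffered for the previous document
                count += processBatch(currentKey, currentDocumentResults);
                currentDocumentResults.Clear();
                currentKey = mapResult.Key;
            }
            currentDocumentResults.Add(mapResult.Value);
        }

        // the last document never triggers a key change, so flush it explicitly
        count += processBatch(currentKey, currentDocumentResults);
        return count;
    }
}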
public override void IndexDocuments( AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp) { var count = 0; var sourceCount = 0; var sw = Stopwatch.StartNew(); var start = SystemTime.UtcNow; var changed = new HashSet <ReduceKeyAndBucket>(); var documentsWrapped = batch.Docs.Select(doc => { sourceCount++; var documentId = doc.__document_id; actions.MapReduce.DeleteMappedResultsForDocumentId((string)documentId, name, changed); return(doc); }) .Where(x => x is FilteredDocument == false); var items = new List <MapResultItem>(); var stats = new IndexingWorkStats(); var allReferencedDocs = new ConcurrentQueue <IDictionary <string, HashSet <string> > >(); using (CurrentIndexingScope.Current = new CurrentIndexingScope(LoadDocument, allReferencedDocs.Enqueue)) { var mapResults = RobustEnumerationIndex( documentsWrapped.GetEnumerator(), viewGenerator.MapDefinitions, actions, stats) .ToList(); actions.MapReduce.UpdateRemovedMapReduceStats(name, changed); foreach (var mappedResultFromDocument in mapResults.GroupBy(GetDocumentId)) { var dynamicResults = mappedResultFromDocument.Select(x => (object)new DynamicJsonObject(RavenJObject.FromObject(x, jsonSerializer))).ToList(); foreach ( var doc in RobustEnumerationReduceDuringMapPhase(dynamicResults.GetEnumerator(), viewGenerator.ReduceDefinition, actions, context)) { count++; var reduceValue = viewGenerator.GroupByExtraction(doc); if (reduceValue == null) { logIndexing.Debug("Field {0} is used as the reduce key and cannot be null, skipping document {1}", viewGenerator.GroupByExtraction, mappedResultFromDocument.Key); continue; } var reduceKey = ReduceKeyToString(reduceValue); var docId = mappedResultFromDocument.Key.ToString(); var data = GetMappedData(doc); items.Add(new MapResultItem { Data = data, DocId = docId, ReduceKey = reduceKey }); changed.Add(new ReduceKeyAndBucket(IndexingUtil.MapBucket(docId), reduceKey)); } } } IDictionary <string, HashSet <string> > result; while (allReferencedDocs.TryDequeue(out result)) { foreach (var referencedDocument in result) { actions.Indexing.UpdateDocumentReferences(name, referencedDocument.Key, referencedDocument.Value); actions.General.MaybePulseTransaction(); } } foreach (var mapResultItem in items) { actions.MapReduce.PutMappedResult(name, mapResultItem.DocId, mapResultItem.ReduceKey, mapResultItem.Data); actions.General.MaybePulseTransaction(); } UpdateIndexingStats(context, stats); actions.MapReduce.ScheduleReductions(name, 0, changed); AddindexingPerformanceStat(new IndexingPerformanceStats { OutputCount = count, InputCount = sourceCount, Operation = "Map", Duration = sw.Elapsed, Started = start }); logIndexing.Debug("Mapped {0} documents for {1}", count, name); }
public void UpdateIndexingStats(string index, IndexingWorkStats stats) { var indexStats = GetCurrentIndex(index); indexStats["attempts"] = indexStats.Value<int>("attempts") + stats.IndexingAttempts; indexStats["successes"] = indexStats.Value<int>("successes") + stats.IndexingSuccesses; indexStats["failures"] = indexStats.Value<int>("failures") + stats.IndexingErrors; storage.IndexingStats.UpdateKey(indexStats); }
public void UpdateReduceStats(int id, IndexingWorkStats stats) { SetCurrentIndexStatsToImpl(id); using (var update = new Update(session, IndexesStatsReduce, JET_prep.Replace)) { var oldAttempts = Api.RetrieveColumnAsInt32(session, IndexesStatsReduce, tableColumnsCache.IndexesStatsReduceColumns["reduce_attempts"]) ?? 0; Api.SetColumn(session, IndexesStatsReduce, tableColumnsCache.IndexesStatsReduceColumns["reduce_attempts"], oldAttempts + stats.ReduceAttempts); var oldErrors = Api.RetrieveColumnAsInt32(session, IndexesStatsReduce, tableColumnsCache.IndexesStatsReduceColumns["reduce_errors"]) ?? 0; Api.SetColumn(session, IndexesStatsReduce, tableColumnsCache.IndexesStatsReduceColumns["reduce_errors"], oldErrors + stats.ReduceErrors); var oldSuccesses = Api.RetrieveColumnAsInt32(session, IndexesStatsReduce, tableColumnsCache.IndexesStatsReduceColumns["reduce_successes"]) ?? 0; Api.SetColumn(session, IndexesStatsReduce, tableColumnsCache.IndexesStatsReduceColumns["reduce_successes"], oldSuccesses + stats.ReduceSuccesses); update.Save(); } }
protected IEnumerable<object> RobustEnumerationIndex(IEnumerable<object> input, IEnumerable<IndexingFunc> funcs, IStorageActionsAccessor actions, WorkContext context, IndexingWorkStats stats) { return new RobustEnumerator(context.Configuration.MaxNumberOfItemsToIndexInSingleBatch) { BeforeMoveNext = () => stats.IndexingAttempts++, CancelMoveNext = () => stats.IndexingAttempts--, OnError = (exception, o) => { context.AddError(name, TryGetDocKey(o), exception.Message ); logIndexing.WarnException( String.Format("Failed to execute indexing function on {0} on {1}", name, TryGetDocKey(o)), exception); stats.IndexingErrors++; } }.RobustEnumeration(input, funcs); }
protected IEnumerable <object> RobustEnumerationIndex(IEnumerable <object> input, IEnumerable <IndexingFunc> funcs, IStorageActionsAccessor actions, WorkContext context, IndexingWorkStats stats) { return(new RobustEnumerator(context.Configuration.MaxNumberOfItemsToIndexInSingleBatch) { BeforeMoveNext = () => stats.IndexingAttempts++, CancelMoveNext = () => stats.IndexingAttempts--, OnError = (exception, o) => { context.AddError(name, TryGetDocKey(o), exception.Message ); logIndexing.WarnException( String.Format("Failed to execute indexing function on {0} on {1}", name, TryGetDocKey(o)), exception); stats.IndexingErrors++; } }.RobustEnumeration(input, funcs)); }
protected void Write(WorkContext context, Func <IndexWriter, Analyzer, IndexingWorkStats, int> action) { if (disposed) { throw new ObjectDisposedException("Index " + name + " has been disposed"); } lock (writeLock) { bool shouldRecreateSearcher; var toDispose = new List <Action>(); Analyzer searchAnalyzer = null; try { try { searchAnalyzer = CreateAnalyzer(new LowerCaseKeywordAnalyzer(), toDispose); } catch (Exception e) { context.AddError(name, "Creating Analyzer", e.ToString()); throw; } if (indexWriter == null) { indexWriter = CreateIndexWriter(directory); } var stats = new IndexingWorkStats(); try { var changedDocs = action(indexWriter, searchAnalyzer, stats); docCountSinceLastOptimization += changedDocs; shouldRecreateSearcher = changedDocs > 0; foreach (IIndexExtension indexExtension in indexExtensions.Values) { indexExtension.OnDocumentsIndexed(currentlyIndexDocuments); } } catch (Exception e) { context.AddError(name, null, e.ToString()); throw; } UpdateIndexingStats(context, stats); WriteTempIndexToDiskIfNeeded(context); if (configuration.TransactionMode == TransactionMode.Safe) { Flush(); // just make sure changes are flushed to disk } } finally { currentlyIndexDocuments.Clear(); if (searchAnalyzer != null) { searchAnalyzer.Close(); } foreach (Action dispose in toDispose) { dispose(); } } if (shouldRecreateSearcher) { RecreateSearcher(); } } }
public void UpdateIndexingStats(string index, IndexingWorkStats stats) { SetCurrentIndexStatsToImpl(index); using (var update = new Update(session, IndexesStats, JET_prep.Replace)) { var oldAttempts = Api.RetrieveColumnAsInt32(session, IndexesStats, tableColumnsCache.IndexesStatsColumns["attempts"]) ?? 0; Api.SetColumn(session, IndexesStats, tableColumnsCache.IndexesStatsColumns["attempts"], oldAttempts + stats.IndexingAttempts); var oldErrors = Api.RetrieveColumnAsInt32(session, IndexesStats, tableColumnsCache.IndexesStatsColumns["errors"]) ?? 0; Api.SetColumn(session, IndexesStats, tableColumnsCache.IndexesStatsColumns["errors"], oldErrors + stats.IndexingErrors); var oldSuccesses = Api.RetrieveColumnAsInt32(session, IndexesStats, tableColumnsCache.IndexesStatsColumns["successes"]) ?? 0; Api.SetColumn(session, IndexesStats, tableColumnsCache.IndexesStatsColumns["successes"], oldSuccesses + stats.IndexingSuccesses); update.Save(); } }
public override void IndexDocuments( AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp) { var count = 0; var sourceCount = 0; var sw = Stopwatch.StartNew(); var start = SystemTime.UtcNow; var changed = new HashSet<ReduceKeyAndBucket>(); var documentsWrapped = batch.Docs.Select(doc => { sourceCount++; var documentId = doc.__document_id; actions.MapReduce.DeleteMappedResultsForDocumentId((string)documentId, name, changed); return doc; }) .Where(x => x is FilteredDocument == false) .ToList(); var items = new ConcurrentQueue<MapResultItem>(); var stats = new IndexingWorkStats(); var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>(); if (documentsWrapped.Count > 0) actions.MapReduce.UpdateRemovedMapReduceStats(name, changed); BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, partition => { using (CurrentIndexingScope.Current = new CurrentIndexingScope(LoadDocument, allReferencedDocs.Enqueue)) { var mapResults = RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, stats); var currentDocumentResults = new List<object>(); string currentKey = null; foreach (var currentDoc in mapResults) { var documentId = GetDocumentId(currentDoc); if (documentId != currentKey) { count += ProcessBatch(viewGenerator, currentDocumentResults, currentKey, items); currentDocumentResults.Clear(); currentKey = documentId; } currentDocumentResults.Add(new DynamicJsonObject(RavenJObject.FromObject(currentDoc, jsonSerializer))); } count += ProcessBatch(viewGenerator, currentDocumentResults, currentKey, items); } }); IDictionary<string, HashSet<string>> result; while (allReferencedDocs.TryDequeue(out result)) { foreach (var referencedDocument in result) { actions.Indexing.UpdateDocumentReferences(name, referencedDocument.Key, referencedDocument.Value); actions.General.MaybePulseTransaction(); } } foreach (var mapResultItem in items) { changed.Add(new ReduceKeyAndBucket(mapResultItem.Bucket, mapResultItem.ReduceKey)); actions.MapReduce.PutMappedResult(name, mapResultItem.DocId, mapResultItem.ReduceKey, mapResultItem.Data); actions.General.MaybePulseTransaction(); } UpdateIndexingStats(context, stats); actions.MapReduce.ScheduleReductions(name, 0, changed); AddindexingPerformanceStat(new IndexingPerformanceStats { OutputCount = count, InputCount = sourceCount, Operation = "Map", Duration = sw.Elapsed, Started = start }); logIndexing.Debug("Mapped {0} documents for {1}", count, name); }
public override void IndexDocuments( AbstractViewGenerator viewGenerator, IndexingBatch batch, WorkContext context, IStorageActionsAccessor actions, DateTime minimumTimestamp) { var count = 0; var sourceCount = 0; var sw = Stopwatch.StartNew(); var start = SystemTime.UtcNow; var changed = new HashSet <ReduceKeyAndBucket>(); var documentsWrapped = batch.Docs.Select(doc => { sourceCount++; var documentId = doc.__document_id; actions.MapReduce.DeleteMappedResultsForDocumentId((string)documentId, name, changed); return(doc); }) .Where(x => x is FilteredDocument == false); var items = new List <MapResultItem>(); var stats = new IndexingWorkStats(); foreach ( var mappedResultFromDocument in GroupByDocumentId(context, RobustEnumerationIndex(documentsWrapped.GetEnumerator(), viewGenerator.MapDefinitions, actions, stats))) { var dynamicResults = mappedResultFromDocument.Select(x => (object)new DynamicJsonObject(RavenJObject.FromObject(x, jsonSerializer))).ToList(); foreach ( var doc in RobustEnumerationReduceDuringMapPhase(dynamicResults.GetEnumerator(), viewGenerator.ReduceDefinition, actions, context)) { count++; var reduceValue = viewGenerator.GroupByExtraction(doc); if (reduceValue == null) { logIndexing.Debug("Field {0} is used as the reduce key and cannot be null, skipping document {1}", viewGenerator.GroupByExtraction, mappedResultFromDocument.Key); continue; } var reduceKey = ReduceKeyToString(reduceValue); var docId = mappedResultFromDocument.Key.ToString(); var data = GetMappedData(doc); logIndexing.Debug("Mapped result for index '{0}' doc '{1}': '{2}'", name, docId, data); items.Add(new MapResultItem { Data = data, DocId = docId, ReduceKey = reduceKey }); changed.Add(new ReduceKeyAndBucket(IndexingUtil.MapBucket(docId), reduceKey)); } } int mapCount = 0; foreach (var mapResultItem in items) { actions.MapReduce.PutMappedResult(name, mapResultItem.DocId, mapResultItem.ReduceKey, mapResultItem.Data); if (mapCount++ % 50000 == 0) { // The reason this is here is to protect us from Version Store Out Of Memory error during indexing // this can happen if we have indexes that output a VERY large number of items per doc. actions.General.PulseTransaction(); } } UpdateIndexingStats(context, stats); actions.MapReduce.ScheduleReductions(name, 0, changed); AddindexingPerformanceStat(new IndexingPerformanceStats { OutputCount = count, InputCount = sourceCount, Operation = "Map", Duration = sw.Elapsed, Started = start }); logIndexing.Debug("Mapped {0} documents for {1}", count, name); }
public override void IndexDocuments( AbstractViewGenerator viewGenerator, IEnumerable <dynamic> documents, WorkContext context, IStorageActionsAccessor actions, DateTime minimumTimestamp) { var count = 0; var sourceCount = 0; var sw = Stopwatch.StartNew(); var changed = new HashSet <ReduceKeyAndBucket>(); var documentsWrapped = documents.Select(doc => { sourceCount++; var documentId = doc.__document_id; actions.MapReduce.DeleteMappedResultsForDocumentId((string)documentId, name, changed); return(doc); }) .Where(x => x is FilteredDocument == false); var stats = new IndexingWorkStats(); foreach ( var mappedResultFromDocument in GroupByDocumentId(context, RobustEnumerationIndex(documentsWrapped, viewGenerator.MapDefinitions, actions, context, stats))) { foreach ( var doc in RobustEnumerationReduceDuringMapPhase(mappedResultFromDocument, viewGenerator.ReduceDefinition, actions, context)) { count++; var reduceValue = viewGenerator.GroupByExtraction(doc); if (reduceValue == null) { logIndexing.Debug("Field {0} is used as the reduce key and cannot be null, skipping document {1}", viewGenerator.GroupByExtraction, mappedResultFromDocument.Key); continue; } var reduceKey = ReduceKeyToString(reduceValue); var docId = mappedResultFromDocument.Key.ToString(); var data = GetMappedData(doc); logIndexing.Debug("Mapped result for index '{0}' doc '{1}': '{2}'", name, docId, data); actions.MapReduce.PutMappedResult(name, docId, reduceKey, data); changed.Add(new ReduceKeyAndBucket(IndexingUtil.MapBucket(docId), reduceKey)); } } UpdateIndexingStats(context, stats); actions.MapReduce.ScheduleReductions(name, 0, changed); AddindexingPerformanceStat(new IndexingPerformanceStats { OutputCount = count, InputCount = sourceCount, Operation = "Map", Duration = sw.Elapsed }); logIndexing.Debug("Mapped {0} documents for {1}", count, name); }
protected void Write(WorkContext context, Func<IndexWriter, Analyzer, IndexingWorkStats, bool> action) { if (disposed) throw new ObjectDisposedException("Index " + name + " has been disposed"); lock (writeLock) { bool shouldRecreateSearcher; var toDispose = new List<Action>(); Analyzer searchAnalyzer = null; try { try { searchAnalyzer = CreateAnalyzer(new LowerCaseKeywordAnalyzer(), toDispose); } catch (Exception e) { context.AddError(name, "Creating Analyzer", e.ToString()); throw; } if (indexWriter == null) { indexWriter = CreateIndexWriter(directory); } var stats = new IndexingWorkStats(); try { shouldRecreateSearcher = action(indexWriter, searchAnalyzer, stats); foreach (IIndexExtension indexExtension in indexExtensions.Values) { indexExtension.OnDocumentsIndexed(currentlyIndexDocuments); } } catch (Exception e) { context.AddError(name, null, e.ToString()); throw; } UpdateIndexingStats(context, stats); WriteTempIndexToDiskIfNeeded(context); if (configuration.TransactionMode == TransactionMode.Safe) { Flush(optimize: false); } } finally { currentlyIndexDocuments.Clear(); if (searchAnalyzer != null) searchAnalyzer.Close(); foreach (Action dispose in toDispose) { dispose(); } } if (shouldRecreateSearcher) RecreateSearcher(); } }
public override void IndexDocuments( AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp) { var count = 0; var sourceCount = 0; var sw = Stopwatch.StartNew(); var start = SystemTime.UtcNow; var deleted = new Dictionary <ReduceKeyAndBucket, int>(); RecordCurrentBatch("Current Map", batch.Docs.Count); var documentsWrapped = batch.Docs.Select(doc => { sourceCount++; var documentId = doc.__document_id; actions.MapReduce.DeleteMappedResultsForDocumentId((string)documentId, indexId, deleted); return(doc); }) .Where(x => x is FilteredDocument == false) .ToList(); var allReferencedDocs = new ConcurrentQueue <IDictionary <string, HashSet <string> > >(); var allReferenceEtags = new ConcurrentQueue <IDictionary <string, Etag> >(); var allState = new ConcurrentQueue <Tuple <HashSet <ReduceKeyAndBucket>, IndexingWorkStats, Dictionary <string, int> > >(); BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, partition => { var localStats = new IndexingWorkStats(); var localChanges = new HashSet <ReduceKeyAndBucket>(); var statsPerKey = new Dictionary <string, int>(); allState.Enqueue(Tuple.Create(localChanges, localStats, statsPerKey)); using (CurrentIndexingScope.Current = new CurrentIndexingScope(context.Database, PublicName)) { // we are writing to the transactional store from multiple threads here, and in a streaming fashion // should result in less memory and better perf context.TransactionalStorage.Batch(accessor => { var mapResults = RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, localStats); var currentDocumentResults = new List <object>(); string currentKey = null; foreach (var currentDoc in mapResults) { var documentId = GetDocumentId(currentDoc); if (documentId != currentKey) { count += ProcessBatch(viewGenerator, currentDocumentResults, currentKey, localChanges, accessor, statsPerKey); currentDocumentResults.Clear(); currentKey = documentId; } currentDocumentResults.Add(new DynamicJsonObject(RavenJObject.FromObject(currentDoc, jsonSerializer))); EnsureValidNumberOfOutputsForDocument(documentId, currentDocumentResults.Count); Interlocked.Increment(ref localStats.IndexingSuccesses); } count += ProcessBatch(viewGenerator, currentDocumentResults, currentKey, localChanges, accessor, statsPerKey); }); allReferenceEtags.Enqueue(CurrentIndexingScope.Current.ReferencesEtags); allReferencedDocs.Enqueue(CurrentIndexingScope.Current.ReferencedDocuments); } }); UpdateDocumentReferences(actions, allReferencedDocs, allReferenceEtags); var changed = allState.SelectMany(x => x.Item1).Concat(deleted.Keys) .Distinct() .ToList(); var stats = new IndexingWorkStats(allState.Select(x => x.Item2)); var reduceKeyStats = allState.SelectMany(x => x.Item3) .GroupBy(x => x.Key) .Select(g => new { g.Key, Count = g.Sum(x => x.Value) }) .ToList(); BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, reduceKeyStats, enumerator => context.TransactionalStorage.Batch(accessor => { while (enumerator.MoveNext()) { var reduceKeyStat = enumerator.Current; accessor.MapReduce.IncrementReduceKeyCounter(indexId, reduceKeyStat.Key, reduceKeyStat.Count); } })); BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, changed, enumerator => context.TransactionalStorage.Batch(accessor => { while (enumerator.MoveNext()) { accessor.MapReduce.ScheduleReductions(indexId, 0, enumerator.Current); } })); UpdateIndexingStats(context, stats); AddindexingPerformanceStat(new IndexingPerformanceStats { OutputCount = count, ItemsCount = 
sourceCount, InputCount = documentsWrapped.Count, Operation = "Map", Duration = sw.Elapsed, Started = start }); BatchCompleted("Current Map"); logIndexing.Debug("Mapped {0} documents for {1}", count, indexId); }
public void UpdateIndexingStats(int id, IndexingWorkStats stats) { SetCurrentIndexStatsToImpl(id); using (var update = new Update(session, IndexesStats, JET_prep.Replace)) { var oldAttempts = Api.RetrieveColumnAsInt32(session, IndexesStats, tableColumnsCache.IndexesStatsColumns["attempts"]) ?? 0; Api.SetColumn(session, IndexesStats, tableColumnsCache.IndexesStatsColumns["attempts"], oldAttempts + stats.IndexingAttempts); var oldErrors = Api.RetrieveColumnAsInt32(session, IndexesStats, tableColumnsCache.IndexesStatsColumns["errors"]) ?? 0; Api.SetColumn(session, IndexesStats, tableColumnsCache.IndexesStatsColumns["errors"], oldErrors + stats.IndexingErrors); var oldSuccesses = Api.RetrieveColumnAsInt32(session, IndexesStats, tableColumnsCache.IndexesStatsColumns["successes"]) ?? 0; Api.SetColumn(session, IndexesStats, tableColumnsCache.IndexesStatsColumns["successes"], oldSuccesses + stats.IndexingSuccesses); Api.SetColumn(session, IndexesStats, tableColumnsCache.IndexesStatsColumns["last_indexing_time"], SystemTime.UtcNow.ToBinary()); update.Save(); } }
public override void IndexDocuments( AbstractViewGenerator viewGenerator, IEnumerable <dynamic> documents, WorkContext context, IStorageActionsAccessor actions, DateTime minimumTimestamp) { var count = 0; // we mark the reduce keys to delete when we delete the mapped results, then we remove // any reduce key that is actually being used to generate new mapped results // this way, only reduces that removed data will force us to use the tasks approach var reduceKeysToDelete = new HashSet <string>(StringComparer.InvariantCultureIgnoreCase); var documentsWrapped = documents.Select(doc => { var documentId = doc.__document_id; foreach (var reduceKey in actions.MappedResults.DeleteMappedResultsForDocumentId((string)documentId, name)) { reduceKeysToDelete.Add(reduceKey); } return(doc); }); var stats = new IndexingWorkStats(); foreach (var mappedResultFromDocument in GroupByDocumentId(RobustEnumerationIndex(documentsWrapped, viewGenerator.MapDefinitions, actions, context, stats))) { foreach (var doc in RobustEnumerationReduceDuringMapPhase(mappedResultFromDocument, viewGenerator.ReduceDefinition, actions, context)) { count++; var reduceValue = viewGenerator.GroupByExtraction(doc); if (reduceValue == null) { logIndexing.Debug("Field {0} is used as the reduce key and cannot be null, skipping document {1}", viewGenerator.GroupByExtraction, mappedResultFromDocument.Key); continue; } var reduceKey = ReduceKeyToString(reduceValue); var docId = mappedResultFromDocument.Key.ToString(); reduceKeysToDelete.Remove((string)reduceKey); var data = GetMapedData(doc); logIndexing.Debug("Mapped result for index '{0}' doc '{1}': '{2}'", name, docId, data); var hash = ComputeHash(name, reduceKey); actions.MappedResults.PutMappedResult(name, docId, reduceKey, data, hash); } } UpdateIndexingStats(context, stats); if (reduceKeysToDelete.Count > 0) { actions.Tasks.AddTask(new ReduceTask { Index = name, ReduceKeys = reduceKeysToDelete.ToArray() }, minimumTimestamp); } logIndexing.Debug("Mapped {0} documents for {1}", count, name); }
public void UpdateReduceStats(string index, IndexingWorkStats stats) { var indexStats = GetCurrentIndex(index); indexStats["reduce_attempts"] = indexStats.Value<int>("reduce_attempts") + stats.ReduceAttempts; indexStats["reduce_successes"] = indexStats.Value<int>("reduce_successes") + stats.ReduceSuccesses; indexStats["reduce_failures"] = indexStats.Value<int>("reduce_failures") + stats.ReduceErrors; storage.IndexingStats.UpdateKey(indexStats); }
public void UpdateIndexingStats(string index, IndexingWorkStats stats) { using (storage.WriteLock()) { var indexStats = (RavenJObject) GetCurrentIndex(index).CloneToken(); indexStats["attempts"] = indexStats.Value<int>("attempts") + stats.IndexingAttempts; indexStats["successes"] = indexStats.Value<int>("successes") + stats.IndexingSuccesses; indexStats["failures"] = indexStats.Value<int>("failures") + stats.IndexingErrors; indexStats["lastIndexingTime"] = SystemTime.UtcNow; storage.IndexingStats.UpdateKey(indexStats); } }
public override IndexingPerformanceStats IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp, CancellationToken token) { token.ThrowIfCancellationRequested(); var count = 0; var sourceCount = 0; var deleted = new Dictionary <ReduceKeyAndBucket, int>(); var performance = RecordCurrentBatch("Current Map", "Map", batch.Docs.Count); var performanceStats = new List <BasePerformanceStats>(); var usedStorageAccessors = new ConcurrentSet <IStorageActionsAccessor>(); if (usedStorageAccessors.TryAdd(actions)) { var storageCommitDuration = new Stopwatch(); actions.BeforeStorageCommit += storageCommitDuration.Start; actions.AfterStorageCommit += () => { storageCommitDuration.Stop(); performanceStats.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds)); }; } var deleteMappedResultsDuration = new Stopwatch(); var documentsWrapped = batch.Docs.Select(doc => { token.ThrowIfCancellationRequested(); sourceCount++; var documentId = doc.__document_id; using (StopwatchScope.For(deleteMappedResultsDuration)) { actions.MapReduce.DeleteMappedResultsForDocumentId((string)documentId, indexId, deleted); } return(doc); }) .Where(x => x is FilteredDocument == false) .ToList(); performanceStats.Add(new PerformanceStats { Name = IndexingOperation.Map_DeleteMappedResults, DurationMs = deleteMappedResultsDuration.ElapsedMilliseconds, }); var allReferencedDocs = new ConcurrentQueue <IDictionary <string, HashSet <string> > >(); var allReferenceEtags = new ConcurrentQueue <IDictionary <string, Etag> >(); var allState = new ConcurrentQueue <Tuple <HashSet <ReduceKeyAndBucket>, IndexingWorkStats, Dictionary <string, int> > >(); var parallelOperations = new ConcurrentQueue <ParallelBatchStats>(); var parallelProcessingStart = SystemTime.UtcNow; BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, partition => { token.ThrowIfCancellationRequested(); var parallelStats = new ParallelBatchStats { StartDelay = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds }; var localStats = new IndexingWorkStats(); var localChanges = new HashSet <ReduceKeyAndBucket>(); var statsPerKey = new Dictionary <string, int>(); var linqExecutionDuration = new Stopwatch(); var reduceInMapLinqExecutionDuration = new Stopwatch(); var putMappedResultsDuration = new Stopwatch(); var convertToRavenJObjectDuration = new Stopwatch(); allState.Enqueue(Tuple.Create(localChanges, localStats, statsPerKey)); using (CurrentIndexingScope.Current = new CurrentIndexingScope(context.Database, PublicName)) { // we are writing to the transactional store from multiple threads here, and in a streaming fashion // should result in less memory and better perf context.TransactionalStorage.Batch(accessor => { if (usedStorageAccessors.TryAdd(accessor)) { var storageCommitDuration = new Stopwatch(); accessor.BeforeStorageCommit += storageCommitDuration.Start; accessor.AfterStorageCommit += () => { storageCommitDuration.Stop(); parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds)); }; } var mapResults = RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, localStats, linqExecutionDuration); var currentDocumentResults = new List <object>(); string currentKey = null; bool skipDocument = false; foreach (var currentDoc in mapResults) { token.ThrowIfCancellationRequested(); var documentId = GetDocumentId(currentDoc); if (documentId 
!= currentKey) { count += ProcessBatch(viewGenerator, currentDocumentResults, currentKey, localChanges, accessor, statsPerKey, reduceInMapLinqExecutionDuration, putMappedResultsDuration, convertToRavenJObjectDuration); currentDocumentResults.Clear(); currentKey = documentId; } else if (skipDocument) { continue; } RavenJObject currentDocJObject; using (StopwatchScope.For(convertToRavenJObjectDuration)) { currentDocJObject = RavenJObject.FromObject(currentDoc, jsonSerializer); } currentDocumentResults.Add(new DynamicJsonObject(currentDocJObject)); if (EnsureValidNumberOfOutputsForDocument(documentId, currentDocumentResults.Count) == false) { skipDocument = true; currentDocumentResults.Clear(); continue; } Interlocked.Increment(ref localStats.IndexingSuccesses); } count += ProcessBatch(viewGenerator, currentDocumentResults, currentKey, localChanges, accessor, statsPerKey, reduceInMapLinqExecutionDuration, putMappedResultsDuration, convertToRavenJObjectDuration); parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.LoadDocument, CurrentIndexingScope.Current.LoadDocumentDuration.ElapsedMilliseconds)); parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Linq_MapExecution, linqExecutionDuration.ElapsedMilliseconds)); parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Linq_ReduceLinqExecution, reduceInMapLinqExecutionDuration.ElapsedMilliseconds)); parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Map_PutMappedResults, putMappedResultsDuration.ElapsedMilliseconds)); parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Map_ConvertToRavenJObject, convertToRavenJObjectDuration.ElapsedMilliseconds)); parallelOperations.Enqueue(parallelStats); }); allReferenceEtags.Enqueue(CurrentIndexingScope.Current.ReferencesEtags); allReferencedDocs.Enqueue(CurrentIndexingScope.Current.ReferencedDocuments); } }); performanceStats.Add(new ParallelPerformanceStats { NumberOfThreads = parallelOperations.Count, DurationMs = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds, BatchedOperations = parallelOperations.ToList() }); var updateDocumentReferencesDuration = new Stopwatch(); using (StopwatchScope.For(updateDocumentReferencesDuration)) { UpdateDocumentReferences(actions, allReferencedDocs, allReferenceEtags); } performanceStats.Add(PerformanceStats.From(IndexingOperation.UpdateDocumentReferences, updateDocumentReferencesDuration.ElapsedMilliseconds)); var changed = allState.SelectMany(x => x.Item1).Concat(deleted.Keys) .Distinct() .ToList(); var stats = new IndexingWorkStats(allState.Select(x => x.Item2)); var reduceKeyStats = allState.SelectMany(x => x.Item3) .GroupBy(x => x.Key) .Select(g => new { g.Key, Count = g.Sum(x => x.Value) }) .ToList(); BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, reduceKeyStats, enumerator => context.TransactionalStorage.Batch(accessor => { while (enumerator.MoveNext()) { var reduceKeyStat = enumerator.Current; accessor.MapReduce.IncrementReduceKeyCounter(indexId, reduceKeyStat.Key, reduceKeyStat.Count); } })); actions.General.MaybePulseTransaction(); var parallelReductionOperations = new ConcurrentQueue <ParallelBatchStats>(); var parallelReductionStart = SystemTime.UtcNow; BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, changed, enumerator => context.TransactionalStorage.Batch(accessor => { var parallelStats = new ParallelBatchStats { StartDelay = (long)(SystemTime.UtcNow - parallelReductionStart).TotalMilliseconds }; var scheduleReductionsDuration = new 
Stopwatch(); using (StopwatchScope.For(scheduleReductionsDuration)) { while (enumerator.MoveNext()) { accessor.MapReduce.ScheduleReductions(indexId, 0, enumerator.Current); accessor.General.MaybePulseTransaction(); } } parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Map_ScheduleReductions, scheduleReductionsDuration.ElapsedMilliseconds)); parallelReductionOperations.Enqueue(parallelStats); })); performanceStats.Add(new ParallelPerformanceStats { NumberOfThreads = parallelReductionOperations.Count, DurationMs = (long)(SystemTime.UtcNow - parallelReductionStart).TotalMilliseconds, BatchedOperations = parallelReductionOperations.ToList() }); UpdateIndexingStats(context, stats); performance.OnCompleted = () => BatchCompleted("Current Map", "Map", sourceCount, count, performanceStats); logIndexing.Debug("Mapped {0} documents for {1}", count, indexId); return(performance); }
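// Minimal sketch (an assumption, not necessarily the real StopwatchScope) of the
// "using (StopwatchScope.For(sw)) { ... }" idiom in the snippets above: the scope resumes the
// shared Stopwatch when created and pauses it again on Dispose, so a single stopwatch can
// accumulate the time spent across many disjoint code regions for the performance stats.
using System;
using System.Diagnostics;

public sealed class StopwatchScopeSketch : IDisposable
{
    private readonly Stopwatch stopwatch;

    private StopwatchScopeSketch(Stopwatch stopwatch)
    {
        this.stopwatch = stopwatch;
        stopwatch.Start(); // resume timing; Elapsed keeps accumulating across scopes
    }

    public static StopwatchScopeSketch For(Stopwatch stopwatch)
    {
        return new StopwatchScopeSketch(stopwatch);
    }

    public void Dispose()
    {
        stopwatch.Stop(); // pause so time spent outside the scope is not counted
    }
}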
protected IEnumerable<object> RobustEnumerationIndex(IEnumerator<object> input, List<IndexingFunc> funcs, IndexingWorkStats stats) { return new RobustEnumerator(context.CancellationToken, context.Configuration.MaxNumberOfItemsToIndexInSingleBatch) { BeforeMoveNext = () => Interlocked.Increment(ref stats.IndexingAttempts), CancelMoveNext = () => Interlocked.Decrement(ref stats.IndexingAttempts), OnError = (exception, o) => { context.AddError(name, TryGetDocKey(o), exception.Message, "Map" ); logIndexing.WarnException( String.Format("Failed to execute indexing function on {0} on {1}", name, TryGetDocKey(o)), exception); stats.IndexingErrors++; } }.RobustEnumeration(input, funcs); }
public override IndexingPerformanceStats IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp, CancellationToken token) { token.ThrowIfCancellationRequested(); var count = 0; var sourceCount = 0; var deleted = new Dictionary<ReduceKeyAndBucket, int>(); var performance = RecordCurrentBatch("Current Map", "Map", batch.Docs.Count); var performanceStats = new List<BasePerformanceStats>(); var usedStorageAccessors = new ConcurrentSet<IStorageActionsAccessor>(); if (usedStorageAccessors.TryAdd(actions)) { var storageCommitDuration = new Stopwatch(); actions.BeforeStorageCommit += storageCommitDuration.Start; actions.AfterStorageCommit += () => { storageCommitDuration.Stop(); performanceStats.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds)); }; } var deleteMappedResultsDuration = new Stopwatch(); var documentsWrapped = batch.Docs.Select(doc => { token.ThrowIfCancellationRequested(); sourceCount++; var documentId = doc.__document_id; using (StopwatchScope.For(deleteMappedResultsDuration)) { actions.MapReduce.DeleteMappedResultsForDocumentId((string)documentId, indexId, deleted); } return doc; }) .Where(x => x is FilteredDocument == false) .ToList(); performanceStats.Add(new PerformanceStats { Name = IndexingOperation.Map_DeleteMappedResults, DurationMs = deleteMappedResultsDuration.ElapsedMilliseconds, }); var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>(); var allReferenceEtags = new ConcurrentQueue<IDictionary<string, Etag>>(); var allState = new ConcurrentQueue<Tuple<HashSet<ReduceKeyAndBucket>, IndexingWorkStats, Dictionary<string, int>>>(); var parallelOperations = new ConcurrentQueue<ParallelBatchStats>(); var parallelProcessingStart = SystemTime.UtcNow; BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, partition => { token.ThrowIfCancellationRequested(); var parallelStats = new ParallelBatchStats { StartDelay = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds }; var localStats = new IndexingWorkStats(); var localChanges = new HashSet<ReduceKeyAndBucket>(); var statsPerKey = new Dictionary<string, int>(); var linqExecutionDuration = new Stopwatch(); var reduceInMapLinqExecutionDuration = new Stopwatch(); var putMappedResultsDuration = new Stopwatch(); var convertToRavenJObjectDuration = new Stopwatch(); allState.Enqueue(Tuple.Create(localChanges, localStats, statsPerKey)); using (CurrentIndexingScope.Current = new CurrentIndexingScope(context.Database, PublicName)) { // we are writing to the transactional store from multiple threads here, and in a streaming fashion // should result in less memory and better perf context.TransactionalStorage.Batch(accessor => { if (usedStorageAccessors.TryAdd(accessor)) { var storageCommitDuration = new Stopwatch(); accessor.BeforeStorageCommit += storageCommitDuration.Start; accessor.AfterStorageCommit += () => { storageCommitDuration.Stop(); parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds)); }; } var mapResults = RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, localStats, linqExecutionDuration); var currentDocumentResults = new List<object>(); string currentKey = null; bool skipDocument = false; foreach (var currentDoc in mapResults) { token.ThrowIfCancellationRequested(); var documentId = GetDocumentId(currentDoc); if (documentId != currentKey) { count 
+= ProcessBatch(viewGenerator, currentDocumentResults, currentKey, localChanges, accessor, statsPerKey, reduceInMapLinqExecutionDuration, putMappedResultsDuration, convertToRavenJObjectDuration); currentDocumentResults.Clear(); currentKey = documentId; } else if (skipDocument) { continue; } RavenJObject currentDocJObject; using (StopwatchScope.For(convertToRavenJObjectDuration)) { currentDocJObject = RavenJObject.FromObject(currentDoc, jsonSerializer); } currentDocumentResults.Add(new DynamicJsonObject(currentDocJObject)); if (EnsureValidNumberOfOutputsForDocument(documentId, currentDocumentResults.Count) == false) { skipDocument = true; currentDocumentResults.Clear(); continue; } Interlocked.Increment(ref localStats.IndexingSuccesses); } count += ProcessBatch(viewGenerator, currentDocumentResults, currentKey, localChanges, accessor, statsPerKey, reduceInMapLinqExecutionDuration, putMappedResultsDuration, convertToRavenJObjectDuration); parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.LoadDocument, CurrentIndexingScope.Current.LoadDocumentDuration.ElapsedMilliseconds)); parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Linq_MapExecution, linqExecutionDuration.ElapsedMilliseconds)); parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Linq_ReduceLinqExecution, reduceInMapLinqExecutionDuration.ElapsedMilliseconds)); parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Map_PutMappedResults, putMappedResultsDuration.ElapsedMilliseconds)); parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Map_ConvertToRavenJObject, convertToRavenJObjectDuration.ElapsedMilliseconds)); parallelOperations.Enqueue(parallelStats); }); allReferenceEtags.Enqueue(CurrentIndexingScope.Current.ReferencesEtags); allReferencedDocs.Enqueue(CurrentIndexingScope.Current.ReferencedDocuments); } }); performanceStats.Add(new ParallelPerformanceStats { NumberOfThreads = parallelOperations.Count, DurationMs = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds, BatchedOperations = parallelOperations.ToList() }); var updateDocumentReferencesDuration = new Stopwatch(); using (StopwatchScope.For(updateDocumentReferencesDuration)) { UpdateDocumentReferences(actions, allReferencedDocs, allReferenceEtags); } performanceStats.Add(PerformanceStats.From(IndexingOperation.UpdateDocumentReferences, updateDocumentReferencesDuration.ElapsedMilliseconds)); var changed = allState.SelectMany(x => x.Item1).Concat(deleted.Keys) .Distinct() .ToList(); var stats = new IndexingWorkStats(allState.Select(x => x.Item2)); var reduceKeyStats = allState.SelectMany(x => x.Item3) .GroupBy(x => x.Key) .Select(g => new { g.Key, Count = g.Sum(x => x.Value) }) .ToList(); BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, reduceKeyStats, enumerator => context.TransactionalStorage.Batch(accessor => { while (enumerator.MoveNext()) { var reduceKeyStat = enumerator.Current; accessor.MapReduce.IncrementReduceKeyCounter(indexId, reduceKeyStat.Key, reduceKeyStat.Count); } })); var parallelReductionOperations = new ConcurrentQueue<ParallelBatchStats>(); var parallelReductionStart = SystemTime.UtcNow; BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, changed, enumerator => context.TransactionalStorage.Batch(accessor => { var parallelStats = new ParallelBatchStats { StartDelay = (long)(SystemTime.UtcNow - parallelReductionStart).TotalMilliseconds }; var scheduleReductionsDuration = new Stopwatch(); using 
(StopwatchScope.For(scheduleReductionsDuration)) { while (enumerator.MoveNext()) { accessor.MapReduce.ScheduleReductions(indexId, 0, enumerator.Current); } } parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Map_ScheduleReductions, scheduleReductionsDuration.ElapsedMilliseconds)); parallelReductionOperations.Enqueue(parallelStats); })); performanceStats.Add(new ParallelPerformanceStats { NumberOfThreads = parallelReductionOperations.Count, DurationMs = (long)(SystemTime.UtcNow - parallelReductionStart).TotalMilliseconds, BatchedOperations = parallelReductionOperations.ToList() }); UpdateIndexingStats(context, stats); performance.OnCompleted = () => BatchCompleted("Current Map", "Map", sourceCount, count, performanceStats); logIndexing.Debug("Mapped {0} documents for {1}", count, indexId); return performance; }
public void UpdateIndexingStats(int id, IndexingWorkStats stats) { var key = CreateKey(id); ushort version; var index = Load(tableStorage.IndexingStats, key, out version); index["attempts"] = index.Value<int>("attempts") + stats.IndexingAttempts; index["successes"] = index.Value<int>("successes") + stats.IndexingSuccesses; index["failures"] = index.Value<int>("failures") + stats.IndexingErrors; index["lastIndexingTime"] = SystemTime.UtcNow; tableStorage.IndexingStats.Add(writeBatch.Value, key, index, version); }
protected void Write(Func<RavenIndexWriter, Analyzer, IndexingWorkStats, IndexedItemsInfo> action) { if (disposed) throw new ObjectDisposedException("Index " + name + " has been disposed"); PreviousIndexTime = LastIndexTime; LastIndexTime = SystemTime.UtcNow; lock (writeLock) { bool shouldRecreateSearcher; var toDispose = new List<Action>(); Analyzer searchAnalyzer = null; var itemsInfo = new IndexedItemsInfo(null); bool flushed = false; try { waitReason = "Write"; try { searchAnalyzer = CreateAnalyzer(new LowerCaseKeywordAnalyzer(), toDispose); } catch (Exception e) { context.AddError(name, "Creating Analyzer", e.ToString(), "Analyzer"); throw; } if (indexWriter == null) { CreateIndexWriter(); } var locker = directory.MakeLock("writing-to-index.lock"); try { var stats = new IndexingWorkStats(); try { if (locker.Obtain() == false) { throw new InvalidOperationException( string.Format("Could not obtain the 'writing-to-index' lock of '{0}' index", name)); } itemsInfo = action(indexWriter, searchAnalyzer, stats); shouldRecreateSearcher = itemsInfo.ChangedDocs > 0; foreach (var indexExtension in indexExtensions.Values) { indexExtension.OnDocumentsIndexed(currentlyIndexDocuments, searchAnalyzer); } } catch (Exception e) { context.AddError(name, null, e.ToString(), "Write"); throw; } if (itemsInfo.ChangedDocs > 0) { WriteInMemoryIndexToDiskIfNecessary(itemsInfo.HighestETag); if(indexWriter != null && indexWriter.RamSizeInBytes() >= flushSize) { Flush(itemsInfo.HighestETag); // just make sure changes are flushed to disk flushed = true; } UpdateIndexingStats(context, stats); } } finally { locker.Release(); } } catch (Exception e) { throw new InvalidOperationException("Could not properly write to index " + name, e); } finally { currentlyIndexDocuments.Clear(); if (searchAnalyzer != null) searchAnalyzer.Close(); foreach (Action dispose in toDispose) { dispose(); } waitReason = null; LastIndexTime = SystemTime.UtcNow; } if (flushed) { try { HandleCommitPoints(itemsInfo, GetCurrentSegmentsInfo()); } catch (Exception e) { logIndexing.WarnException("Could not handle commit point properly, ignoring", e); } } if (shouldRecreateSearcher) RecreateSearcher(); } }
public void UpdateReduceStats(int id, IndexingWorkStats stats) { var key = CreateKey(id); ushort version; var reduceStats = Load(tableStorage.ReduceStats, key, out version); reduceStats["reduce_attempts"] = reduceStats.Value<int>("reduce_attempts") + stats.ReduceAttempts; reduceStats["reduce_successes"] = reduceStats.Value<int>("reduce_successes") + stats.ReduceSuccesses; reduceStats["reduce_failures"] = reduceStats.Value<int>("reduce_failures") + stats.ReduceErrors; tableStorage.ReduceStats.Add(writeBatch.Value, key, reduceStats, version); }
protected IEnumerable<object> RobustEnumerationIndex(IEnumerator<object> input, List<IndexingFunc> funcs, IndexingWorkStats stats) { Action<Exception, object> onError; return RobustEnumerationIndex(input, funcs, stats, out onError); }
protected void Write(Func<IndexWriter, Analyzer, IndexingWorkStats, int> action) { if (disposed) throw new ObjectDisposedException("Index " + name + " has been disposed"); LastIndexTime = SystemTime.UtcNow; lock (writeLock) { bool shouldRecreateSearcher; var toDispose = new List<Action>(); Analyzer searchAnalyzer = null; try { waitReason = "Write"; try { searchAnalyzer = CreateAnalyzer(new LowerCaseKeywordAnalyzer(), toDispose); } catch (Exception e) { context.AddError(name, "Creating Analyzer", e.ToString()); throw; } if (indexWriter == null) { CreateIndexWriter(); } var locker = directory.MakeLock("writing-to-index.lock"); try { int changedDocs; var stats = new IndexingWorkStats(); try { changedDocs = action(indexWriter, searchAnalyzer, stats); shouldRecreateSearcher = changedDocs > 0; foreach (var indexExtension in indexExtensions.Values) { indexExtension.OnDocumentsIndexed(currentlyIndexDocuments); } } catch (Exception e) { context.AddError(name, null, e.ToString()); throw; } if (changedDocs > 0) { UpdateIndexingStats(context, stats); WriteTempIndexToDiskIfNeeded(context); Flush(); // just make sure changes are flushed to disk } } finally { locker.Release(); } } finally { currentlyIndexDocuments.Clear(); if (searchAnalyzer != null) searchAnalyzer.Close(); foreach (Action dispose in toDispose) { dispose(); } waitReason = null; LastIndexTime = SystemTime.UtcNow; } if (shouldRecreateSearcher) RecreateSearcher(); } }
protected IEnumerable<object> RobustEnumerationReduce(IEnumerator<object> input, IndexingFunc func, IStorageActionsAccessor actions, IndexingWorkStats stats) { // not strictly accurate, but if we get that many errors, probably an error anyway. return new RobustEnumerator(context.CancellationToken, context.Configuration.MaxNumberOfItemsToIndexInSingleBatch) { BeforeMoveNext = () => Interlocked.Increment(ref stats.ReduceAttempts), CancelMoveNext = () => Interlocked.Decrement(ref stats.ReduceAttempts), OnError = (exception, o) => { context.AddError(name, TryGetDocKey(o), exception.Message, "Reduce" ); logIndexing.WarnException( String.Format("Failed to execute indexing function on {0} on {1}", name, TryGetDocKey(o)), exception); stats.ReduceErrors++; } }.RobustEnumeration(input, func); }
public override void IndexDocuments( AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp) { var count = 0; var sourceCount = 0; var sw = Stopwatch.StartNew(); var start = SystemTime.UtcNow; var changed = new HashSet<ReduceKeyAndBucket>(); var documentsWrapped = batch.Docs.Select(doc => { sourceCount++; var documentId = doc.__document_id; actions.MapReduce.DeleteMappedResultsForDocumentId((string)documentId, name, changed); return doc; }) .Where(x => x is FilteredDocument == false); var items = new List<MapResultItem>(); var stats = new IndexingWorkStats(); var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>(); using (CurrentIndexingScope.Current = new CurrentIndexingScope(LoadDocument, allReferencedDocs.Enqueue)) { var mapResults = RobustEnumerationIndex( documentsWrapped.GetEnumerator(), viewGenerator.MapDefinitions, actions, stats) .ToList(); actions.MapReduce.UpdateRemovedMapReduceStats(name, changed); foreach (var mappedResultFromDocument in mapResults.GroupBy(GetDocumentId)) { var dynamicResults = mappedResultFromDocument.Select(x => (object)new DynamicJsonObject(RavenJObject.FromObject(x, jsonSerializer))).ToList(); foreach ( var doc in RobustEnumerationReduceDuringMapPhase(dynamicResults.GetEnumerator(), viewGenerator.ReduceDefinition, actions, context)) { count++; var reduceValue = viewGenerator.GroupByExtraction(doc); if (reduceValue == null) { logIndexing.Debug("Field {0} is used as the reduce key and cannot be null, skipping document {1}", viewGenerator.GroupByExtraction, mappedResultFromDocument.Key); continue; } var reduceKey = ReduceKeyToString(reduceValue); var docId = mappedResultFromDocument.Key.ToString(); var data = GetMappedData(doc); items.Add(new MapResultItem { Data = data, DocId = docId, ReduceKey = reduceKey }); changed.Add(new ReduceKeyAndBucket(IndexingUtil.MapBucket(docId), reduceKey)); } } } IDictionary<string, HashSet<string>> result; while (allReferencedDocs.TryDequeue(out result)) { foreach (var referencedDocument in result) { actions.Indexing.UpdateDocumentReferences(name, referencedDocument.Key, referencedDocument.Value); actions.General.MaybePulseTransaction(); } } foreach (var mapResultItem in items) { actions.MapReduce.PutMappedResult(name, mapResultItem.DocId, mapResultItem.ReduceKey, mapResultItem.Data); actions.General.MaybePulseTransaction(); } UpdateIndexingStats(context, stats); actions.MapReduce.ScheduleReductions(name, 0, changed); AddindexingPerformanceStat(new IndexingPerformanceStats { OutputCount = count, InputCount = sourceCount, Operation = "Map", Duration = sw.Elapsed, Started = start }); logIndexing.Debug("Mapped {0} documents for {1}", count, name); }