public override void IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp)
{
    var count = 0;
    var sourceCount = 0;
    var sw = Stopwatch.StartNew();
    var start = SystemTime.UtcNow;
    int loadDocumentCount = 0;
    long loadDocumentDuration = 0;

    Write((indexWriter, analyzer, stats) =>
    {
        var processedKeys = new HashSet<string>();
        var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(indexId))
            .Where(x => x != null)
            .ToList();
        try
        {
            var indexingPerfStats = RecordCurrentBatch("Current", batch.Docs.Count);
            batch.SetIndexingPerformance(indexingPerfStats);

            var docIdTerm = new Term(Constants.DocumentIdFieldName);

            // Wrap the batch: skip duplicate document ids and delete any existing index entries
            // for documents that are about to be re-indexed.
            var documentsWrapped = batch.Docs.Select((doc, i) =>
            {
                Interlocked.Increment(ref sourceCount);
                if (doc.__document_id == null)
                    throw new ArgumentException(
                        string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));

                string documentId = doc.__document_id.ToString();
                if (processedKeys.Add(documentId) == false)
                    return doc;

                InvokeOnIndexEntryDeletedOnAllBatchers(batchers, docIdTerm.CreateTerm(documentId.ToLowerInvariant()));

                if (batch.SkipDeleteFromIndex[i] == false ||
                    context.ShouldRemoveFromIndex(documentId)) // maybe it is recently deleted?
                    indexWriter.DeleteDocuments(docIdTerm.CreateTerm(documentId.ToLowerInvariant()));

                return doc;
            })
            .Where(x => x is FilteredDocument == false)
            .ToList();

            var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();
            var allReferenceEtags = new ConcurrentQueue<IDictionary<string, Etag>>();

            // Index the batch in parallel partitions; each partition reuses a single Lucene Document
            // instance and clears its fields between map outputs.
            BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, partition =>
            {
                var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(context.Database, indexDefinition, viewGenerator, logIndexing);
                var luceneDoc = new Document();
                var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES,
                                                Field.Index.NOT_ANALYZED_NO_NORMS);

                using (CurrentIndexingScope.Current = new CurrentIndexingScope(context.Database, PublicName))
                {
                    string currentDocId = null;
                    int outputPerDocId = 0;
                    Action<Exception, object> onErrorFunc;
                    bool skipDocument = false;

                    foreach (var doc in RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, stats, out onErrorFunc))
                    {
                        float boost;
                        IndexingResult indexingResult;
                        try
                        {
                            indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);
                        }
                        catch (Exception e)
                        {
                            onErrorFunc(e, doc);
                            continue;
                        }

                        // ReSharper disable once RedundantBoolCompare --> code clarity
                        if (indexingResult.NewDocId == null || indexingResult.ShouldSkip != false)
                        {
                            continue;
                        }

                        if (currentDocId != indexingResult.NewDocId)
                        {
                            currentDocId = indexingResult.NewDocId;
                            outputPerDocId = 0;
                            skipDocument = false;
                        }

                        if (skipDocument)
                            continue;

                        // Cap the number of index outputs a single source document may produce.
                        outputPerDocId++;
                        if (EnsureValidNumberOfOutputsForDocument(currentDocId, outputPerDocId) == false)
                        {
                            skipDocument = true;
                            continue;
                        }

                        Interlocked.Increment(ref count);

                        luceneDoc.GetFields().Clear();
                        luceneDoc.Boost = boost;
                        documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
                        luceneDoc.Add(documentIdField);
                        foreach (var field in indexingResult.Fields)
                        {
                            luceneDoc.Add(field);
                        }

                        batchers.ApplyAndIgnoreAllErrors(
                            exception =>
                            {
                                logIndexing.WarnException(
                                    string.Format(
                                        "Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
                                        indexId, indexingResult.NewDocId),
                                    exception);
                                context.AddError(indexId,
                                                 indexingResult.NewDocId,
                                                 exception.Message,
                                                 "OnIndexEntryCreated Trigger");
                            },
                            trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc));

                        LogIndexedDocument(indexingResult.NewDocId, luceneDoc);
                        AddDocumentToIndex(indexWriter, luceneDoc, analyzer);

                        Interlocked.Increment(ref stats.IndexingSuccesses);
                    }

                    allReferenceEtags.Enqueue(CurrentIndexingScope.Current.ReferencesEtags);
                    allReferencedDocs.Enqueue(CurrentIndexingScope.Current.ReferencedDocuments);

                    Interlocked.Add(ref loadDocumentCount, CurrentIndexingScope.Current.LoadDocumentCount);
                    Interlocked.Add(ref loadDocumentDuration, CurrentIndexingScope.Current.LoadDocumentDuration.ElapsedMilliseconds);
                }
            });

            UpdateDocumentReferences(actions, allReferencedDocs, allReferenceEtags);
        }
        catch (Exception e)
        {
            batchers.ApplyAndIgnoreAllErrors(
                ex =>
                {
                    logIndexing.WarnException("Failed to notify index update trigger batcher about an error", ex);
                    context.AddError(indexId, null, ex.Message, "AnErrorOccured Trigger");
                },
                x => x.AnErrorOccured(e));
            throw;
        }
        finally
        {
            batchers.ApplyAndIgnoreAllErrors(
                e =>
                {
                    logIndexing.WarnException("Failed to dispose on index update trigger", e);
                    context.AddError(indexId, null, e.Message, "Dispose Trigger");
                },
                x => x.Dispose());
            BatchCompleted("Current");
        }

        return new IndexedItemsInfo(batch.HighestEtagBeforeFiltering)
        {
            ChangedDocs = sourceCount
        };
    });

    AddindexingPerformanceStat(new IndexingPerformanceStats
    {
        OutputCount = count,
        ItemsCount = sourceCount,
        InputCount = batch.Docs.Count,
        Duration = sw.Elapsed,
        Operation = "Index",
        Started = start,
        LoadDocumentCount = loadDocumentCount,
        LoadDocumentDurationMs = loadDocumentDuration
    });

    logIndexing.Debug("Indexed {0} documents for {1}", count, indexId);
}
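// The indexing loop above calls batchers.ApplyAndIgnoreAllErrors(onError, action) in several places
// so that a failing index-update trigger is reported but never aborts the batch. The helper below is
// a minimal sketch of what such an extension could look like, inferred from its call sites; it is an
// illustrative assumption (including the type name BatcherExtensionsSketch), not the actual RavenDB
// implementation.
public static class BatcherExtensionsSketch
{
    public static void ApplyAndIgnoreAllErrors<T>(this IEnumerable<T> items, Action<Exception> onError, Action<T> action)
    {
        foreach (var item in items)
        {
            try
            {
                // e.g. trigger => trigger.OnIndexEntryCreated(docId, luceneDoc)
                action(item);
            }
            catch (Exception e)
            {
                // The error is handed to the caller (logged and added to the context errors)
                // and then swallowed, so one faulty trigger cannot fail the whole batch.
                onError(e);
            }
        }
    }
}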
public override void IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp)
{
    var count = 0;
    var sourceCount = 0;
    var sw = Stopwatch.StartNew();
    var start = SystemTime.UtcNow;
    var deleted = new Dictionary<ReduceKeyAndBucket, int>();

    var indexPerfStats = RecordCurrentBatch("Current Map", batch.Docs.Count);
    batch.SetIndexingPerformance(indexPerfStats);

    // Drop the previously mapped results for every document in the batch before re-mapping it.
    var documentsWrapped = batch.Docs.Select(doc =>
    {
        sourceCount++;
        var documentId = doc.__document_id;
        actions.MapReduce.DeleteMappedResultsForDocumentId((string)documentId, indexId, deleted);
        return doc;
    })
    .Where(x => x is FilteredDocument == false)
    .ToList();

    var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();
    var allReferenceEtags = new ConcurrentQueue<IDictionary<string, Etag>>();
    var allState = new ConcurrentQueue<Tuple<HashSet<ReduceKeyAndBucket>, IndexingWorkStats, Dictionary<string, int>>>();

    int loadDocumentCount = 0;
    long loadDocumentDuration = 0;

    // Map each partition in parallel; every partition keeps its own (changes, stats, per-key counts)
    // tuple, and the per-partition state is merged after all partitions complete.
    BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, partition =>
    {
        var localStats = new IndexingWorkStats();
        var localChanges = new HashSet<ReduceKeyAndBucket>();
        var statsPerKey = new Dictionary<string, int>();
        allState.Enqueue(Tuple.Create(localChanges, localStats, statsPerKey));

        using (CurrentIndexingScope.Current = new CurrentIndexingScope(context.Database, PublicName))
        {
            // we are writing to the transactional store from multiple threads here, and in a streaming fashion
            // should result in less memory and better perf
            context.TransactionalStorage.Batch(accessor =>
            {
                var mapResults = RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, localStats);
                var currentDocumentResults = new List<object>();
                string currentKey = null;
                bool skipDocument = false;

                // Map results arrive grouped by source document; flush the accumulated results
                // whenever the document id changes.
                foreach (var currentDoc in mapResults)
                {
                    var documentId = GetDocumentId(currentDoc);
                    if (documentId != currentKey)
                    {
                        count += ProcessBatch(viewGenerator, currentDocumentResults, currentKey, localChanges, accessor, statsPerKey);
                        currentDocumentResults.Clear();
                        currentKey = documentId;
                    }
                    else if (skipDocument)
                    {
                        continue;
                    }

                    currentDocumentResults.Add(new DynamicJsonObject(RavenJObject.FromObject(currentDoc, jsonSerializer)));

                    if (EnsureValidNumberOfOutputsForDocument(documentId, currentDocumentResults.Count) == false)
                    {
                        skipDocument = true;
                        currentDocumentResults.Clear();
                        continue;
                    }

                    Interlocked.Increment(ref localStats.IndexingSuccesses);
                }
                count += ProcessBatch(viewGenerator, currentDocumentResults, currentKey, localChanges, accessor, statsPerKey);
            });

            allReferenceEtags.Enqueue(CurrentIndexingScope.Current.ReferencesEtags);
            allReferencedDocs.Enqueue(CurrentIndexingScope.Current.ReferencedDocuments);

            Interlocked.Add(ref loadDocumentCount, CurrentIndexingScope.Current.LoadDocumentCount);
            Interlocked.Add(ref loadDocumentDuration, CurrentIndexingScope.Current.LoadDocumentDuration.ElapsedMilliseconds);
        }
    });

    UpdateDocumentReferences(actions, allReferencedDocs, allReferenceEtags);

    // Merge the per-partition state: the set of changed reduce keys/buckets, the indexing stats,
    // and the number of mapped results per reduce key.
    var changed = allState.SelectMany(x => x.Item1).Concat(deleted.Keys)
        .Distinct()
        .ToList();

    var stats = new IndexingWorkStats(allState.Select(x => x.Item2));

    var reduceKeyStats = allState.SelectMany(x => x.Item3)
        .GroupBy(x => x.Key)
        .Select(g => new { g.Key, Count = g.Sum(x => x.Value) })
        .ToList();

    BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, reduceKeyStats, enumerator => context.TransactionalStorage.Batch(accessor =>
    {
        while (enumerator.MoveNext())
        {
            var reduceKeyStat = enumerator.Current;
            accessor.MapReduce.IncrementReduceKeyCounter(indexId, reduceKeyStat.Key, reduceKeyStat.Count);
        }
    }));

    // Schedule reductions for every reduce key/bucket touched by this batch.
    BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, changed, enumerator => context.TransactionalStorage.Batch(accessor =>
    {
        while (enumerator.MoveNext())
        {
            accessor.MapReduce.ScheduleReductions(indexId, 0, enumerator.Current);
        }
    }));

    UpdateIndexingStats(context, stats);

    AddindexingPerformanceStat(new IndexingPerformanceStats
    {
        OutputCount = count,
        ItemsCount = sourceCount,
        InputCount = documentsWrapped.Count,
        Operation = "Map",
        Duration = sw.Elapsed,
        Started = start,
        LoadDocumentCount = loadDocumentCount,
        LoadDocumentDurationMs = loadDocumentDuration
    });

    BatchCompleted("Current Map");
    logIndexing.Debug("Mapped {0} documents for {1}", count, indexId);
}
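// Both IndexDocuments overloads cap index "fan-out" by calling
// EnsureValidNumberOfOutputsForDocument(documentId, outputCount) and skipping the rest of a
// document's outputs once it returns false. The helper below is a minimal, self-contained sketch
// of that kind of guard; the type name, the explicit limit parameter, and the warn callback are
// illustrative assumptions, not the actual RavenDB signature.
public static class IndexOutputGuardSketch
{
    public static bool IsWithinOutputLimit(string sourceDocumentId, int producedOutputs, int maxOutputsPerDocument, Action<string> warn)
    {
        if (producedOutputs <= maxOutputsPerDocument)
            return true;

        // Exceeding the cap marks the document as skipped for the remainder of the batch,
        // protecting the index from unbounded fan-out caused by a single source document.
        warn(string.Format(
            "Document '{0}' produced {1} index outputs, more than the allowed {2}; skipping its remaining outputs.",
            sourceDocumentId, producedOutputs, maxOutputsPerDocument));
        return false;
    }
}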