private int ProcessBatch(AbstractViewGenerator viewGenerator, List<object> currentDocumentResults, string currentKey,
                         HashSet<ReduceKeyAndBucket> changes, IStorageActionsAccessor actions, IDictionary<string, int> statsPerKey)
{
    if (currentKey == null || currentDocumentResults.Count == 0)
        return 0;

    int count = 0;

    // Run the reduce definition over this document's mapped results, tolerating per-item failures.
    var results = RobustEnumerationReduceDuringMapPhase(currentDocumentResults.GetEnumerator(), viewGenerator.ReduceDefinition);
    foreach (var doc in results)
    {
        count++;

        var reduceValue = viewGenerator.GroupByExtraction(doc);
        if (reduceValue == null)
        {
            logIndexing.Debug("Field {0} is used as the reduce key and cannot be null, skipping document {1}",
                              viewGenerator.GroupByExtraction, currentKey);
            continue;
        }
        string reduceKey = ReduceKeyToString(reduceValue);

        var data = GetMappedData(doc);

        actions.MapReduce.PutMappedResult(indexId, currentKey, reduceKey, data);
        statsPerKey[reduceKey] = statsPerKey.GetOrDefault(reduceKey) + 1;
        actions.General.MaybePulseTransaction();
        changes.Add(new ReduceKeyAndBucket(IndexingUtil.MapBucket(currentKey), reduceKey));
    }
    return count;
}
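// Both ProcessBatch variants rely on a GetOrDefault dictionary extension, which is not
// a BCL method under this name; a minimal sketch of the assumed semantics (the value
// for the key, or default(TValue) when the key is absent):
using System.Collections.Generic;

internal static class DictionaryExtensions
{
    public static TValue GetOrDefault<TKey, TValue>(this IDictionary<TKey, TValue> self, TKey key)
    {
        TValue value;
        return self.TryGetValue(key, out value) ? value : default(TValue);
    }
}

// Usage as above: statsPerKey[reduceKey] = statsPerKey.GetOrDefault(reduceKey) + 1;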
private int ProcessBatch(AbstractViewGenerator viewGenerator, List<object> currentDocumentResults, string currentKey,
                         HashSet<ReduceKeyAndBucket> changes, IStorageActionsAccessor actions, IDictionary<string, int> statsPerKey)
{
    if (currentKey == null || currentDocumentResults.Count == 0)
        return 0;

    var old = CurrentIndexingScope.Current;
    try
    {
        // The reduce pass must not observe the map-phase indexing scope (e.g. LoadDocument tracking);
        // the original scope is restored in the finally block.
        CurrentIndexingScope.Current = null;

        if (logIndexing.IsDebugEnabled)
        {
            var sb = new StringBuilder()
                .AppendFormat("Index {0} for document {1} resulted in:", PublicName, currentKey)
                .AppendLine();
            foreach (var currentDocumentResult in currentDocumentResults)
            {
                sb.AppendLine(JsonConvert.SerializeObject(currentDocumentResult));
            }
            logIndexing.Debug(sb.ToString());
        }

        int count = 0;
        var results = RobustEnumerationReduceDuringMapPhase(currentDocumentResults.GetEnumerator(), viewGenerator.ReduceDefinition);
        foreach (var doc in results)
        {
            count++;

            var reduceValue = viewGenerator.GroupByExtraction(doc);
            if (reduceValue == null)
            {
                logIndexing.Debug("Field {0} is used as the reduce key and cannot be null, skipping document {1}",
                                  viewGenerator.GroupByExtraction, currentKey);
                continue;
            }
            string reduceKey = ReduceKeyToString(reduceValue);

            var data = GetMappedData(doc);

            logIndexing.Debug("Index {0} for document {1} resulted in ({2}): {3}", PublicName, currentKey, reduceKey, data);
            actions.MapReduce.PutMappedResult(indexId, currentKey, reduceKey, data);
            statsPerKey[reduceKey] = statsPerKey.GetOrDefault(reduceKey) + 1;
            actions.General.MaybePulseTransaction();
            changes.Add(new ReduceKeyAndBucket(IndexingUtil.MapBucket(currentKey), reduceKey));
        }
        return count;
    }
    finally
    {
        CurrentIndexingScope.Current = old;
    }
}
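// The changes set is a HashSet<ReduceKeyAndBucket>, so the element type must carry
// value equality for de-duplication to work. A hypothetical sketch consistent with
// how the type is constructed above; the real RavenDB type may differ in detail:
using System;

public class ReduceKeyAndBucket
{
    public readonly int Bucket;
    public readonly string ReduceKey;

    public ReduceKeyAndBucket(int bucket, string reduceKey)
    {
        if (reduceKey == null)
            throw new ArgumentNullException("reduceKey");
        Bucket = bucket;
        ReduceKey = reduceKey;
    }

    public override bool Equals(object obj)
    {
        var other = obj as ReduceKeyAndBucket;
        return other != null && Bucket == other.Bucket && ReduceKey == other.ReduceKey;
    }

    public override int GetHashCode()
    {
        return Bucket ^ ReduceKey.GetHashCode();
    }
}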
public override void IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch,
                                    IStorageActionsAccessor actions, DateTime minimumTimestamp)
{
    // Note: context, name, jsonSerializer and logIndexing are not parameters here,
    // so they are presumably fields of the enclosing index class.
    var count = 0;
    var sourceCount = 0;
    var sw = Stopwatch.StartNew();
    var start = SystemTime.UtcNow;
    var changed = new HashSet<ReduceKeyAndBucket>();

    // As each document streams through, drop its previously mapped results and skip
    // documents that were filtered out of the batch.
    var documentsWrapped = batch.Docs.Select(doc =>
    {
        sourceCount++;
        var documentId = doc.__document_id;
        actions.MapReduce.DeleteMappedResultsForDocumentId((string)documentId, name, changed);
        return doc;
    })
    .Where(x => x is FilteredDocument == false);

    var items = new List<MapResultItem>();
    var stats = new IndexingWorkStats();
    var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();

    using (CurrentIndexingScope.Current = new CurrentIndexingScope(LoadDocument, allReferencedDocs.Enqueue))
    {
        var mapResults = RobustEnumerationIndex(documentsWrapped.GetEnumerator(), viewGenerator.MapDefinitions, actions, stats)
            .ToList();
        actions.MapReduce.UpdateRemovedMapReduceStats(name, changed);

        foreach (var mappedResultFromDocument in mapResults.GroupBy(GetDocumentId))
        {
            var dynamicResults = mappedResultFromDocument
                .Select(x => (object)new DynamicJsonObject(RavenJObject.FromObject(x, jsonSerializer)))
                .ToList();
            foreach (var doc in RobustEnumerationReduceDuringMapPhase(dynamicResults.GetEnumerator(), viewGenerator.ReduceDefinition, actions, context))
            {
                count++;

                var reduceValue = viewGenerator.GroupByExtraction(doc);
                if (reduceValue == null)
                {
                    logIndexing.Debug("Field {0} is used as the reduce key and cannot be null, skipping document {1}",
                                      viewGenerator.GroupByExtraction, mappedResultFromDocument.Key);
                    continue;
                }
                var reduceKey = ReduceKeyToString(reduceValue);
                var docId = mappedResultFromDocument.Key.ToString();

                var data = GetMappedData(doc);

                items.Add(new MapResultItem
                {
                    Data = data,
                    DocId = docId,
                    ReduceKey = reduceKey
                });

                changed.Add(new ReduceKeyAndBucket(IndexingUtil.MapBucket(docId), reduceKey));
            }
        }
    }

    // Persist the document references gathered by LoadDocument during the map phase.
    IDictionary<string, HashSet<string>> result;
    while (allReferencedDocs.TryDequeue(out result))
    {
        foreach (var referencedDocument in result)
        {
            actions.Indexing.UpdateDocumentReferences(name, referencedDocument.Key, referencedDocument.Value);
            actions.General.MaybePulseTransaction();
        }
    }

    foreach (var mapResultItem in items)
    {
        actions.MapReduce.PutMappedResult(name, mapResultItem.DocId, mapResultItem.ReduceKey, mapResultItem.Data);
        actions.General.MaybePulseTransaction();
    }

    UpdateIndexingStats(context, stats);
    actions.MapReduce.ScheduleReductions(name, 0, changed);
    AddindexingPerformanceStat(new IndexingPerformanceStats
    {
        OutputCount = count,
        InputCount = sourceCount,
        Operation = "Map",
        Duration = sw.Elapsed,
        Started = start
    });
    logIndexing.Debug("Mapped {0} documents for {1}", count, name);
}
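// The items list buffers one entry per mapped result so the PutMappedResult writes
// can happen after the indexing scope is disposed. MapResultItem is a plain data
// carrier; a minimal sketch matching the object initializer above (Data is assumed
// to be a Raven.Json.Linq.RavenJObject, which is what GetMappedData plausibly returns):
private class MapResultItem
{
    public string DocId;
    public string ReduceKey;
    public RavenJObject Data;
}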
public override void IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch,
                                    WorkContext context, IStorageActionsAccessor actions, DateTime minimumTimestamp)
{
    var count = 0;
    var sourceCount = 0;
    var sw = Stopwatch.StartNew();
    var start = SystemTime.UtcNow;
    var changed = new HashSet<ReduceKeyAndBucket>();

    // As each document streams through, drop its previously mapped results and skip
    // documents that were filtered out of the batch.
    var documentsWrapped = batch.Docs.Select(doc =>
    {
        sourceCount++;
        var documentId = doc.__document_id;
        actions.MapReduce.DeleteMappedResultsForDocumentId((string)documentId, name, changed);
        return doc;
    })
    .Where(x => x is FilteredDocument == false);

    var items = new List<MapResultItem>();
    var stats = new IndexingWorkStats();

    foreach (var mappedResultFromDocument in
        GroupByDocumentId(context, RobustEnumerationIndex(documentsWrapped.GetEnumerator(), viewGenerator.MapDefinitions, actions, stats)))
    {
        var dynamicResults = mappedResultFromDocument
            .Select(x => (object)new DynamicJsonObject(RavenJObject.FromObject(x, jsonSerializer)))
            .ToList();
        foreach (var doc in RobustEnumerationReduceDuringMapPhase(dynamicResults.GetEnumerator(), viewGenerator.ReduceDefinition, actions, context))
        {
            count++;

            var reduceValue = viewGenerator.GroupByExtraction(doc);
            if (reduceValue == null)
            {
                logIndexing.Debug("Field {0} is used as the reduce key and cannot be null, skipping document {1}",
                                  viewGenerator.GroupByExtraction, mappedResultFromDocument.Key);
                continue;
            }
            var reduceKey = ReduceKeyToString(reduceValue);
            var docId = mappedResultFromDocument.Key.ToString();

            var data = GetMappedData(doc);

            logIndexing.Debug("Mapped result for index '{0}' doc '{1}': '{2}'", name, docId, data);

            items.Add(new MapResultItem
            {
                Data = data,
                DocId = docId,
                ReduceKey = reduceKey
            });

            changed.Add(new ReduceKeyAndBucket(IndexingUtil.MapBucket(docId), reduceKey));
        }
    }

    int mapCount = 0;
    foreach (var mapResultItem in items)
    {
        actions.MapReduce.PutMappedResult(name, mapResultItem.DocId, mapResultItem.ReduceKey, mapResultItem.Data);
        if (mapCount++ % 50000 == 0)
        {
            // This protects us from a Version Store Out Of Memory error during indexing,
            // which can happen when indexes output a VERY large number of items per document.
            actions.General.PulseTransaction();
        }
    }

    UpdateIndexingStats(context, stats);
    actions.MapReduce.ScheduleReductions(name, 0, changed);
    AddindexingPerformanceStat(new IndexingPerformanceStats
    {
        OutputCount = count,
        InputCount = sourceCount,
        Operation = "Map",
        Duration = sw.Elapsed,
        Started = start
    });
    logIndexing.Debug("Mapped {0} documents for {1}", count, name);
}
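// This variant and the one below call GroupByDocumentId to cluster the raw map output
// by source document before the reduce-during-map pass. A hypothetical sketch built on
// LINQ's GroupBy and the GetDocumentId helper used above; the production version
// presumably streams groups and honors cancellation via the WorkContext rather than
// materializing everything as GroupBy does:
private IEnumerable<IGrouping<object, object>> GroupByDocumentId(WorkContext context, IEnumerable<object> mapResults)
{
    return mapResults.GroupBy(GetDocumentId);
}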
public override void IndexDocuments(AbstractViewGenerator viewGenerator, IEnumerable<dynamic> documents,
                                    WorkContext context, IStorageActionsAccessor actions, DateTime minimumTimestamp)
{
    var count = 0;
    var sourceCount = 0;
    var sw = Stopwatch.StartNew();
    var changed = new HashSet<ReduceKeyAndBucket>();

    // As each document streams through, drop its previously mapped results and skip
    // documents that were filtered out of the batch.
    var documentsWrapped = documents.Select(doc =>
    {
        sourceCount++;
        var documentId = doc.__document_id;
        actions.MapReduce.DeleteMappedResultsForDocumentId((string)documentId, name, changed);
        return doc;
    })
    .Where(x => x is FilteredDocument == false);

    var stats = new IndexingWorkStats();

    foreach (var mappedResultFromDocument in
        GroupByDocumentId(context, RobustEnumerationIndex(documentsWrapped, viewGenerator.MapDefinitions, actions, context, stats)))
    {
        foreach (var doc in RobustEnumerationReduceDuringMapPhase(mappedResultFromDocument, viewGenerator.ReduceDefinition, actions, context))
        {
            count++;

            var reduceValue = viewGenerator.GroupByExtraction(doc);
            if (reduceValue == null)
            {
                logIndexing.Debug("Field {0} is used as the reduce key and cannot be null, skipping document {1}",
                                  viewGenerator.GroupByExtraction, mappedResultFromDocument.Key);
                continue;
            }
            var reduceKey = ReduceKeyToString(reduceValue);
            var docId = mappedResultFromDocument.Key.ToString();

            var data = GetMappedData(doc);

            logIndexing.Debug("Mapped result for index '{0}' doc '{1}': '{2}'", name, docId, data);

            // Unlike the variants above, mapped results are written immediately rather than buffered.
            actions.MapReduce.PutMappedResult(name, docId, reduceKey, data);
            changed.Add(new ReduceKeyAndBucket(IndexingUtil.MapBucket(docId), reduceKey));
        }
    }

    UpdateIndexingStats(context, stats);
    actions.MapReduce.ScheduleReductions(name, 0, changed);
    AddindexingPerformanceStat(new IndexingPerformanceStats
    {
        OutputCount = count,
        InputCount = sourceCount,
        Operation = "Map",
        Duration = sw.Elapsed
    });
    logIndexing.Debug("Mapped {0} documents for {1}", count, name);
}
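// Every variant maps a document id to a bucket via IndexingUtil.MapBucket so that
// reductions can be scheduled per bounded partition rather than per individual
// document. A hypothetical sketch using a simple stable string hash; RavenDB's
// actual hash function and bucket count may differ:
public static class IndexingUtil
{
    private const int NumberOfBuckets = 1024 * 1024;

    public static int MapBucket(string docId)
    {
        var hash = 0;
        foreach (var ch in docId)
            hash = unchecked(hash * 31 + ch);
        // Mask off the sign bit so the modulo result is always non-negative.
        return (hash & 0x7FFFFFFF) % NumberOfBuckets;
    }
}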