Beispiel #1
0
        /// <summary>
        /// Passes the map results of one document through
        /// <c>RobustEnumerationReduceDuringMapPhase</c> (using the view's reduce definition)
        /// and stores every resulting item with <c>PutMappedResult</c>.
        /// </summary>
        /// <param name="viewGenerator">Supplies the reduce definition and the group-by extraction used to obtain the reduce key.</param>
        /// <param name="currentDocumentResults">Map results gathered for the document identified by <paramref name="currentKey"/>.</param>
        /// <param name="currentKey">Key of the document being processed; when null nothing is done.</param>
        /// <param name="changes">Receives a <c>ReduceKeyAndBucket</c> for every stored result.</param>
        /// <param name="actions">Storage accessor used to persist results and to give the transaction a chance to pulse.</param>
        /// <param name="statsPerKey">For every stored result, the entry of its reduce key is set to <c>GetOrDefault(reduceKey) + 1</c>.</param>
        /// <returns>
        /// The number of items enumerated, including the ones skipped because their reduce value was null;
        /// 0 when <paramref name="currentKey"/> is null or there are no results.
        /// </returns>
        private int ProcessBatch(AbstractViewGenerator viewGenerator, List <object> currentDocumentResults, string currentKey, HashSet <ReduceKeyAndBucket> changes,
                                 IStorageActionsAccessor actions,
                                 IDictionary <string, int> statsPerKey)
        {
            bool nothingToProcess = currentKey == null || currentDocumentResults.Count == 0;
            if (nothingToProcess)
            {
                return 0;
            }

            int enumerated = 0;

            foreach (var mapped in RobustEnumerationReduceDuringMapPhase(currentDocumentResults.GetEnumerator(), viewGenerator.ReduceDefinition))
            {
                // Counted up front, so items skipped below are part of the returned total as well.
                enumerated++;

                var reduceValue = viewGenerator.GroupByExtraction(mapped);
                if (reduceValue == null)
                {
                    // No reduce key can be built for this item: log it and move on.
                    logIndexing.Debug("Field {0} is used as the reduce key and cannot be null, skipping document {1}",
                                      viewGenerator.GroupByExtraction, currentKey);
                    continue;
                }

                string reduceKey = ReduceKeyToString(reduceValue);
                var mappedData = GetMappedData(mapped);

                actions.MapReduce.PutMappedResult(indexId, currentKey, reduceKey, mappedData);

                var previousCount = statsPerKey.GetOrDefault(reduceKey);
                statsPerKey[reduceKey] = previousCount + 1;

                actions.General.MaybePulseTransaction();

                // Record which (bucket, reduce key) pair was touched by this result.
                var bucket = IndexingUtil.MapBucket(currentKey);
                changes.Add(new ReduceKeyAndBucket(bucket, reduceKey));
            }

            return enumerated;
        }
Beispiel #2
0
        /// <summary>
        /// Passes the map results of one document through
        /// <c>RobustEnumerationReduceDuringMapPhase</c> (using the view's reduce definition)
        /// and stores every resulting item with <c>PutMappedResult</c>.
        /// While the method runs, <c>CurrentIndexingScope.Current</c> is set to null; the
        /// previous value is put back in a <c>finally</c> block.
        /// </summary>
        /// <param name="viewGenerator">Supplies the reduce definition and the group-by extraction used to obtain the reduce key.</param>
        /// <param name="currentDocumentResults">Map results gathered for the document identified by <paramref name="currentKey"/>.</param>
        /// <param name="currentKey">Key of the document being processed; when null nothing is done.</param>
        /// <param name="changes">Receives a <c>ReduceKeyAndBucket</c> for every stored result.</param>
        /// <param name="actions">Storage accessor used to persist results and to give the transaction a chance to pulse.</param>
        /// <param name="statsPerKey">For every stored result, the entry of its reduce key is set to <c>GetOrDefault(reduceKey) + 1</c>.</param>
        /// <returns>
        /// The number of items enumerated, including the ones skipped because their reduce value was null;
        /// 0 when <paramref name="currentKey"/> is null or there are no results.
        /// </returns>
        private int ProcessBatch(AbstractViewGenerator viewGenerator, List <object> currentDocumentResults, string currentKey, HashSet <ReduceKeyAndBucket> changes,
                                 IStorageActionsAccessor actions,
                                 IDictionary <string, int> statsPerKey)
        {
            bool nothingToProcess = currentKey == null || currentDocumentResults.Count == 0;
            if (nothingToProcess)
            {
                return 0;
            }

            // Remember the ambient scope so it can be reinstated no matter how we leave.
            var previousScope = CurrentIndexingScope.Current;

            try
            {
                CurrentIndexingScope.Current = null;

                if (logIndexing.IsDebugEnabled)
                {
                    // Dump every raw map result as JSON; only built when debug logging is on.
                    var dump = new StringBuilder();
                    dump.AppendFormat("Index {0} for document {1} resulted in:", PublicName, currentKey);
                    dump.AppendLine();
                    foreach (var rawResult in currentDocumentResults)
                    {
                        dump.AppendLine(JsonConvert.SerializeObject(rawResult));
                    }
                    logIndexing.Debug(dump.ToString());
                }

                int enumerated = 0;

                foreach (var mapped in RobustEnumerationReduceDuringMapPhase(currentDocumentResults.GetEnumerator(), viewGenerator.ReduceDefinition))
                {
                    // Counted up front, so items skipped below are part of the returned total as well.
                    enumerated++;

                    var reduceValue = viewGenerator.GroupByExtraction(mapped);
                    if (reduceValue == null)
                    {
                        // No reduce key can be built for this item: log it and move on.
                        logIndexing.Debug("Field {0} is used as the reduce key and cannot be null, skipping document {1}",
                                          viewGenerator.GroupByExtraction, currentKey);
                        continue;
                    }

                    string reduceKey = ReduceKeyToString(reduceValue);
                    var mappedData = GetMappedData(mapped);

                    logIndexing.Debug("Index {0} for document {1} resulted in ({2}): {3}", PublicName, currentKey, reduceKey, mappedData);

                    actions.MapReduce.PutMappedResult(indexId, currentKey, reduceKey, mappedData);

                    var previousCount = statsPerKey.GetOrDefault(reduceKey);
                    statsPerKey[reduceKey] = previousCount + 1;

                    actions.General.MaybePulseTransaction();

                    // Record which (bucket, reduce key) pair was touched by this result.
                    var bucket = IndexingUtil.MapBucket(currentKey);
                    changes.Add(new ReduceKeyAndBucket(bucket, reduceKey));
                }

                return enumerated;
            }
            finally
            {
                CurrentIndexingScope.Current = previousScope;
            }
        }
Beispiel #3
0
        /// <summary>
        /// Map step for a batch of documents: deletes the mapped results previously stored for
        /// each input document, runs the map definitions, runs the reduce definition over the
        /// map output of each document, stores the resulting items and schedules reductions
        /// for every (bucket, reduce key) pair that was touched.
        /// </summary>
        /// <param name="viewGenerator">Supplies the map definitions, the reduce definition and the group-by extraction.</param>
        /// <param name="batch">Batch whose <c>Docs</c> are indexed.</param>
        /// <param name="actions">Storage accessor used for all reads and writes performed here.</param>
        /// <param name="minimumTimestamp">Not read anywhere in this method body.</param>
        public override void IndexDocuments(
            AbstractViewGenerator viewGenerator,
            IndexingBatch batch,
            IStorageActionsAccessor actions,
            DateTime minimumTimestamp)
        {
            var outputCount = 0;
            var inputCount  = 0;
            var stopwatch   = Stopwatch.StartNew();
            var startedAt   = SystemTime.UtcNow;

            var changed            = new HashSet<ReduceKeyAndBucket>();
            var pendingItems       = new List<MapResultItem>();
            var stats              = new IndexingWorkStats();
            var referencedDocsSets = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();

            // Deferred pipeline: nothing below runs until the enumerator is advanced. Each document
            // pulled through is counted and has its old mapped results deleted *before* the
            // FilteredDocument check, so filtered documents are cleaned up but never mapped.
            var documentsWrapped = batch.Docs
                .Select(doc =>
                {
                    inputCount++;
                    var documentId = doc.__document_id;
                    actions.MapReduce.DeleteMappedResultsForDocumentId((string)documentId, name, changed);
                    return doc;
                })
                .Where(candidate => !(candidate is FilteredDocument));

            using (CurrentIndexingScope.Current = new CurrentIndexingScope(LoadDocument, referencedDocsSets.Enqueue))
            {
                // ToList() forces the whole map phase (and with it all the deletions above) to
                // complete before the removed-stats update that follows.
                var mapEnumeration = RobustEnumerationIndex(
                    documentsWrapped.GetEnumerator(),
                    viewGenerator.MapDefinitions,
                    actions,
                    stats);
                var mapResults = mapEnumeration.ToList();

                actions.MapReduce.UpdateRemovedMapReduceStats(name, changed);

                foreach (var resultsOfDocument in mapResults.GroupBy(GetDocumentId))
                {
                    var dynamicResults = resultsOfDocument
                        .Select(raw => (object)new DynamicJsonObject(RavenJObject.FromObject(raw, jsonSerializer)))
                        .ToList();

                    var reduced = RobustEnumerationReduceDuringMapPhase(dynamicResults.GetEnumerator(), viewGenerator.ReduceDefinition, actions, context);
                    foreach (var doc in reduced)
                    {
                        // Counted up front, so skipped items are included in the output count.
                        outputCount++;

                        var reduceValue = viewGenerator.GroupByExtraction(doc);
                        if (reduceValue == null)
                        {
                            logIndexing.Debug("Field {0} is used as the reduce key and cannot be null, skipping document {1}",
                                              viewGenerator.GroupByExtraction, resultsOfDocument.Key);
                            continue;
                        }

                        var reduceKey = ReduceKeyToString(reduceValue);
                        var docId     = resultsOfDocument.Key.ToString();
                        var data      = GetMappedData(doc);

                        // Storage writes are postponed until the indexing scope has been left.
                        var item = new MapResultItem
                        {
                            Data      = data,
                            DocId     = docId,
                            ReduceKey = reduceKey
                        };
                        pendingItems.Add(item);

                        var bucket = IndexingUtil.MapBucket(docId);
                        changed.Add(new ReduceKeyAndBucket(bucket, reduceKey));
                    }
                }
            }

            // Persist the document references that were enqueued through the indexing scope.
            IDictionary<string, HashSet<string>> referencesByDocument;
            while (referencedDocsSets.TryDequeue(out referencesByDocument))
            {
                foreach (var reference in referencesByDocument)
                {
                    actions.Indexing.UpdateDocumentReferences(name, reference.Key, reference.Value);
                    actions.General.MaybePulseTransaction();
                }
            }

            // Now write the buffered map results.
            foreach (var item in pendingItems)
            {
                actions.MapReduce.PutMappedResult(name, item.DocId, item.ReduceKey, item.Data);
                actions.General.MaybePulseTransaction();
            }

            UpdateIndexingStats(context, stats);
            actions.MapReduce.ScheduleReductions(name, 0, changed);

            var performance = new IndexingPerformanceStats
            {
                OutputCount = outputCount,
                InputCount  = inputCount,
                Operation   = "Map",
                Duration    = stopwatch.Elapsed,
                Started     = startedAt
            };
            AddindexingPerformanceStat(performance);

            logIndexing.Debug("Mapped {0} documents for {1}", outputCount, name);
        }
Beispiel #4
0
        /// <summary>
        /// Map step for a batch of documents: deletes the mapped results previously stored for
        /// each input document, runs the map definitions, groups the output by document id,
        /// runs the reduce definition over each group, stores the resulting items and schedules
        /// reductions for every (bucket, reduce key) pair that was touched.
        /// </summary>
        /// <param name="viewGenerator">Supplies the map definitions, the reduce definition and the group-by extraction.</param>
        /// <param name="batch">Batch whose <c>Docs</c> are indexed.</param>
        /// <param name="context">Work context handed to the grouping, reduce enumeration and stats helpers.</param>
        /// <param name="actions">Storage accessor used for all reads and writes performed here.</param>
        /// <param name="minimumTimestamp">Not read anywhere in this method body.</param>
        public override void IndexDocuments(
            AbstractViewGenerator viewGenerator,
            IndexingBatch batch,
            WorkContext context,
            IStorageActionsAccessor actions,
            DateTime minimumTimestamp)
        {
            var outputCount = 0;
            var inputCount  = 0;
            var stopwatch   = Stopwatch.StartNew();
            var startedAt   = SystemTime.UtcNow;

            var changed      = new HashSet<ReduceKeyAndBucket>();
            var pendingItems = new List<MapResultItem>();
            var stats        = new IndexingWorkStats();

            // Deferred pipeline: nothing below runs until the enumerator is advanced. Each document
            // pulled through is counted and has its old mapped results deleted *before* the
            // FilteredDocument check, so filtered documents are cleaned up but never mapped.
            var documentsWrapped = batch.Docs
                .Select(doc =>
                {
                    inputCount++;
                    var documentId = doc.__document_id;
                    actions.MapReduce.DeleteMappedResultsForDocumentId((string)documentId, name, changed);
                    return doc;
                })
                .Where(candidate => !(candidate is FilteredDocument));

            var mapOutput = RobustEnumerationIndex(documentsWrapped.GetEnumerator(), viewGenerator.MapDefinitions, actions, stats);
            var groupedByDocument = GroupByDocumentId(context, mapOutput);

            foreach (var resultsOfDocument in groupedByDocument)
            {
                var dynamicResults = resultsOfDocument
                    .Select(raw => (object)new DynamicJsonObject(RavenJObject.FromObject(raw, jsonSerializer)))
                    .ToList();

                var reduced = RobustEnumerationReduceDuringMapPhase(dynamicResults.GetEnumerator(), viewGenerator.ReduceDefinition, actions, context);
                foreach (var doc in reduced)
                {
                    // Counted up front, so skipped items are included in the output count.
                    outputCount++;

                    var reduceValue = viewGenerator.GroupByExtraction(doc);
                    if (reduceValue == null)
                    {
                        logIndexing.Debug("Field {0} is used as the reduce key and cannot be null, skipping document {1}",
                                          viewGenerator.GroupByExtraction, resultsOfDocument.Key);
                        continue;
                    }

                    var reduceKey = ReduceKeyToString(reduceValue);
                    var docId     = resultsOfDocument.Key.ToString();
                    var data      = GetMappedData(doc);

                    logIndexing.Debug("Mapped result for index '{0}' doc '{1}': '{2}'", name, docId, data);

                    // Storage writes are postponed until all documents have been mapped.
                    var item = new MapResultItem
                    {
                        Data      = data,
                        DocId     = docId,
                        ReduceKey = reduceKey
                    };
                    pendingItems.Add(item);

                    var bucket = IndexingUtil.MapBucket(docId);
                    changed.Add(new ReduceKeyAndBucket(bucket, reduceKey));
                }
            }

            const int pulseInterval = 50000;
            int stored = 0;

            foreach (var item in pendingItems)
            {
                actions.MapReduce.PutMappedResult(name, item.DocId, item.ReduceKey, item.Data);

                // The counter is tested before it is advanced, so the transaction is pulsed
                // after the 1st, 50001st, 100001st, ... stored item.
                bool pulseNow = stored % pulseInterval == 0;
                stored++;
                if (pulseNow)
                {
                    // Protects against Version Store Out Of Memory errors during indexing, which
                    // can occur when an index outputs a VERY large number of items per document.
                    actions.General.PulseTransaction();
                }
            }

            UpdateIndexingStats(context, stats);
            actions.MapReduce.ScheduleReductions(name, 0, changed);

            var performance = new IndexingPerformanceStats
            {
                OutputCount = outputCount,
                InputCount  = inputCount,
                Operation   = "Map",
                Duration    = stopwatch.Elapsed,
                Started     = startedAt
            };
            AddindexingPerformanceStat(performance);

            logIndexing.Debug("Mapped {0} documents for {1}", outputCount, name);
        }
Beispiel #5
0
        /// <summary>
        /// Map step for a sequence of documents: deletes the mapped results previously stored
        /// for each input document, runs the map definitions, groups the output by document id,
        /// runs the reduce definition over each group, stores every resulting item immediately
        /// and schedules reductions for every (bucket, reduce key) pair that was touched.
        /// </summary>
        /// <param name="viewGenerator">Supplies the map definitions, the reduce definition and the group-by extraction.</param>
        /// <param name="documents">Documents to index; each is expected to expose <c>__document_id</c>.</param>
        /// <param name="context">Work context handed to the enumeration, grouping and stats helpers.</param>
        /// <param name="actions">Storage accessor used for all reads and writes performed here.</param>
        /// <param name="minimumTimestamp">Not read anywhere in this method body.</param>
        public override void IndexDocuments(
            AbstractViewGenerator viewGenerator,
            IEnumerable <dynamic> documents,
            WorkContext context,
            IStorageActionsAccessor actions,
            DateTime minimumTimestamp)
        {
            var outputCount = 0;
            var inputCount  = 0;
            var stopwatch   = Stopwatch.StartNew();

            var changed = new HashSet<ReduceKeyAndBucket>();
            var stats   = new IndexingWorkStats();

            // Deferred pipeline: nothing below runs until the sequence is enumerated. Each document
            // pulled through is counted and has its old mapped results deleted *before* the
            // FilteredDocument check, so filtered documents are cleaned up but never mapped.
            var documentsWrapped = documents
                .Select(doc =>
                {
                    inputCount++;
                    var documentId = doc.__document_id;
                    actions.MapReduce.DeleteMappedResultsForDocumentId((string)documentId, name, changed);
                    return doc;
                })
                .Where(candidate => !(candidate is FilteredDocument));

            var mapOutput = RobustEnumerationIndex(documentsWrapped, viewGenerator.MapDefinitions, actions, context, stats);
            var groupedByDocument = GroupByDocumentId(context, mapOutput);

            foreach (var resultsOfDocument in groupedByDocument)
            {
                var reduced = RobustEnumerationReduceDuringMapPhase(resultsOfDocument, viewGenerator.ReduceDefinition, actions, context);
                foreach (var doc in reduced)
                {
                    // Counted up front, so skipped items are included in the output count.
                    outputCount++;

                    var reduceValue = viewGenerator.GroupByExtraction(doc);
                    if (reduceValue == null)
                    {
                        logIndexing.Debug("Field {0} is used as the reduce key and cannot be null, skipping document {1}",
                                          viewGenerator.GroupByExtraction, resultsOfDocument.Key);
                        continue;
                    }

                    var reduceKey = ReduceKeyToString(reduceValue);
                    var docId     = resultsOfDocument.Key.ToString();
                    var data      = GetMappedData(doc);

                    logIndexing.Debug("Mapped result for index '{0}' doc '{1}': '{2}'", name, docId, data);

                    // Unlike a buffered approach, the result is written as soon as it is produced.
                    actions.MapReduce.PutMappedResult(name, docId, reduceKey, data);

                    var bucket = IndexingUtil.MapBucket(docId);
                    changed.Add(new ReduceKeyAndBucket(bucket, reduceKey));
                }
            }

            UpdateIndexingStats(context, stats);
            actions.MapReduce.ScheduleReductions(name, 0, changed);

            var performance = new IndexingPerformanceStats
            {
                OutputCount = outputCount,
                InputCount  = inputCount,
                Operation   = "Map",
                Duration    = stopwatch.Elapsed
            };
            AddindexingPerformanceStat(performance);

            logIndexing.Debug("Mapped {0} documents for {1}", outputCount, name);
        }