Beispiel #1
0
            /// <summary>
            /// Sets up a reduce pass over the supplied buckets of mapped results.
            /// </summary>
            /// <param name="parent">Index that owns this operation; its name and index definition are reused here.</param>
            /// <param name="viewGenerator">Stored in <c>ViewGenerator</c>.</param>
            /// <param name="mappedResultsByBucket">Stored in <c>MappedResultsByBucket</c>.</param>
            /// <param name="level">Stored in <c>Level</c>; update-trigger batchers are only created when it equals 2.</param>
            /// <param name="context">Stored in <c>Context</c>; also the source of the index update triggers.</param>
            /// <param name="actions">Stored in <c>Actions</c>.</param>
            /// <param name="reduceKeys">Stored in <c>ReduceKeys</c>.</param>
            public ReduceDocuments(
                MapReduceIndex parent,
                AbstractViewGenerator viewGenerator,
                IEnumerable<IGrouping<int, object>> mappedResultsByBucket,
                int level,
                WorkContext context,
                IStorageActionsAccessor actions,
                HashSet<string> reduceKeys)
            {
                this.parent = parent;
                name = this.parent.name;

                ViewGenerator = viewGenerator;
                MappedResultsByBucket = mappedResultsByBucket;
                Level = level;
                Context = context;
                Actions = actions;
                ReduceKeys = reduceKeys;

                anonymousObjectToLuceneDocumentConverter =
                    new AnonymousObjectToLuceneDocumentConverter(this.parent.indexDefinition);

                // Batchers are only set up for level 2; every other level leaves them untouched.
                if (Level != 2)
                {
                    return;
                }

                // Triggers that hand back a null batcher are filtered out.
                batchers = Context.IndexUpdateTriggers
                    .Select(trigger => trigger.CreateBatcher(name))
                    .Where(batcher => batcher != null)
                    .ToList();
            }
Beispiel #2
0
        /// <summary>
        /// Turns one map output into an <see cref="IndexingResult"/>, unwrapping a
        /// <see cref="BoostedValue"/> first when the output is wrapped in one.
        /// </summary>
        /// <param name="doc">Map output; either a <see cref="BoostedValue"/>, a <see cref="DynamicJsonObject"/> or any other object.</param>
        /// <param name="anonymousObjectToLuceneDocumentConverter">Converter passed on to the extraction overloads.</param>
        /// <param name="boost">Receives the wrapper's boost, or 1 when <paramref name="doc"/> is not boosted.</param>
        /// <returns>The extracted indexing data; norms are switched on for every field when the boost differs from 1.</returns>
        private IndexingResult GetIndexingResult(object doc, AnonymousObjectToLuceneDocumentConverter anonymousObjectToLuceneDocumentConverter, out float boost)
        {
            var wrapper = doc as BoostedValue;
            if (wrapper == null)
            {
                boost = 1;
            }
            else
            {
                doc = wrapper.Value;
                boost = wrapper.Boost;
            }

            // Pick the overload that matches the runtime shape of the (unwrapped) output.
            var jsonDoc = doc as DynamicJsonObject;
            var result = jsonDoc != null
                ? ExtractIndexDataFromDocument(anonymousObjectToLuceneDocumentConverter, jsonDoc)
                : ExtractIndexDataFromDocument(anonymousObjectToLuceneDocumentConverter, doc);

            // float.Epsilon is the smallest positive float, so this is in effect "boost is not exactly 1".
            var hasCustomBoost = Math.Abs(boost - 1) > float.Epsilon;
            if (hasCustomBoost)
            {
                foreach (var field in result.Fields)
                {
                    field.OmitNorms = false;
                }
            }

            return result;
        }
Beispiel #3
0
        /// <summary>
        /// Produces the stored Lucene fields for one document, unwrapping a <see cref="BoostedValue"/> if present.
        /// </summary>
        /// <param name="anonymousObjectToLuceneDocumentConverter">Converter that builds the fields.</param>
        /// <param name="doc">The document, optionally wrapped in a <see cref="BoostedValue"/>.</param>
        /// <param name="properties">
        /// Property descriptors for non-JSON documents; filled in from <paramref name="doc"/> when null so the
        /// caller can pass the same collection on later calls.
        /// </param>
        /// <param name="boost">Receives the wrapper's boost, or 1 when there is no wrapper.</param>
        /// <returns>
        /// The converter's field sequence as-is for a boost of 1; otherwise a materialized list whose fields
        /// have had norms switched on.
        /// </returns>
        private IEnumerable<AbstractField> GetFields(AnonymousObjectToLuceneDocumentConverter anonymousObjectToLuceneDocumentConverter, object doc, ref PropertyDescriptorCollection properties, out float boost)
        {
            boost = 1;

            var wrapper = doc as BoostedValue;
            if (wrapper != null)
            {
                doc = wrapper.Value;
                boost = wrapper.Boost;
            }

            IEnumerable<AbstractField> indexed;
            var jsonDoc = doc as IDynamicJsonObject;
            if (jsonDoc == null)
            {
                if (properties == null)
                {
                    properties = TypeDescriptor.GetProperties(doc);
                }
                indexed = anonymousObjectToLuceneDocumentConverter.Index(doc, properties, Field.Store.YES);
            }
            else
            {
                indexed = anonymousObjectToLuceneDocumentConverter.Index(jsonDoc.Inner, Field.Store.YES);
            }

            // Nothing to adjust when the boost is the default.
            if (!(Math.Abs(boost - 1) > float.Epsilon))
            {
                return indexed;
            }

            // Materialize once so the fields we modify are the same instances that get returned.
            var materialized = indexed.ToList();
            materialized.ForEach(field => field.SetOmitNorms(false));
            return materialized;
        }
Beispiel #4
0
 /// <summary>
 /// Reads the document id from <paramref name="doc"/> and returns its index fields.
 /// </summary>
 /// <param name="properties">Property descriptors for <paramref name="doc"/>; resolved from the object when null.</param>
 /// <param name="doc">The object to index; expected to expose a <c>__document_id</c> property.</param>
 /// <param name="newDocId">Receives the value of <c>__document_id</c>, or null when it is not a string.</param>
 /// <returns>The fields produced by the converter (not stored).</returns>
 private IEnumerable<AbstractField> ExtractIndexDataFromDocument(PropertyDescriptorCollection properties, object doc, out string newDocId)
 {
     properties = properties ?? TypeDescriptor.GetProperties(doc);

     // Case-sensitive lookup of the id property.
     var idProperty = properties.Find("__document_id", false);
     newDocId = idProperty.GetValue(doc) as string;

     return AnonymousObjectToLuceneDocumentConverter.Index(doc, properties, indexDefinition, Field.Store.NO);
 }
Beispiel #5
0
        /// <summary>
        /// Builds the indexing data for a dynamic JSON map output.
        /// </summary>
        /// <param name="anonymousObjectToLuceneDocumentConverter">Converter that builds the fields.</param>
        /// <param name="dynamicJsonObject">The map output; its document id is taken from the root parent (or itself).</param>
        /// <returns>A result that is never marked as skippable; <c>NewDocId</c> is null when the id is a <see cref="DynamicNullObject"/>.</returns>
        private IndexingResult ExtractIndexDataFromDocument(AnonymousObjectToLuceneDocumentConverter anonymousObjectToLuceneDocumentConverter, DynamicJsonObject dynamicJsonObject)
        {
            var rawId = dynamicJsonObject.GetRootParentOrSelf().GetDocumentId();
            var inner = ((IDynamicJsonObject)dynamicJsonObject).Inner;

            var result = new IndexingResult();
            result.Fields = anonymousObjectToLuceneDocumentConverter.Index(inner, Field.Store.NO).ToList();
            result.NewDocId = rawId is DynamicNullObject ? null : (string)rawId;
            result.ShouldSkip = false;
            return result;
        }
Beispiel #6
0
        /// <summary>
        /// Builds the indexing data for a dynamic JSON map output using the index definition of this index.
        /// </summary>
        /// <param name="dynamicJsonObject">The map output to index.</param>
        /// <returns>A result that is never marked as skippable; <c>NewDocId</c> is null when the id is a <see cref="DynamicNullObject"/>.</returns>
        private IndexingResult ExtractIndexDataFromDocument(DynamicJsonObject dynamicJsonObject)
        {
            var documentId = dynamicJsonObject.GetDocumentId();
            var fields = AnonymousObjectToLuceneDocumentConverter.Index(dynamicJsonObject.Inner, indexDefinition, Field.Store.NO);

            // A DynamicNullObject id stands for "no id" and is reported as null.
            string id = null;
            if (documentId is DynamicNullObject == false)
            {
                id = (string)documentId;
            }

            return new IndexingResult
            {
                Fields = fields,
                NewDocId = id,
                ShouldSkip = false
            };
        }
Beispiel #7
0
        /// <summary>
        /// Produces the stored Lucene fields for one document.
        /// </summary>
        /// <param name="anonymousObjectToLuceneDocumentConverter">Converter that builds the fields.</param>
        /// <param name="doc">Either a <see cref="DynamicJsonObject"/> or any other object.</param>
        /// <param name="properties">
        /// Property descriptors for non-JSON documents; filled in from <paramref name="doc"/> when null so the
        /// caller can pass the same collection on later calls.
        /// </param>
        /// <returns>The fields produced by the converter.</returns>
        private IEnumerable<AbstractField> GetFields(AnonymousObjectToLuceneDocumentConverter anonymousObjectToLuceneDocumentConverter, object doc, ref PropertyDescriptorCollection properties)
        {
            var jsonDoc = doc as DynamicJsonObject;
            if (jsonDoc != null)
            {
                return anonymousObjectToLuceneDocumentConverter.Index(jsonDoc.Inner, indexDefinition, Field.Store.YES);
            }

            if (properties == null)
            {
                properties = TypeDescriptor.GetProperties(doc);
            }
            return anonymousObjectToLuceneDocumentConverter.Index(doc, properties, indexDefinition, Field.Store.YES);
        }
Beispiel #8
0
        /// <summary>
        /// Builds the indexing data for a non-JSON map output, using cached property descriptors for its type.
        /// </summary>
        /// <param name="anonymousObjectToLuceneDocumentConverter">Converter that builds the fields.</param>
        /// <param name="doc">The map output; must not be null and is expected to expose the document id property.</param>
        /// <returns>
        /// The fields and document id; <c>ShouldSkip</c> is set when the type has more than one property
        /// yet the converter produced no fields.
        /// </returns>
        private IndexingResult ExtractIndexDataFromDocument(AnonymousObjectToLuceneDocumentConverter anonymousObjectToLuceneDocumentConverter, object doc)
        {
            // Descriptors are looked up per runtime type and cached for later outputs of the same type.
            PropertyDescriptorCollection properties =
                propertyDescriptorCache.GetOrAdd(doc.GetType(), TypeDescriptor.GetProperties);

            var fields = anonymousObjectToLuceneDocumentConverter.Index(doc, properties, Field.Store.NO).ToList();
            var documentId = properties.Find(Constants.DocumentIdFieldName, false).GetValue(doc) as string;

            // we always have at least __document_id
            var producedNothing = properties.Count > 1 && fields.Count == 0;

            return new IndexingResult
            {
                Fields = fields,
                NewDocId = documentId,
                ShouldSkip = producedNothing
            };
        }
Beispiel #9
0
        /// <summary>
        /// Builds the indexing data for a non-JSON map output using the index definition of this index.
        /// </summary>
        /// <param name="properties">Property descriptors for <paramref name="doc"/>; resolved from the object when null.</param>
        /// <param name="doc">The map output; expected to expose the document id property.</param>
        /// <returns>
        /// The fields and document id; <c>ShouldSkip</c> is set when there is more than one property
        /// yet the converter produced no fields.
        /// </returns>
        private IndexingResult ExtractIndexDataFromDocument(PropertyDescriptorCollection properties, object doc)
        {
            properties = properties ?? TypeDescriptor.GetProperties(doc);

            var fields = AnonymousObjectToLuceneDocumentConverter
                .Index(doc, properties, indexDefinition, Field.Store.NO)
                .ToList();

            var result = new IndexingResult();
            result.Fields = fields;
            result.NewDocId = properties.Find(Constants.DocumentIdFieldName, false).GetValue(doc) as string;
            // we always have at least __document_id
            result.ShouldSkip = properties.Count > 1 && fields.Count == 0;
            return result;
        }
Beispiel #10
0
        /// <summary>
        /// Runs the map definition over <paramref name="documents"/> and writes one Lucene document for every
        /// map output that carries a string <c>__document_id</c>.
        /// </summary>
        /// <remarks>
        /// When the id differs from the id of the previously written output, existing index entries for the
        /// new id are deleted before the fresh entry is added. Outputs without an id are neither written nor
        /// used to issue a delete, but still count towards the success statistics.
        /// </remarks>
        /// <param name="viewGenerator">Supplies the map definition to execute.</param>
        /// <param name="documents">Source documents to feed into the map definition.</param>
        /// <param name="context">Work context handed to the robust enumeration.</param>
        /// <param name="actions">Storage actions used for index statistics.</param>
        public override void IndexDocuments(
            AbstractViewGenerator viewGenerator,
            IEnumerable<object> documents,
            WorkContext context,
            DocumentStorageActions actions)
        {
            actions.SetCurrentIndexStatsTo(name);
            var count = 0;
            Write(indexWriter =>
            {
                string currentId = null;
                var converter = new AnonymousObjectToLuceneDocumentConverter();
                PropertyDescriptorCollection properties = null;
                foreach (var doc in RobustEnumeration(documents, viewGenerator.MapDefinition, actions, context))
                {
                    count++;

                    // Descriptors are resolved from the first output and reused for the rest of the batch.
                    if (properties == null)
                    {
                        properties = TypeDescriptor.GetProperties(doc);
                    }
                    var newDocId = properties.Find("__document_id", false).GetValue(doc) as string;
                    var fields = converter.Index(doc, properties, indexDefinition);

                    if (newDocId != null)
                    {
                        // The delete is issued only for a real id, so a Term is never built with null text.
                        if (currentId != newDocId) // new document id, so delete all old values matching it
                        {
                            indexWriter.DeleteDocuments(new Term("__document_id", newDocId));
                        }

                        var luceneDoc = new Document();
                        luceneDoc.Add(new Field("__document_id", newDocId, Field.Store.YES, Field.Index.UN_TOKENIZED));

                        currentId = newDocId;
                        CopyFieldsToDocumentButRemoveDuplicateValues(luceneDoc, fields);
                        log.DebugFormat("Indexing document {0}", luceneDoc);
                        indexWriter.AddDocument(luceneDoc);
                    }

                    actions.IncrementSuccessIndexing();
                }

                // True only when at least one document was written.
                return currentId != null;
            });
            log.DebugFormat("Indexed {0} documents for {1}", count, name);
        }
Beispiel #11
0
        /// <summary>
        /// Builds the indexing data for a dynamic JSON map output.
        /// </summary>
        /// <param name="anonymousObjectToLuceneDocumentConverter">Converter that builds the fields.</param>
        /// <param name="dynamicJsonObject">The map output; its document id is taken from the root parent (or itself).</param>
        /// <returns>A result that is never marked as skippable; <c>NewDocId</c> is null when the id is a <see cref="DynamicNullObject"/>.</returns>
        /// <exception cref="InvalidSpatialShapException">
        /// Thrown in place of an <see cref="InvalidShapeException"/> raised while building the fields, so the
        /// failing document id travels with the error.
        /// </exception>
        private IndexingResult ExtractIndexDataFromDocument(AnonymousObjectToLuceneDocumentConverter anonymousObjectToLuceneDocumentConverter, DynamicJsonObject dynamicJsonObject)
        {
            var rawId = dynamicJsonObject.GetRootParentOrSelf().GetDocumentId();

            // The id is resolved up front so it is available to the catch block below.
            string newDocId = null;
            if (!(rawId is DynamicNullObject))
            {
                newDocId = (string)rawId;
            }

            List<AbstractField> fields;
            try
            {
                var inner = ((IDynamicJsonObject)dynamicJsonObject).Inner;
                fields = anonymousObjectToLuceneDocumentConverter.Index(inner, Field.Store.NO).ToList();
            }
            catch (InvalidShapeException shapeError)
            {
                throw new InvalidSpatialShapException(shapeError, newDocId);
            }

            var result = new IndexingResult();
            result.Fields = fields;
            result.NewDocId = newDocId;
            result.ShouldSkip = false;
            return result;
        }
Beispiel #12
0
        /// <summary>
        /// Builds the indexing data for a non-JSON map output via its property accessor.
        /// </summary>
        /// <param name="anonymousObjectToLuceneDocumentConverter">Converter that builds the fields.</param>
        /// <param name="doc">The map output to index.</param>
        /// <returns>
        /// The fields and document id; <c>ShouldSkip</c> is set when the accessor exposes more than one
        /// property yet the converter produced no fields.
        /// </returns>
        /// <exception cref="InvalidSpatialShapException">
        /// Thrown in place of an <see cref="InvalidShapeException"/> raised while building the fields, so the
        /// failing document id travels with the error.
        /// </exception>
        private IndexingResult ExtractIndexDataFromDocument(AnonymousObjectToLuceneDocumentConverter anonymousObjectToLuceneDocumentConverter, object doc)
        {
            PropertyAccessor accessor;
            var documentId = GetDocumentId(doc, out accessor);

            List<AbstractField> fields;
            try
            {
                fields = anonymousObjectToLuceneDocumentConverter
                    .Index(doc, accessor, Field.Store.NO)
                    .ToList();
            }
            catch (InvalidShapeException shapeError)
            {
                throw new InvalidSpatialShapException(shapeError, documentId);
            }

            // we always have at least __document_id
            var producedNothing = accessor.Properies.Count > 1 && fields.Count == 0;

            return new IndexingResult
            {
                Fields = fields,
                NewDocId = documentId,
                ShouldSkip = producedNothing
            };
        }
Beispiel #13
0
        /// <summary>
        /// Turns one map output into an <see cref="IndexingResult"/>, unwrapping a
        /// <see cref="BoostedValue"/> first when the output is wrapped in one.
        /// </summary>
        /// <param name="doc">Map output; either a <see cref="BoostedValue"/>, a <see cref="DynamicJsonObject"/> or any other object.</param>
        /// <param name="anonymousObjectToLuceneDocumentConverter">Converter passed on to the extraction overloads.</param>
        /// <param name="boost">Receives the wrapper's boost, or 1 when <paramref name="doc"/> is not boosted.</param>
        /// <returns>The extracted indexing data; norms are switched on for every field when the boost differs from 1.</returns>
        private IndexingResult GetIndexingResult(object doc, AnonymousObjectToLuceneDocumentConverter anonymousObjectToLuceneDocumentConverter, out float boost)
        {
            var wrapper = doc as BoostedValue;
            if (wrapper != null)
            {
                doc = wrapper.Value;
                boost = wrapper.Boost;
            }
            else
            {
                boost = 1;
            }

            var jsonDocument = doc as DynamicJsonObject;

            IndexingResult extracted;

            // ReSharper disable once ConvertIfStatementToConditionalTernaryExpression
            if (jsonDocument == null)
            {
                extracted = ExtractIndexDataFromDocument(anonymousObjectToLuceneDocumentConverter, doc);
            }
            else
            {
                extracted = ExtractIndexDataFromDocument(anonymousObjectToLuceneDocumentConverter, jsonDocument);
            }

            // Default boost: the fields are returned exactly as extracted.
            if (!(Math.Abs(boost - 1) > float.Epsilon))
            {
                return extracted;
            }

            foreach (var field in extracted.Fields)
            {
                field.OmitNorms = false;
            }

            return extracted;
        }
Beispiel #14
0
        /// <summary>
        /// Turns one map output into an <see cref="IndexingResult"/>, unwrapping a
        /// <see cref="BoostedValue"/> first when the output is wrapped in one.
        /// </summary>
        /// <param name="doc">Map output; either a <see cref="BoostedValue"/>, a <see cref="DynamicJsonObject"/> or any other object.</param>
        /// <param name="anonymousObjectToLuceneDocumentConverter">Converter passed on to the extraction overloads.</param>
        /// <param name="boost">Receives the wrapper's boost, or 1 when <paramref name="doc"/> is not boosted.</param>
        /// <returns>The extracted indexing data; norms are switched on for every field when the boost differs from 1.</returns>
        private IndexingResult GetIndexingResult(object doc, AnonymousObjectToLuceneDocumentConverter anonymousObjectToLuceneDocumentConverter, out float boost)
        {
            boost = 1;

            var boosted = doc as BoostedValue;
            if (boosted != null)
            {
                doc = boosted.Value;
                boost = boosted.Boost;
            }

            // A single "as" replaces the type test plus cast; null means "not a dynamic JSON object".
            var asJson = doc as DynamicJsonObject;

            IndexingResult result;
            if (asJson != null)
            {
                result = ExtractIndexDataFromDocument(anonymousObjectToLuceneDocumentConverter, asJson);
            }
            else
            {
                result = ExtractIndexDataFromDocument(anonymousObjectToLuceneDocumentConverter, doc);
            }

            var boostDiffersFromDefault = Math.Abs(boost - 1) > float.Epsilon;
            if (boostDiffersFromDefault)
            {
                foreach (var field in result.Fields)
                {
                    field.OmitNorms = false;
                }
            }

            return result;
        }
Beispiel #15
0
 /// <summary>
 /// Reads the document id from a dynamic JSON map output and returns its index fields.
 /// </summary>
 /// <param name="dynamicJsonObject">The map output to index.</param>
 /// <param name="newDocId">Receives the document id reported by <paramref name="dynamicJsonObject"/>.</param>
 /// <returns>The fields produced by the converter (not stored).</returns>
 private IEnumerable<AbstractField> ExtractIndexDataFromDocument(DynamicJsonObject dynamicJsonObject, out string newDocId)
 {
     newDocId = dynamicJsonObject.GetDocumentId();

     var inner = dynamicJsonObject.Inner;
     return AnonymousObjectToLuceneDocumentConverter.Index(inner, indexDefinition, Field.Store.NO);
 }
Beispiel #16
0
		/// <summary>
		/// Runs the map definitions over <paramref name="documents"/> and writes the resulting entries to the Lucene index.
		/// </summary>
		/// <remarks>
		/// For the first occurrence of each source document id the OnIndexEntryDeleted triggers are fired and
		/// the existing index entries for that id are deleted; this happens while the wrapped sequence is
		/// enumerated. Every map output with a non-null id that is not marked as skippable is then added to the
		/// index, firing OnIndexEntryCreated first. Trigger batchers are disposed even when indexing fails.
		/// </remarks>
		/// <param name="viewGenerator">Supplies the map definitions to execute.</param>
		/// <param name="documents">Source documents; each must expose a non-null <c>__document_id</c>.</param>
		/// <param name="context">Work context providing the update triggers and error reporting.</param>
		/// <param name="actions">Storage accessor handed to the robust enumeration.</param>
		/// <param name="minimumTimestamp">Not used by this method.</param>
		/// <exception cref="ArgumentException">A source document has a null <c>__document_id</c>.</exception>
		public override void IndexDocuments(AbstractViewGenerator viewGenerator, IEnumerable<object> documents, WorkContext context, IStorageActionsAccessor actions, DateTime minimumTimestamp)
		{
			// Aggregate activity counter returned to Write: it is bumped for every source document, every map
			// output and every indexed entry. Kept as-is because Write's use of the value is not visible here.
			var count = 0;
			// Number of entries actually added to the index; this is what the debug log reports.
			var indexedCount = 0;
			Write(context, (indexWriter, analyzer, stats) =>
			{
				var processedKeys = new HashSet<string>();
				var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(name))
					.Where(x => x != null)
					.ToList();
				try
				{
					// Deferred query: the checks, trigger calls and deletes below run as the sequence is enumerated.
					var documentsWrapped = documents.Select((dynamic doc) =>
					{
						if(doc.__document_id == null)
							throw new ArgumentException(string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));

						count++;
						string documentId = doc.__document_id.ToString();
						// Only the first occurrence of an id triggers the delete of its old entries.
						if (processedKeys.Add(documentId) == false)
							return doc;
						batchers.ApplyAndIgnoreAllErrors(
							exception =>
							{
								logIndexing.WarnException(
									string.Format("Error when executed OnIndexEntryDeleted trigger for index '{0}', key: '{1}'",
													   name, documentId),
									exception);
								context.AddError(name,
												 documentId,
												 exception.Message
									);
							},
							trigger => trigger.OnIndexEntryDeleted(documentId));
						indexWriter.DeleteDocuments(new Term(Constants.DocumentIdFieldName, documentId.ToLowerInvariant()));
						return doc;
					});
					var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(indexDefinition);
					// One Document and one id field are reused for every entry; both are reset per entry below.
					var luceneDoc = new Document();
					var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES, Field.Index.ANALYZED_NO_NORMS);
					foreach (var doc in RobustEnumerationIndex(documentsWrapped, viewGenerator.MapDefinitions, actions, context, stats))
					{
						count++;

						float boost;
						var indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);

						if (indexingResult.NewDocId != null && indexingResult.ShouldSkip == false)
						{
							count += 1;
							indexedCount++;
							luceneDoc.GetFields().Clear();
							luceneDoc.SetBoost(boost);
							documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
							luceneDoc.Add(documentIdField);
							foreach (var field in indexingResult.Fields)
							{
								luceneDoc.Add(field);
							}
							batchers.ApplyAndIgnoreAllErrors(
								exception =>
								{
									logIndexing.WarnException(
										string.Format( "Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
														   name, indexingResult.NewDocId),
										exception);
									context.AddError(name,
													 indexingResult.NewDocId,
													 exception.Message
										);
								},
								trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc));
							LogIndexedDocument(indexingResult.NewDocId, luceneDoc);
							AddDocumentToIndex(indexWriter, luceneDoc, analyzer);
						}

						stats.IndexingSuccesses++;
					}
				}
				finally
				{
					// Dispose in finally so the batchers are released even when mapping or indexing throws.
					batchers.ApplyAndIgnoreAllErrors(
						e =>
						{
							logIndexing.WarnException("Failed to dispose on index update trigger", e);
							context.AddError(name, null, e.Message);
						},
						x => x.Dispose());
				}
				return count;
			});
			logIndexing.Debug("Indexed {0} documents for {1}", indexedCount, name);
		}
Beispiel #17
0
        /// <summary>
        /// Indexes the documents in <paramref name="batch"/>: removes their existing index entries, runs the
        /// map definitions over them in partitions, adds the resulting Lucene documents, and records document
        /// references and a performance statistic for the run.
        /// </summary>
        /// <param name="viewGenerator">Supplies the map definitions to execute.</param>
        /// <param name="batch">Source documents (<c>Docs</c>) plus a per-document flag (<c>SkipDeleteFromIndex</c>) read by index position.</param>
        /// <param name="actions">Storage accessor used to persist the collected document references.</param>
        /// <param name="minimumTimestamp">Not used by this method.</param>
        /// <exception cref="ArgumentException">A source document has a null <c>__document_id</c>.</exception>
        public override void IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp)
        {
            // count = entries added to the index; sourceCount = source documents visited.
            var count       = 0;
            var sourceCount = 0;
            var sw          = Stopwatch.StartNew();
            var start       = SystemTime.UtcNow;

            Write((indexWriter, analyzer, stats) =>
            {
                var processedKeys = new HashSet <string>();
                var batchers      = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(name))
                                    .Where(x => x != null)
                                    .ToList();
                try
                {
                    var docIdTerm        = new Term(Constants.DocumentIdFieldName);
                    // One pass over the batch, materialized by ToList below: validates ids, fires the
                    // OnIndexEntryDeleted triggers and removes old entries. FilteredDocument items are
                    // dropped only after that pass, so their old entries are still handled.
                    var documentsWrapped = batch.Docs.Select((doc, i) =>
                    {
                        Interlocked.Increment(ref sourceCount);
                        if (doc.__document_id == null)
                        {
                            throw new ArgumentException(
                                string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));
                        }

                        string documentId = doc.__document_id.ToString();
                        // Add returns false when this id was already seen in this batch; nothing more to do then.
                        if (processedKeys.Add(documentId) == false)
                        {
                            return(doc);
                        }
                        batchers.ApplyAndIgnoreAllErrors(
                            exception =>
                        {
                            logIndexing.WarnException(
                                string.Format("Error when executed OnIndexEntryDeleted trigger for index '{0}', key: '{1}'",
                                              name, documentId),
                                exception);
                            context.AddError(name,
                                             documentId,
                                             exception.Message
                                             );
                        },
                            trigger => trigger.OnIndexEntryDeleted(documentId));
                        // The delete is skipped only when the batch says so AND the context does not ask for removal.
                        if (batch.SkipDeleteFromIndex[i] == false ||
                            context.ShouldRemoveFromIndex(documentId))                             // maybe it is recently deleted?
                        {
                            indexWriter.DeleteDocuments(docIdTerm.CreateTerm(documentId.ToLowerInvariant()));
                        }

                        return(doc);
                    })
                                           .Where(x => x is FilteredDocument == false)
                                           .ToList();

                    // Each indexing scope enqueues its referenced-document map here; drained after the partitions finish.
                    var allReferencedDocs = new ConcurrentQueue <IDictionary <string, HashSet <string> > >();
                    // NOTE(review): the Interlocked counters and the concurrent queue suggest partitions may run
                    // on several threads - confirm against ExecuteAllBuffered.
                    BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, (partition) =>
                    {
                        // Converter, Document and id field are created per partition and reused for each of its entries.
                        var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(indexDefinition);
                        var luceneDoc       = new Document();
                        var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES,
                                                        Field.Index.NOT_ANALYZED_NO_NORMS);

                        using (CurrentIndexingScope.Current = new CurrentIndexingScope(LoadDocument, allReferencedDocs.Enqueue))
                        {
                            foreach (var doc in RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, stats))
                            {
                                float boost;
                                var indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);

                                if (indexingResult.NewDocId != null && indexingResult.ShouldSkip == false)
                                {
                                    Interlocked.Increment(ref count);
                                    // Reset the reused Document before filling it for this entry.
                                    luceneDoc.GetFields().Clear();
                                    luceneDoc.Boost = boost;
                                    documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
                                    luceneDoc.Add(documentIdField);
                                    foreach (var field in indexingResult.Fields)
                                    {
                                        luceneDoc.Add(field);
                                    }
                                    batchers.ApplyAndIgnoreAllErrors(
                                        exception =>
                                    {
                                        logIndexing.WarnException(
                                            string.Format("Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
                                                          name, indexingResult.NewDocId),
                                            exception);
                                        context.AddError(name,
                                                         indexingResult.NewDocId,
                                                         exception.Message
                                                         );
                                    },
                                        trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc));
                                    LogIndexedDocument(indexingResult.NewDocId, luceneDoc);
                                    AddDocumentToIndex(indexWriter, luceneDoc, analyzer);
                                }

                                Interlocked.Increment(ref stats.IndexingSuccesses);
                            }
                        }
                    });

                    // Persist the document references collected by the indexing scopes.
                    IDictionary <string, HashSet <string> > result;
                    while (allReferencedDocs.TryDequeue(out result))
                    {
                        foreach (var referencedDocument in result)
                        {
                            actions.Indexing.UpdateDocumentReferences(name, referencedDocument.Key, referencedDocument.Value);
                        }
                    }
                }
                catch (Exception e)
                {
                    // Tell every batcher about the failure, then let the original exception propagate.
                    batchers.ApplyAndIgnoreAllErrors(
                        ex =>
                    {
                        logIndexing.WarnException("Failed to notify index update trigger batcher about an error", ex);
                        context.AddError(name, null, ex.Message);
                    },
                        x => x.AnErrorOccured(e));
                    throw;
                }
                finally
                {
                    // Batchers are disposed on both the success and the failure path.
                    batchers.ApplyAndIgnoreAllErrors(
                        e =>
                    {
                        logIndexing.WarnException("Failed to dispose on index update trigger", e);
                        context.AddError(name, null, e.Message);
                    },
                        x => x.Dispose());
                }
                // The value handed back to Write is the number of source documents visited, not the number indexed.
                return(sourceCount);
            });
            AddindexingPerformanceStat(new IndexingPerformanceStats
            {
                OutputCount = count,
                InputCount  = sourceCount,
                Duration    = sw.Elapsed,
                Operation   = "Index",
                Started     = start
            });
            logIndexing.Debug("Indexed {0} documents for {1}", count, name);
        }
Beispiel #18
0
        /// <summary>
        /// Runs the map definitions over <paramref name="documents"/> and writes the resulting entries to the Lucene index.
        /// </summary>
        /// <remarks>
        /// For the first occurrence of each source document id the OnIndexEntryDeleted triggers are fired and
        /// the existing index entries for that id are deleted; this happens while the wrapped sequence is
        /// enumerated. Every map output with a non-null id that is not marked as skippable is then added to the
        /// index, firing OnIndexEntryCreated first. Trigger batchers are disposed even when indexing fails.
        /// </remarks>
        /// <param name="viewGenerator">Supplies the map definitions to execute.</param>
        /// <param name="documents">Source documents; each must expose a non-null <c>__document_id</c>.</param>
        /// <param name="context">Work context providing the update triggers and error reporting.</param>
        /// <param name="actions">Storage accessor handed to the robust enumeration.</param>
        /// <param name="minimumTimestamp">Not used by this method.</param>
        /// <exception cref="ArgumentException">A source document has a null <c>__document_id</c>.</exception>
        public override void IndexDocuments(AbstractViewGenerator viewGenerator, IEnumerable <object> documents, WorkContext context, IStorageActionsAccessor actions, DateTime minimumTimestamp)
        {
            // Aggregate activity counter returned to Write: it is bumped for every source document, every map
            // output and every indexed entry. Kept as-is because Write's use of the value is not visible here.
            var count = 0;
            // Number of entries actually added to the index; this is what the debug log reports.
            var indexedCount = 0;

            Write(context, (indexWriter, analyzer, stats) =>
            {
                var processedKeys = new HashSet <string>();
                var batchers      = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(name))
                                    .Where(x => x != null)
                                    .ToList();
                try
                {
                    // Deferred query: the checks, trigger calls and deletes below run as the sequence is enumerated.
                    var documentsWrapped = documents.Select((dynamic doc) =>
                    {
                        if (doc.__document_id == null)
                        {
                            throw new ArgumentException(string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));
                        }

                        count++;
                        string documentId = doc.__document_id.ToString();
                        // Only the first occurrence of an id triggers the delete of its old entries.
                        if (processedKeys.Add(documentId) == false)
                        {
                            return(doc);
                        }
                        batchers.ApplyAndIgnoreAllErrors(
                            exception =>
                        {
                            logIndexing.WarnException(
                                string.Format("Error when executed OnIndexEntryDeleted trigger for index '{0}', key: '{1}'",
                                              name, documentId),
                                exception);
                            context.AddError(name,
                                             documentId,
                                             exception.Message
                                             );
                        },
                            trigger => trigger.OnIndexEntryDeleted(documentId));
                        indexWriter.DeleteDocuments(new Term(Constants.DocumentIdFieldName, documentId.ToLowerInvariant()));
                        return(doc);
                    });
                    var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(indexDefinition);
                    // One Document and one id field are reused for every entry; both are reset per entry below.
                    var luceneDoc       = new Document();
                    var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES, Field.Index.ANALYZED_NO_NORMS);
                    foreach (var doc in RobustEnumerationIndex(documentsWrapped, viewGenerator.MapDefinitions, actions, context, stats))
                    {
                        count++;

                        float boost;
                        var indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);

                        if (indexingResult.NewDocId != null && indexingResult.ShouldSkip == false)
                        {
                            count += 1;
                            indexedCount++;
                            luceneDoc.GetFields().Clear();
                            luceneDoc.SetBoost(boost);
                            documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
                            luceneDoc.Add(documentIdField);
                            foreach (var field in indexingResult.Fields)
                            {
                                luceneDoc.Add(field);
                            }
                            batchers.ApplyAndIgnoreAllErrors(
                                exception =>
                            {
                                logIndexing.WarnException(
                                    string.Format("Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
                                                  name, indexingResult.NewDocId),
                                    exception);
                                context.AddError(name,
                                                 indexingResult.NewDocId,
                                                 exception.Message
                                                 );
                            },
                                trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc));
                            LogIndexedDocument(indexingResult.NewDocId, luceneDoc);
                            AddDocumentToIndex(indexWriter, luceneDoc, analyzer);
                        }

                        stats.IndexingSuccesses++;
                    }
                }
                finally
                {
                    // Dispose in finally so the batchers are released even when mapping or indexing throws.
                    batchers.ApplyAndIgnoreAllErrors(
                        e =>
                    {
                        logIndexing.WarnException("Failed to dispose on index update trigger", e);
                        context.AddError(name, null, e.Message);
                    },
                        x => x.Dispose());
                }
                return(count);
            });
            logIndexing.Debug("Indexed {0} documents for {1}", indexedCount, name);
        }
Beispiel #19
0
		/// <summary>
		/// Turns one map output into an <see cref="IndexingResult"/>, unwrapping a
		/// <see cref="BoostedValue"/> first when the output is wrapped in one.
		/// </summary>
		/// <param name="doc">Map output; either a <see cref="BoostedValue"/>, a <see cref="DynamicJsonObject"/> or any other object.</param>
		/// <param name="anonymousObjectToLuceneDocumentConverter">Converter passed on to the extraction overloads.</param>
		/// <param name="boost">Receives the wrapper's boost, or 1 when <paramref name="doc"/> is not boosted.</param>
		/// <returns>The extracted indexing data; norms are switched on for every field when the boost differs from 1.</returns>
		private IndexingResult GetIndexingResult(object doc, AnonymousObjectToLuceneDocumentConverter anonymousObjectToLuceneDocumentConverter, out float boost)
		{
			boost = 1;

			var wrapped = doc as BoostedValue;
			if (wrapped != null)
			{
				doc = wrapped.Value;
				boost = wrapped.Boost;
			}

			var jsonObject = doc as DynamicJsonObject;

			IndexingResult outcome;

			// ReSharper disable once ConvertIfStatementToConditionalTernaryExpression
			if (jsonObject == null)
				outcome = ExtractIndexDataFromDocument(anonymousObjectToLuceneDocumentConverter, doc);
			else
				outcome = ExtractIndexDataFromDocument(anonymousObjectToLuceneDocumentConverter, jsonObject);

			// Fields are left untouched for the default boost of 1.
			var customBoost = Math.Abs(boost - 1) > float.Epsilon;
			if (customBoost == false)
				return outcome;

			foreach (var field in outcome.Fields)
			{
				field.OmitNorms = false;
			}

			return outcome;
		}
Beispiel #20
0
        /// <summary>
        /// Indexes one batch of documents inside a single <c>Write</c> call: existing index
        /// entries for the documents are removed, the map definitions are run over the
        /// remaining documents, and the resulting Lucene documents are added to the index.
        /// Index update triggers are notified along the way and a performance record is
        /// stored once the write has finished.
        /// </summary>
        /// <param name="viewGenerator">Supplies the map definitions that are executed over the batch.</param>
        /// <param name="batch">The documents to index, the per-document "skip delete" flags and the highest etag of the batch.</param>
        /// <param name="actions">Storage accessor used to persist document references.</param>
        /// <param name="minimumTimestamp">Not read by this implementation.</param>
        public override void IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp)
        {
            var count = 0;
            var sourceCount = 0;
            var sw = Stopwatch.StartNew();
            var start = SystemTime.UtcNow;
            Write((indexWriter, analyzer, stats) =>
            {
                // Document ids already handled in this batch; guards the delete step against duplicates.
                var processedKeys = new HashSet<string>();
                // Triggers may decline to create a batcher by returning null; those are dropped.
                var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(name))
                    .Where(x => x != null)
                    .ToList();
                try
                {
                    RecordCurrentBatch("Current", batch.Docs.Count);
                    var docIdTerm = new Term(Constants.DocumentIdFieldName);
                    // The Select below is executed for its side effects (counting, trigger
                    // notification, deletion); the trailing ToList() forces it to run eagerly.
                    var documentsWrapped = batch.Docs.Select((doc, i) =>
                    {
                        Interlocked.Increment(ref sourceCount);
                        if (doc.__document_id == null)
                            throw new ArgumentException(
                                string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));

                        string documentId = doc.__document_id.ToString();
                        // Only the first occurrence of a document id triggers the delete handling.
                        if (processedKeys.Add(documentId) == false)
                            return doc;
                        batchers.ApplyAndIgnoreAllErrors(
                            exception =>
                            {
                                logIndexing.WarnException(
                                    string.Format("Error when executed OnIndexEntryDeleted trigger for index '{0}', key: '{1}'",
                                                  name, documentId),
                                    exception);
                                context.AddError(name,
                                                 documentId,
                                                 exception.Message,
                                                 "OnIndexEntryDeleted Trigger"
                                    );
                            },
                            trigger => trigger.OnIndexEntryDeleted(documentId));
                        if (batch.SkipDeleteFromIndex[i] == false ||
                            context.ShouldRemoveFromIndex(documentId)) // maybe it is recently deleted?
                            indexWriter.DeleteDocuments(docIdTerm.CreateTerm(documentId.ToLowerInvariant()));

                        return doc;
                    })
                        // Filtered documents still go through the delete step above but are not mapped.
                        .Where(x => x is FilteredDocument == false)
                        .ToList();

                    var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();

                    // NOTE(review): the callback presumably runs concurrently, once per partition
                    // (hence Interlocked and the concurrent queue) - confirm against ExecuteAllBuffered.
                    BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, (partition) =>
                    {
						var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(context.Database, indexDefinition, viewGenerator);
                        // One Document and one id Field are allocated per partition and reused for every output.
                        var luceneDoc = new Document();
                        var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES,
                                                        Field.Index.NOT_ANALYZED_NO_NORMS);

                        // The scope is given allReferencedDocs.Enqueue; presumably it reports the
                        // documents referenced during mapping through it - TODO confirm.
                        using (CurrentIndexingScope.Current = new CurrentIndexingScope(LoadDocument, allReferencedDocs.Enqueue))
                        {
                            foreach (var doc in RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, stats))
                            {
                                float boost;
                                var indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);

                                if (indexingResult.NewDocId != null && indexingResult.ShouldSkip == false)
                                {
                                    Interlocked.Increment(ref count);
                                    // Reset the reused document before filling it for this output.
                                    luceneDoc.GetFields().Clear();
                                    luceneDoc.Boost = boost;
                                    documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
                                    luceneDoc.Add(documentIdField);
                                    foreach (var field in indexingResult.Fields)
                                    {
                                        luceneDoc.Add(field);
                                    }
                                    batchers.ApplyAndIgnoreAllErrors(
                                        exception =>
                                        {
                                            logIndexing.WarnException(
                                                string.Format("Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
                                                              name, indexingResult.NewDocId),
                                                exception);
                                            context.AddError(name,
                                                             indexingResult.NewDocId,
                                                             exception.Message,
                                                             "OnIndexEntryCreated Trigger"
                                                );
                                        },
                                        trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc));
                                    LogIndexedDocument(indexingResult.NewDocId, luceneDoc);
                                    AddDocumentToIndex(indexWriter, luceneDoc, analyzer);
                                }

                                // Counted for every map output, including those that were skipped above.
                                Interlocked.Increment(ref stats.IndexingSuccesses);
                            }
                        }
                    });

                    // Persist the collected references and register each referencing document
                    // under every child key it referenced.
                    var dic = context.ReferencingDocumentsByChildKeysWhichMightNeedReindexing_SimpleIndex;
                    IDictionary<string, HashSet<string>> result;
                    while (allReferencedDocs.TryDequeue(out result))
                    {
                        foreach (var referencedDocument in result)
                        {
                            actions.Indexing.UpdateDocumentReferences(name, referencedDocument.Key, referencedDocument.Value);
                            foreach (var childDocumentKey in referencedDocument.Value)
                            {
                                dic.GetOrAdd(childDocumentKey, k => new ConcurrentBag<string>()).Add(referencedDocument.Key);
                            }
                        }
                    }

                }
                catch (Exception e)
                {
                    // Let every batcher know about the failure, then rethrow the original exception.
                    batchers.ApplyAndIgnoreAllErrors(
                        ex =>
                        {
                            logIndexing.WarnException("Failed to notify index update trigger batcher about an error", ex);
                            context.AddError(name, null, ex.Message, "AnErrorOccured Trigger");
                        },
                        x => x.AnErrorOccured(e));
                    throw;
                }
                finally
                {
                    // Batchers are disposed on both the success and the failure path.
                    batchers.ApplyAndIgnoreAllErrors(
                        e =>
                        {
                            logIndexing.WarnException("Failed to dispose on index update trigger", e);
                            context.AddError(name, null, e.Message, "Dispose Trigger");
                        },
                        x => x.Dispose());
                    BatchCompleted("Current");
                }
                return new IndexedItemsInfo
                {
                    ChangedDocs = sourceCount,
                    HighestETag = batch.HighestEtagInBatch
                };
            });

            // count = Lucene documents written, sourceCount = batch documents visited by the delete step.
            AddindexingPerformanceStat(new IndexingPerformanceStats
            {
                OutputCount = count,
                ItemsCount = sourceCount,
                InputCount = batch.Docs.Count,
                Duration = sw.Elapsed,
                Operation = "Index",
                Started = start
            });
            logIndexing.Debug("Indexed {0} documents for {1}", count, name);
        }
Beispiel #21
0
        /// <summary>
        /// Rebuilds the index entries for one reduce key: entries whose group-by field
        /// equals <paramref name="reduceKey"/> are deleted, the reduce definition is run
        /// over <paramref name="mappedResults"/>, and every output is added to the index
        /// as a new Lucene document.
        /// </summary>
        /// <param name="viewGenerator">Provides the group-by field name and the reduce definition.</param>
        /// <param name="mappedResults">The mapped results that feed the reduce definition.</param>
        /// <param name="context">Work context forwarded to the enumeration helper.</param>
        /// <param name="actions">Storage actions used for index statistics.</param>
        /// <param name="reduceKey">The reduce key whose entries are replaced.</param>
        public void ReduceDocuments(AbstractViewGenerator viewGenerator,
            IEnumerable<object> mappedResults,
            WorkContext context,
            DocumentStorageActions actions,
            string reduceKey)
        {
            actions.SetCurrentIndexStatsTo(name);

            var entriesWritten = 0;
            Write(writer =>
            {
                // Drop whatever was previously indexed under this reduce key.
                var previousEntries = new Term(viewGenerator.GroupByField, reduceKey);
                writer.DeleteDocuments(previousEntries);

                var luceneConverter = new AnonymousObjectToLuceneDocumentConverter();
                PropertyDescriptorCollection cachedProperties = null;

                foreach (var reduced in RobustEnumeration(mappedResults, viewGenerator.ReduceDefinition, actions, context))
                {
                    entriesWritten++;

                    // Property metadata is resolved from the first output and reused for the rest.
                    if (cachedProperties == null)
                        cachedProperties = TypeDescriptor.GetProperties(reduced);

                    var luceneFields = luceneConverter.Index(reduced, cachedProperties, indexDefinition);

                    var entry = new Document();
                    foreach (var luceneField in luceneFields)
                        entry.Add(luceneField);

                    writer.AddDocument(entry);
                    actions.IncrementSuccessIndexing();
                }

                return true;
            });

            log.DebugFormat("Reduce resulted in {0} entires for {1} for reduce key {2}", entriesWritten, name, reduceKey);
        }
Beispiel #22
0
        /// <summary>
        /// Indexes one batch of documents inside a single <c>Write</c> call and returns the
        /// performance record for the batch. Existing index entries for the documents are
        /// removed, the map definitions are run over the remaining documents on the mapping
        /// thread pool, the resulting Lucene documents are added to the index, and document
        /// references are updated. Timings for each stage are collected along the way.
        /// </summary>
        /// <param name="viewGenerator">Supplies the map definitions that are executed over the batch.</param>
        /// <param name="batch">The documents to index, the per-document "skip delete" flags and the highest etag before filtering.</param>
        /// <param name="actions">Storage accessor; its commit events are timed and it is passed on to the reference update.</param>
        /// <param name="minimumTimestamp">Not read by this implementation.</param>
        /// <param name="token">Checked at the start, per document and per map output; cancellation surfaces as an exception.</param>
        /// <returns>The value returned by <c>RecordCurrentBatch</c>; null if the write callback never reached that call.</returns>
        public override IndexingPerformanceStats IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp, CancellationToken token)
        {
            token.ThrowIfCancellationRequested();

            var count             = 0;
            var sourceCount       = 0;
            var writeToIndexStats = new List <PerformanceStats>();

            IndexingPerformanceStats performance = null;
            var performanceStats = new List <BasePerformanceStats>();

            var storageCommitDuration = new Stopwatch();

            // Time the storage commit by hooking the accessor's before/after commit events.
            // NOTE(review): the handlers are not unsubscribed in this method - confirm the
            // accessor does not outlive the batch.
            actions.BeforeStorageCommit += storageCommitDuration.Start;

            actions.AfterStorageCommit += () =>
            {
                storageCommitDuration.Stop();

                performanceStats.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds));
            };

            Write((indexWriter, analyzer, stats) =>
            {
                // Document ids already handled in this batch; guards the delete step against duplicates.
                var processedKeys = new HashSet <string>();
                // Triggers may decline to create a batcher by returning null; those are dropped.
                var batchers      = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(indexId))
                                    .Where(x => x != null)
                                    .ToList();

                try
                {
                    performance = RecordCurrentBatch("Current", "Index", batch.Docs.Count);

                    var deleteExistingDocumentsDuration = new Stopwatch();

                    // NOTE(review): sourceCount is incremented exactly once here, outside the
                    // per-document Select below, so ChangedDocs and the value passed to
                    // InitializeIndexingPerformanceCompleteDelegate do not reflect the number of
                    // documents in the batch - confirm whether a per-document increment was intended.
                    Interlocked.Increment(ref sourceCount);
                    var docIdTerm        = new Term(Constants.DocumentIdFieldName);
                    // The Select below is executed for its side effects (trigger notification,
                    // deletion); the trailing ToList() forces it to run eagerly.
                    var documentsWrapped = batch.Docs.Select((doc, i) =>
                    {
                        token.ThrowIfCancellationRequested();
                        if (doc.__document_id == null)
                        {
                            throw new ArgumentException(
                                string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));
                        }

                        string documentId = doc.__document_id.ToString();
                        // Only the first occurrence of a document id triggers the delete handling.
                        if (processedKeys.Add(documentId) == false)
                        {
                            return(doc);
                        }

                        InvokeOnIndexEntryDeletedOnAllBatchers(batchers, docIdTerm.CreateTerm(documentId.ToLowerInvariant()));

                        if (batch.SkipDeleteFromIndex[i] == false ||
                            context.ShouldRemoveFromIndex(documentId)) // maybe it is recently deleted?
                        {
                            using (StopwatchScope.For(deleteExistingDocumentsDuration))
                            {
                                indexWriter.DeleteDocuments(docIdTerm.CreateTerm(documentId.ToLowerInvariant()));
                            }
                        }

                        return(doc);
                    })
                                           // Filtered documents still go through the delete step above but are not mapped.
                                           .Where(x => x is FilteredDocument == false)
                                           .ToList();

                    performanceStats.Add(new PerformanceStats
                    {
                        Name       = IndexingOperation.Lucene_DeleteExistingDocument,
                        DurationMs = deleteExistingDocumentsDuration.ElapsedMilliseconds
                    });

                    // Per-partition results are handed back through concurrent queues.
                    var allReferencedDocs = new ConcurrentQueue <IDictionary <string, HashSet <string> > >();
                    var allReferenceEtags = new ConcurrentQueue <IDictionary <string, Etag> >();

                    var parallelOperations = new ConcurrentQueue <ParallelBatchStats>();

                    var parallelProcessingStart = SystemTime.UtcNow;
                    // NOTE(review): the callback presumably runs concurrently, once per partition
                    // (hence Interlocked and the concurrent queues) - confirm against ExecuteBatch.
                    context.Database.MappingThreadPool.ExecuteBatch(documentsWrapped, (IEnumerator <dynamic> partition) =>
                    {
                        token.ThrowIfCancellationRequested();
                        // StartDelay: time between scheduling the batch and this partition starting.
                        var parallelStats = new ParallelBatchStats
                        {
                            StartDelay = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds
                        };

                        var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(context.Database, indexDefinition, viewGenerator, logIndexing);
                        // One Document and one id Field are allocated per partition and reused for every output.
                        var luceneDoc       = new Document();
                        var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES,
                                                        Field.Index.NOT_ANALYZED_NO_NORMS);

                        using (CurrentIndexingScope.Current = new CurrentIndexingScope(context.Database, PublicName))
                        {
                            // State for the per-document output limit: id currently being emitted,
                            // number of outputs seen for it, and whether the rest are being dropped.
                            string currentDocId = null;
                            int outputPerDocId  = 0;
                            Action <Exception, object> onErrorFunc;
                            bool skipDocument = false;

                            var linqExecutionDuration           = new Stopwatch();
                            var addDocumentDutation             = new Stopwatch();
                            var convertToLuceneDocumentDuration = new Stopwatch();

                            foreach (var doc in RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, stats, out onErrorFunc, linqExecutionDuration))
                            {
                                token.ThrowIfCancellationRequested();

                                float boost;
                                IndexingResult indexingResult;
                                using (StopwatchScope.For(convertToLuceneDocumentDuration))
                                {
                                    try
                                    {
                                        indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);
                                    }
                                    catch (Exception e)
                                    {
                                        // A conversion failure is reported for this output only; mapping continues.
                                        onErrorFunc(e, doc);
                                        continue;
                                    }
                                }

                                // ReSharper disable once RedundantBoolCompare --> code clarity
                                if (indexingResult.NewDocId == null || indexingResult.ShouldSkip != false)
                                {
                                    continue;
                                }
                                // A new document id resets the output counter and the skip flag.
                                if (currentDocId != indexingResult.NewDocId)
                                {
                                    currentDocId   = indexingResult.NewDocId;
                                    outputPerDocId = 0;
                                    skipDocument   = false;
                                }
                                if (skipDocument)
                                {
                                    continue;
                                }
                                outputPerDocId++;
                                // Once the check fails, every further output for the same id is dropped.
                                if (EnsureValidNumberOfOutputsForDocument(currentDocId, outputPerDocId) == false)
                                {
                                    skipDocument = true;
                                    continue;
                                }
                                Interlocked.Increment(ref count);

                                using (StopwatchScope.For(convertToLuceneDocumentDuration))
                                {
                                    // Reset the reused document before filling it for this output.
                                    luceneDoc.GetFields().Clear();
                                    luceneDoc.Boost = boost;
                                    documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
                                    luceneDoc.Add(documentIdField);
                                    foreach (var field in indexingResult.Fields)
                                    {
                                        luceneDoc.Add(field);
                                    }
                                }

                                batchers.ApplyAndIgnoreAllErrors(
                                    exception =>
                                {
                                    logIndexing.WarnException(
                                        string.Format(
                                            "Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
                                            PublicName, indexingResult.NewDocId),
                                        exception);
                                    context.AddError(
                                        indexId,
                                        PublicName,
                                        indexingResult.NewDocId,
                                        exception,
                                        "OnIndexEntryCreated Trigger");
                                },
                                    trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc));
                                LogIndexedDocument(indexingResult.NewDocId, luceneDoc);

                                using (StopwatchScope.For(addDocumentDutation))
                                {
                                    AddDocumentToIndex(indexWriter, luceneDoc, analyzer);
                                }

                                Interlocked.Increment(ref stats.IndexingSuccesses);
                            }
                            // Hand the references gathered by this partition's scope back to the caller.
                            allReferenceEtags.Enqueue(CurrentIndexingScope.Current.ReferencesEtags);
                            allReferencedDocs.Enqueue(CurrentIndexingScope.Current.ReferencedDocuments);

                            parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.LoadDocument, CurrentIndexingScope.Current.LoadDocumentDuration.ElapsedMilliseconds));
                            parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Linq_MapExecution, linqExecutionDuration.ElapsedMilliseconds));
                            parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Lucene_ConvertToLuceneDocument, convertToLuceneDocumentDuration.ElapsedMilliseconds));
                            parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Lucene_AddDocument, addDocumentDutation.ElapsedMilliseconds));

                            parallelOperations.Enqueue(parallelStats);
                        }
                    }, description: string.Format("Mapping index {0} from Etag {1} to Etag {2}", this.PublicName, this.GetLastEtagFromStats(), batch.HighestEtagBeforeFiltering));

                    // NumberOfThreads is the number of partition callbacks that ran to completion.
                    performanceStats.Add(new ParallelPerformanceStats
                    {
                        NumberOfThreads   = parallelOperations.Count,
                        DurationMs        = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds,
                        BatchedOperations = parallelOperations.ToList()
                    });

                    var updateDocumentReferencesDuration = new Stopwatch();
                    using (StopwatchScope.For(updateDocumentReferencesDuration))
                    {
                        UpdateDocumentReferences(actions, allReferencedDocs, allReferenceEtags);
                    }
                    performanceStats.Add(PerformanceStats.From(IndexingOperation.UpdateDocumentReferences, updateDocumentReferencesDuration.ElapsedMilliseconds));
                }
                catch (Exception e)
                {
                    // Let every batcher know about the failure, then rethrow the original exception.
                    batchers.ApplyAndIgnoreAllErrors(
                        ex =>
                    {
                        logIndexing.WarnException("Failed to notify index update trigger batcher about an error in " + PublicName, ex);
                        context.AddError(indexId, PublicName, null, ex, "AnErrorOccured Trigger");
                    },
                        x => x.AnErrorOccured(e));
                    throw;
                }
                finally
                {
                    // Batchers are disposed on both the success and the failure path.
                    batchers.ApplyAndIgnoreAllErrors(
                        e =>
                    {
                        logIndexing.WarnException("Failed to dispose on index update trigger in " + PublicName, e);
                        context.AddError(indexId, PublicName, null, e, "Dispose Trigger");
                    },
                        x => x.Dispose());
                }
                return(new IndexedItemsInfo(batch.HighestEtagBeforeFiltering)
                {
                    ChangedDocs = sourceCount
                });
            }, writeToIndexStats);

            // Merge the stats collected by Write itself with the ones gathered above.
            performanceStats.AddRange(writeToIndexStats);

            InitializeIndexingPerformanceCompleteDelegate(performance, sourceCount, count, performanceStats);

            if (logIndexing.IsDebugEnabled)
            {
                logIndexing.Debug("Indexed {0} documents for {1}", count, PublicName);
            }

            return(performance);
        }
Beispiel #23
0
		/// <summary>
		/// Produces the stored Lucene fields for a document, using the JSON-based converter
		/// overload for <see cref="DynamicJsonObject"/> instances and the property-descriptor
		/// overload for everything else.
		/// </summary>
		/// <param name="anonymousObjectToLuceneDocumentConverter">Converter that builds the fields.</param>
		/// <param name="doc">The document to convert.</param>
		/// <param name="properties">
		/// Cached property descriptors; filled in from <paramref name="doc"/> on first use
		/// when null. Left untouched for <see cref="DynamicJsonObject"/> documents.
		/// </param>
		/// <returns>The fields produced by the converter.</returns>
		private IEnumerable<AbstractField> GetFields(AnonymousObjectToLuceneDocumentConverter anonymousObjectToLuceneDocumentConverter, object doc, ref PropertyDescriptorCollection properties)
		{
			var jsonDocument = doc as DynamicJsonObject;
			if (jsonDocument != null)
			{
				return anonymousObjectToLuceneDocumentConverter.Index(jsonDocument.Inner,
																	  indexDefinition, Field.Store.YES);
			}

			// Resolve the descriptors once and hand them back to the caller for reuse.
			if (properties == null)
			{
				properties = TypeDescriptor.GetProperties(doc);
			}

			return anonymousObjectToLuceneDocumentConverter.Index(doc, properties, indexDefinition, Field.Store.YES);
		}
Beispiel #24
0
		/// <summary>
		/// Builds an <see cref="IndexingResult"/> for a map output by reading its document id
		/// and property descriptors through reflection and converting it to non-stored fields.
		/// </summary>
		/// <param name="anonymousObjectToLuceneDocumentConverter">Converter that builds the fields.</param>
		/// <param name="doc">The map output to convert.</param>
		/// <returns>
		/// The fields and document id; <c>ShouldSkip</c> is set when the output has more than
		/// one property yet produced no fields.
		/// </returns>
		/// <exception cref="InvalidSpatialShapeException">
		/// Thrown in place of an <see cref="InvalidShapeException"/> raised during conversion,
		/// carrying the document id.
		/// </exception>
		private IndexingResult ExtractIndexDataFromDocument(AnonymousObjectToLuceneDocumentConverter anonymousObjectToLuceneDocumentConverter, object doc)
		{
			PropertyDescriptorCollection docProperties;
			var documentId = GetDocumentIdByReflection(doc, out docProperties);

			List<AbstractField> luceneFields;
			try
			{
				var convertedFields = anonymousObjectToLuceneDocumentConverter.Index(doc, docProperties, Field.Store.NO);
				luceneFields = convertedFields.ToList();
			}
			catch (InvalidShapeException shapeError)
			{
				throw new InvalidSpatialShapeException(shapeError, documentId);
			}

			// __document_id is always among the properties, so more than one property
			// means the output carried data that yielded no fields.
			var hasDataProperties = docProperties.Count > 1;
			var nothingToIndex = hasDataProperties && luceneFields.Count == 0;

			var extracted = new IndexingResult
			{
				Fields = luceneFields,
				NewDocId = documentId,
				ShouldSkip = nothingToIndex
			};
			return extracted;
		}
Beispiel #25
0
        public override void IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp)
        {
            var count       = 0;
            var sourceCount = 0;
            var sw          = Stopwatch.StartNew();
            var start       = SystemTime.UtcNow;

            Write((indexWriter, analyzer, stats) =>
            {
                var processedKeys = new HashSet <string>();
                var batchers      = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(indexId))
                                    .Where(x => x != null)
                                    .ToList();
                try
                {
                    RecordCurrentBatch("Current", batch.Docs.Count);
                    var docIdTerm        = new Term(Constants.DocumentIdFieldName);
                    var documentsWrapped = batch.Docs.Select((doc, i) =>
                    {
                        Interlocked.Increment(ref sourceCount);
                        if (doc.__document_id == null)
                        {
                            throw new ArgumentException(
                                string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));
                        }

                        string documentId = doc.__document_id.ToString();
                        if (processedKeys.Add(documentId) == false)
                        {
                            return(doc);
                        }

                        InvokeOnIndexEntryDeletedOnAllBatchers(batchers, docIdTerm.CreateTerm(documentId.ToLowerInvariant()));

                        if (batch.SkipDeleteFromIndex[i] == false ||
                            context.ShouldRemoveFromIndex(documentId)) // maybe it is recently deleted?
                        {
                            indexWriter.DeleteDocuments(docIdTerm.CreateTerm(documentId.ToLowerInvariant()));
                        }

                        return(doc);
                    })
                                           .Where(x => x is FilteredDocument == false)
                                           .ToList();

                    var allReferencedDocs = new ConcurrentQueue <IDictionary <string, HashSet <string> > >();
                    var allReferenceEtags = new ConcurrentQueue <IDictionary <string, Etag> >();

                    BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, (partition) =>
                    {
                        var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(context.Database, indexDefinition, viewGenerator, logIndexing);
                        var luceneDoc       = new Document();
                        var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES,
                                                        Field.Index.NOT_ANALYZED_NO_NORMS);

                        using (CurrentIndexingScope.Current = new CurrentIndexingScope(context.Database, PublicName))
                        {
                            string currentDocId = null;
                            int outputPerDocId  = 0;
                            Action <Exception, object> onErrorFunc;
                            foreach (var doc in RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, stats, out onErrorFunc))
                            {
                                float boost;
                                IndexingResult indexingResult;
                                try
                                {
                                    indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);
                                }
                                catch (InvalidSpatialShapeException e)
                                {
                                    onErrorFunc(e, doc);
                                    continue;
                                }

                                try
                                {
                                    indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);
                                }
                                catch (Exception e)
                                {
                                    onErrorFunc(e, doc);
                                    continue;
                                }

// ReSharper disable once RedundantBoolCompare --> code clarity
                                if (indexingResult.NewDocId == null || indexingResult.ShouldSkip != false)
                                {
                                    continue;
                                }
                                if (currentDocId != indexingResult.NewDocId)
                                {
                                    currentDocId   = indexingResult.NewDocId;
                                    outputPerDocId = 0;
                                }
                                outputPerDocId++;
                                EnsureValidNumberOfOutputsForDocument(currentDocId, outputPerDocId);
                                Interlocked.Increment(ref count);
                                luceneDoc.GetFields().Clear();
                                luceneDoc.Boost = boost;
                                documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
                                luceneDoc.Add(documentIdField);
                                foreach (var field in indexingResult.Fields)
                                {
                                    luceneDoc.Add(field);
                                }
                                batchers.ApplyAndIgnoreAllErrors(
                                    exception =>
                                {
                                    logIndexing.WarnException(
                                        string.Format(
                                            "Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
                                            indexId, indexingResult.NewDocId),
                                        exception);
                                    context.AddError(indexId,
                                                     indexingResult.NewDocId,
                                                     exception.Message,
                                                     "OnIndexEntryCreated Trigger"
                                                     );
                                },
                                    trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc));
                                LogIndexedDocument(indexingResult.NewDocId, luceneDoc);
                                AddDocumentToIndex(indexWriter, luceneDoc, analyzer);

                                Interlocked.Increment(ref stats.IndexingSuccesses);
                            }
                            allReferenceEtags.Enqueue(CurrentIndexingScope.Current.ReferencesEtags);
                            allReferencedDocs.Enqueue(CurrentIndexingScope.Current.ReferencedDocuments);
                        }
                    });
                    UpdateDocumentReferences(actions, allReferencedDocs, allReferenceEtags);
                }
                catch (Exception e)
                {
                    batchers.ApplyAndIgnoreAllErrors(
                        ex =>
                    {
                        logIndexing.WarnException("Failed to notify index update trigger batcher about an error", ex);
                        context.AddError(indexId, null, ex.Message, "AnErrorOccured Trigger");
                    },
                        x => x.AnErrorOccured(e));
                    throw;
                }
                finally
                {
                    batchers.ApplyAndIgnoreAllErrors(
                        e =>
                    {
                        logIndexing.WarnException("Failed to dispose on index update trigger", e);
                        context.AddError(indexId, null, e.Message, "Dispose Trigger");
                    },
                        x => x.Dispose());
                    BatchCompleted("Current");
                }
                return(new IndexedItemsInfo(batch.HighestEtagBeforeFiltering)
                {
                    ChangedDocs = sourceCount
                });
            });

            AddindexingPerformanceStat(new IndexingPerformanceStats
            {
                OutputCount = count,
                ItemsCount  = sourceCount,
                InputCount  = batch.Docs.Count,
                Duration    = sw.Elapsed,
                Operation   = "Index",
                Started     = start
            });
            logIndexing.Debug("Indexed {0} documents for {1}", count, indexId);
        }
Beispiel #26
0
 /// <summary>
 /// Builds the <see cref="IndexingResult"/> for a map output that is a <see cref="DynamicJsonObject"/>.
 /// </summary>
 /// <param name="anonymousObjectToLuceneDocumentConverter">Converter that turns the inner JSON into Lucene fields.</param>
 /// <param name="dynamicJsonObject">The map output to convert.</param>
 /// <returns>
 /// The converted fields (none of them stored) together with the document id of the root parent (or of the
 /// object itself); the id is <c>null</c> when it resolves to a <see cref="DynamicNullObject"/>.
 /// <c>ShouldSkip</c> is always <c>false</c>.
 /// </returns>
 private IndexingResult ExtractIndexDataFromDocument(AnonymousObjectToLuceneDocumentConverter anonymousObjectToLuceneDocumentConverter, DynamicJsonObject dynamicJsonObject)
 {
     // The id is looked up before the fields are produced; the cast to string happens afterwards.
     var rawDocumentId = dynamicJsonObject.GetRootParentOrSelf().GetDocumentId();

     var innerJson = ((IDynamicJsonObject)dynamicJsonObject).Inner;
     var luceneFields = anonymousObjectToLuceneDocumentConverter.Index(innerJson, Field.Store.NO).ToList();

     string documentId;
     if (rawDocumentId is DynamicNullObject)
     {
         documentId = null;
     }
     else
     {
         documentId = (string)rawDocumentId;
     }

     return new IndexingResult
     {
         Fields = luceneFields,
         NewDocId = documentId,
         ShouldSkip = false
     };
 }
Beispiel #27
0
		/// <summary>
		/// Runs the map definitions over a batch of documents and writes the resulting entries to the Lucene index,
		/// recording per-stage performance statistics along the way.
		/// </summary>
		/// <param name="viewGenerator">Supplies the map definitions executed over each partition of the batch.</param>
		/// <param name="batch">Documents to index, the per-document skip-delete flags and the highest etag seen before filtering.</param>
		/// <param name="actions">Storage accessor; its commit is timed and it receives the collected document references.</param>
		/// <param name="minimumTimestamp">Not read by this method.</param>
		/// <param name="token">Checked before any work, for every source document, every partition and every map output.</param>
		/// <returns>
		/// The performance record created by <c>RecordCurrentBatch</c>; its <c>OnCompleted</c> callback reports the
		/// batch totals through <c>BatchCompleted</c>.
		/// </returns>
		/// <exception cref="ArgumentException">A document in the batch has no <c>__document_id</c>.</exception>
		public override IndexingPerformanceStats IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp, CancellationToken token)
		{
			token.ThrowIfCancellationRequested();

			var count = 0;
			var sourceCount = 0;
			var writeToIndexStats = new List<PerformanceStats>();

			IndexingPerformanceStats performance = null;
			var performanceStats = new List<BasePerformanceStats>();

			// Time the storage commit: started by the "before" event, stopped and recorded by the "after" event.
			var storageCommitDuration = new Stopwatch();

			actions.BeforeStorageCommit += storageCommitDuration.Start;

			actions.AfterStorageCommit += () =>
			{
				storageCommitDuration.Stop();

				performanceStats.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds));
			};

			Write((indexWriter, analyzer, stats) =>
			{
				var processedKeys = new HashSet<string>();
				var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(indexId))
					.Where(x => x != null)
					.ToList();

				try
				{
					performance = RecordCurrentBatch("Current", "Index", batch.Docs.Count);

					var deleteExistingDocumentsDuration = new Stopwatch();
					var docIdTerm = new Term(Constants.DocumentIdFieldName);

					// Phase 1: notify the triggers about, and delete, the existing index entries of every distinct
					// document id in the batch. Filtered documents go through this phase too and are dropped
					// afterwards, so they are removed from the index but never mapped.
					var documentsWrapped = batch.Docs.Select((doc, i) =>
					{
						token.ThrowIfCancellationRequested();

						Interlocked.Increment(ref sourceCount);
						if (doc.__document_id == null)
							throw new ArgumentException(
								string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));

						string documentId = doc.__document_id.ToString();
						if (processedKeys.Add(documentId) == false)
							return doc; // id already handled earlier in this batch

						InvokeOnIndexEntryDeletedOnAllBatchers(batchers, docIdTerm.CreateTerm(documentId.ToLowerInvariant()));

						if (batch.SkipDeleteFromIndex[i] == false ||
						    context.ShouldRemoveFromIndex(documentId)) // maybe it is recently deleted?
						{
							using (StopwatchScope.For(deleteExistingDocumentsDuration))
							{
								indexWriter.DeleteDocuments(docIdTerm.CreateTerm(documentId.ToLowerInvariant()));
							}
						}

						return doc;
					})
					.Where(x => x is FilteredDocument == false)
					.ToList();

					performanceStats.Add(new PerformanceStats
					{
						Name = IndexingOperation.Lucene_DeleteExistingDocument,
						DurationMs = deleteExistingDocumentsDuration.ElapsedMilliseconds
					});

					var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();
					var allReferenceEtags = new ConcurrentQueue<IDictionary<string, Etag>>();

					var parallelOperations = new ConcurrentQueue<ParallelBatchStats>();

					var parallelProcessingStart = SystemTime.UtcNow;

					// Phase 2: map each partition and add the outputs to the index. Every partition uses its own
					// converter, reusable Lucene document, indexing scope and stopwatches.
					BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, (partition) =>
					{
						token.ThrowIfCancellationRequested();
						var parallelStats = new ParallelBatchStats
						{
							StartDelay = (long) (SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds
						};

						var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(context.Database, indexDefinition, viewGenerator, logIndexing);
						var luceneDoc = new Document();
						var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES,
							Field.Index.NOT_ANALYZED_NO_NORMS);

						using (CurrentIndexingScope.Current = new CurrentIndexingScope(context.Database, PublicName))
						{
							string currentDocId = null;
							int outputPerDocId = 0;
							Action<Exception, object> onErrorFunc;
							bool skipDocument = false;

							var linqExecutionDuration = new Stopwatch();
							var addDocumentDuration = new Stopwatch();
							var convertToLuceneDocumentDuration = new Stopwatch();

							foreach (var doc in RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, stats, out onErrorFunc, linqExecutionDuration))
							{
								token.ThrowIfCancellationRequested();

								float boost;
								IndexingResult indexingResult;
								using (StopwatchScope.For(convertToLuceneDocumentDuration))
								{
									try
									{
										indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);
									}
									catch (Exception e)
									{
										// A failing conversion is reported for this output only; the rest continue.
										onErrorFunc(e, doc);
										continue;
									}
								}

								// ReSharper disable once RedundantBoolCompare --> code clarity
								if (indexingResult.NewDocId == null || indexingResult.ShouldSkip != false)
								{
									continue;
								}

								// Outputs are counted per source document id; the counter and the skip flag
								// reset whenever the id changes.
								if (currentDocId != indexingResult.NewDocId)
								{
									currentDocId = indexingResult.NewDocId;
									outputPerDocId = 0;
									skipDocument = false;
								}
								if (skipDocument)
									continue;
								outputPerDocId++;
								if (EnsureValidNumberOfOutputsForDocument(currentDocId, outputPerDocId) == false)
								{
									// Drop this and all remaining outputs of the same document id.
									skipDocument = true;
									continue;
								}
								Interlocked.Increment(ref count);

								using (StopwatchScope.For(convertToLuceneDocumentDuration))
								{
									// The Lucene document and its id field are reused for every output.
									luceneDoc.GetFields().Clear();
									luceneDoc.Boost = boost;
									documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
									luceneDoc.Add(documentIdField);
									foreach (var field in indexingResult.Fields)
									{
										luceneDoc.Add(field);
									}
								}

								batchers.ApplyAndIgnoreAllErrors(
									exception =>
									{
										logIndexing.WarnException(
										string.Format(
											"Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
											PublicName, indexingResult.NewDocId),
											exception);
										context.AddError(
											indexId,
											PublicName,
											indexingResult.NewDocId,
											exception,
											"OnIndexEntryCreated Trigger");
									},
									trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc));
								LogIndexedDocument(indexingResult.NewDocId, luceneDoc);

								using (StopwatchScope.For(addDocumentDuration))
								{
									AddDocumentToIndex(indexWriter, luceneDoc, analyzer);
								}

								Interlocked.Increment(ref stats.IndexingSuccesses);
							}
							allReferenceEtags.Enqueue(CurrentIndexingScope.Current.ReferencesEtags);
							allReferencedDocs.Enqueue(CurrentIndexingScope.Current.ReferencedDocuments);

							parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.LoadDocument, CurrentIndexingScope.Current.LoadDocumentDuration.ElapsedMilliseconds));
							parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Linq_MapExecution, linqExecutionDuration.ElapsedMilliseconds));
							parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Lucene_ConvertToLuceneDocument, convertToLuceneDocumentDuration.ElapsedMilliseconds));
							parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Lucene_AddDocument, addDocumentDuration.ElapsedMilliseconds));

							// Enqueue exactly once per partition: parallelOperations.Count is reported below as
							// NumberOfThreads, and the queue contents become BatchedOperations.
							parallelOperations.Enqueue(parallelStats);
						}
					});

					performanceStats.Add(new ParallelPerformanceStats
					{
						NumberOfThreads = parallelOperations.Count,
						DurationMs = (long) (SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds,
						BatchedOperations = parallelOperations.ToList()
					});

					var updateDocumentReferencesDuration = new Stopwatch();
					using (StopwatchScope.For(updateDocumentReferencesDuration))
					{
						UpdateDocumentReferences(actions, allReferencedDocs, allReferenceEtags);
					}
					performanceStats.Add(PerformanceStats.From(IndexingOperation.UpdateDocumentReferences, updateDocumentReferencesDuration.ElapsedMilliseconds));
				}
				catch (Exception e)
				{
					// Let every batcher know the batch failed, then rethrow the original exception.
					batchers.ApplyAndIgnoreAllErrors(
						ex =>
						{
							logIndexing.WarnException("Failed to notify index update trigger batcher about an error in " + PublicName, ex);
							context.AddError(indexId, PublicName, null, ex, "AnErrorOccured Trigger");
						},
						x => x.AnErrorOccured(e));
					throw;
				}
				finally
				{
					// Batchers are disposed on both the success and the failure path.
					batchers.ApplyAndIgnoreAllErrors(
						e =>
						{
							logIndexing.WarnException("Failed to dispose on index update trigger in " + PublicName, e);
							context.AddError(indexId, PublicName, null, e, "Dispose Trigger");
						},
						x => x.Dispose());
				}
				return new IndexedItemsInfo(batch.HighestEtagBeforeFiltering)
				{
					ChangedDocs = sourceCount
				};
			}, writeToIndexStats);

			performanceStats.AddRange(writeToIndexStats);

			performance.OnCompleted = () => BatchCompleted("Current", "Index", sourceCount, count, performanceStats);

			logIndexing.Debug("Indexed {0} documents for {1}", count, PublicName);

			return performance;
		}
Beispiel #28
0
        /// <summary>
        /// Builds the <see cref="IndexingResult"/> for a map output that is a plain object (for example an
        /// anonymous type), reading its members through cached property descriptors.
        /// </summary>
        /// <param name="anonymousObjectToLuceneDocumentConverter">Converter that turns the object's properties into Lucene fields.</param>
        /// <param name="doc">The map output to convert.</param>
        /// <returns>
        /// The converted fields (none of them stored) and the value of the document-id property as a string.
        /// <c>ShouldSkip</c> is set when the object has more than one property yet produced no fields.
        /// </returns>
        private IndexingResult ExtractIndexDataFromDocument(AnonymousObjectToLuceneDocumentConverter anonymousObjectToLuceneDocumentConverter, object doc)
        {
            // Property descriptors are resolved once per runtime type and then served from the cache.
            PropertyDescriptorCollection descriptors =
                propertyDescriptorCache.GetOrAdd(doc.GetType(), TypeDescriptor.GetProperties);

            var luceneFields = anonymousObjectToLuceneDocumentConverter.Index(doc, descriptors, Field.Store.NO).ToList();

            var idDescriptor = descriptors.Find(Constants.DocumentIdFieldName, false);
            var documentId = idDescriptor.GetValue(doc) as string;

            // we always have at least __document_id, so "more than one property" means real data was expected
            bool nothingToIndex = descriptors.Count > 1 && luceneFields.Count == 0;

            return new IndexingResult()
            {
                Fields = luceneFields,
                NewDocId = documentId,
                ShouldSkip = nothingToIndex
            };
        }
		/// <summary>
		/// Creates the Lucene fields for a single name/value pair using a throw-away index definition.
		/// </summary>
		/// <param name="name">Field name; also the key under which the indexing mode is registered.</param>
		/// <param name="value">Value handed to the converter.</param>
		/// <param name="stored">When <c>true</c> the fields are created with <c>Field.Store.YES</c>, otherwise <c>Field.Store.NO</c>.</param>
		/// <param name="indexed">When <c>true</c> the field is registered as <c>Analyzed</c>, otherwise as <c>NotAnalyzed</c>.</param>
		/// <returns>The fields produced by the converter for the given value.</returns>
		protected IEnumerable<AbstractField> CreateField(string name, object value, bool stored = false, bool indexed = true)
		{
			FieldIndexing indexingMode;
			if (indexed)
			{
				indexingMode = FieldIndexing.Analyzed;
			}
			else
			{
				indexingMode = FieldIndexing.NotAnalyzed;
			}

			var definition = new IndexDefinition();
			definition.Indexes[name] = indexingMode;

			var converter = new AnonymousObjectToLuceneDocumentConverter(definition);

			Field.Store storeMode;
			if (stored)
			{
				storeMode = Field.Store.YES;
			}
			else
			{
				storeMode = Field.Store.NO;
			}

			return converter.CreateFields(name, value, storeMode);
		}
			/// <summary>
			/// Captures everything needed for one reduce pass and prepares the Lucene document converter.
			/// </summary>
			/// <param name="parent">Owning map/reduce index; supplies the index id, database and index definition.</param>
			/// <param name="viewGenerator">View generator stored for the pass and handed to the converter.</param>
			/// <param name="mappedResultsByBucket">Mapped results grouped by bucket.</param>
			/// <param name="level">Reduce level; trigger batchers are only created when it equals 2.</param>
			/// <param name="context">Work context that provides the index update triggers.</param>
			/// <param name="actions">Storage accessor stored for the pass.</param>
			/// <param name="reduceKeys">Reduce keys stored for the pass.</param>
			/// <param name="inputCount">Input count stored for the pass.</param>
			public ReduceDocuments(MapReduceIndex parent, AbstractViewGenerator viewGenerator, IEnumerable<IGrouping<int, object>> mappedResultsByBucket, int level, WorkContext context, IStorageActionsAccessor actions, HashSet<string> reduceKeys, int inputCount)
			{
				this.parent = parent;
				this.inputCount = inputCount;
				indexId = parent.indexId;

				ViewGenerator = viewGenerator;
				MappedResultsByBucket = mappedResultsByBucket;
				Level = level;
				Context = context;
				Actions = actions;
				ReduceKeys = reduceKeys;

				anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(
					parent.context.Database,
					parent.indexDefinition,
					ViewGenerator,
					logIndexing);

				// Batchers are left unassigned for every level other than 2.
				if (Level != 2)
				{
					return;
				}

				var createdBatchers = Context.IndexUpdateTriggers.Select(trigger => trigger.CreateBatcher(indexId));
				batchers = createdBatchers.Where(batcher => batcher != null).ToList();
			}
Beispiel #31
0
		/// <summary>
		/// Runs the map definitions over a batch of documents and writes the resulting entries to the Lucene index,
		/// then records a performance stat for the batch.
		/// </summary>
		/// <param name="viewGenerator">Supplies the map definitions executed over each partition of the batch.</param>
		/// <param name="batch">Documents to index together with the per-document skip-delete flags.</param>
		/// <param name="context">Work context providing the index update triggers and the error sink.</param>
		/// <param name="actions">Storage accessor passed on to the robust enumeration of the map results.</param>
		/// <param name="minimumTimestamp">Not read by this method.</param>
		public override void IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, WorkContext context, IStorageActionsAccessor actions, DateTime minimumTimestamp)
		{
			var count = 0;
			var sourceCount = 0;
			var sw = Stopwatch.StartNew();
			var start = SystemTime.UtcNow;
			Write(context, (indexWriter, analyzer, stats) =>
			{
				var processedKeys = new HashSet<string>();
				var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(name))
					.Where(x => x != null)
					.ToList();
				try
				{
					var docIdTerm = new Term(Constants.DocumentIdFieldName);
					// Phase 1: for every distinct document id, notify the triggers and (unless flagged to skip)
					// delete its existing index entries. Filtered documents are dropped only after this step.
					var documentsWrapped = batch.Docs.Select((doc,i) =>
					{
						Interlocked.Increment(ref sourceCount);
						if (doc.__document_id == null)
							throw new ArgumentException(
								string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));

						string documentId = doc.__document_id.ToString();
						// An id that was already seen in this batch is passed through without a second delete.
						if (processedKeys.Add(documentId) == false)
							return doc;
						batchers.ApplyAndIgnoreAllErrors(
							exception =>
							{
								logIndexing.WarnException(
									string.Format("Error when executed OnIndexEntryDeleted trigger for index '{0}', key: '{1}'",
									              name, documentId),
									exception);
								context.AddError(name,
								                 documentId,
								                 exception.Message
									);
							},
							trigger => trigger.OnIndexEntryDeleted(documentId));
						if(batch.SkipDeleteFromIndex[i] == false)
							indexWriter.DeleteDocuments(docIdTerm.CreateTerm(documentId.ToLowerInvariant()));
						return doc;
					})
						.Where(x => x is FilteredDocument == false)
						.ToList();


					// Phase 2: map each partition and add the outputs to the index. Each partition gets its own
					// converter and its own reusable Lucene document / id field.
					BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, (partition) =>
					{
						var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(indexDefinition);
						var luceneDoc = new Document();
						var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES,
						                                Field.Index.NOT_ANALYZED_NO_NORMS);

						foreach (var doc in RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, actions, stats))
						{
							float boost;
							var indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);

							if (indexingResult.NewDocId != null && indexingResult.ShouldSkip == false)
							{
								Interlocked.Increment(ref count);
								// Reuse the same Lucene document: clear it, then refill it for this output.
								luceneDoc.GetFields().Clear();
								luceneDoc.Boost = boost;
								documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
								luceneDoc.Add(documentIdField);
								foreach (var field in indexingResult.Fields)
								{
									luceneDoc.Add(field);
								}
								batchers.ApplyAndIgnoreAllErrors(
									exception =>
									{
										logIndexing.WarnException(
											string.Format("Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
											              name, indexingResult.NewDocId),
											exception);
										context.AddError(name,
										                 indexingResult.NewDocId,
										                 exception.Message
											);
									},
									trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc));
								LogIndexedDocument(indexingResult.NewDocId, luceneDoc);
								AddDocumentToIndex(indexWriter, luceneDoc, analyzer);
							}

							// Counted for every enumerated output, including the ones skipped above.
							Interlocked.Increment(ref stats.IndexingSuccesses);
						}
					});
				}
				catch(Exception e)
				{
					// Tell every batcher that the batch failed, then rethrow the original exception.
					batchers.ApplyAndIgnoreAllErrors(
						ex =>
						{
							logIndexing.WarnException("Failed to notify index update trigger batcher about an error", ex);
							context.AddError(name, null, ex.Message);
						},
						x => x.AnErrorOccured(e));
					throw;
				}
				finally
				{
					// Batchers are disposed on both the success and the failure path.
					batchers.ApplyAndIgnoreAllErrors(
						e =>
						{
							logIndexing.WarnException("Failed to dispose on index update trigger", e);
							context.AddError(name, null, e.Message);
						},
						x => x.Dispose());
				}
				return sourceCount;
			});
			AddindexingPerformanceStat(new IndexingPerformanceStats
			{
				OutputCount = count,
				InputCount = sourceCount,
				Duration = sw.Elapsed,
				Operation = "Index",
				Started = start
			});
			logIndexing.Debug("Indexed {0} documents for {1}", count, name);
		}
Beispiel #32
0
		/// <summary>
		/// Builds the <see cref="IndexingResult"/> for a map output that is a <see cref="DynamicJsonObject"/>,
		/// attaching the document id to any invalid-shape failure raised while creating the fields.
		/// </summary>
		/// <param name="anonymousObjectToLuceneDocumentConverter">Converter that turns the inner JSON into Lucene fields.</param>
		/// <param name="dynamicJsonObject">The map output to convert.</param>
		/// <returns>
		/// The converted fields (none of them stored) and the document id of the root parent (or of the object
		/// itself), which is <c>null</c> when it resolves to a <see cref="DynamicNullObject"/>.
		/// <c>ShouldSkip</c> is always <c>false</c>.
		/// </returns>
		/// <exception cref="InvalidSpatialShapeException">
		/// The converter threw an <see cref="InvalidShapeException"/>; the new exception wraps it and carries the document id.
		/// </exception>
		private IndexingResult ExtractIndexDataFromDocument(AnonymousObjectToLuceneDocumentConverter anonymousObjectToLuceneDocumentConverter, DynamicJsonObject dynamicJsonObject)
		{
			// The id is resolved up front so that it is available to the catch block below.
			var rawDocumentId = dynamicJsonObject.GetRootParentOrSelf().GetDocumentId();

			string documentId;
			if (rawDocumentId is DynamicNullObject)
			{
				documentId = null;
			}
			else
			{
				documentId = (string)rawDocumentId;
			}

			List<AbstractField> luceneFields;
			try
			{
				var innerJson = ((IDynamicJsonObject)dynamicJsonObject).Inner;
				luceneFields = anonymousObjectToLuceneDocumentConverter.Index(innerJson, Field.Store.NO).ToList();
			}
			catch (InvalidShapeException shapeError)
			{
				throw new InvalidSpatialShapeException(shapeError, documentId);
			}

			var result = new IndexingResult
			{
				Fields = luceneFields,
				NewDocId = documentId,
				ShouldSkip = false
			};
			return result;
		}
Beispiel #33
0
        /// <summary>
        /// Replaces the index entries of the given reduce keys: deletes the existing entries, runs the reduce
        /// definition over the mapped results and writes one Lucene document per reduce output.
        /// </summary>
        /// <param name="viewGenerator">Supplies the reduce definition and is used to extract each output's reduce key.</param>
        /// <param name="mappedResults">Mapped results fed into the reduce definition.</param>
        /// <param name="context">Work context providing the index update triggers and the error sink.</param>
        /// <param name="actions">Storage accessor passed on to the robust enumeration of the reduce results.</param>
        /// <param name="reduceKeys">Reduce keys whose existing index entries are deleted before the new ones are written.</param>
        // This method may be called concurrently, by both the ReduceTask (for removal)
        // and by the ReducingExecuter (for add/modify). This is okay with us, since the
        // Write() call is already handling locking properly
        public void ReduceDocuments(AbstractViewGenerator viewGenerator,
                                    IEnumerable <object> mappedResults,
                                    WorkContext context,
                                    IStorageActionsAccessor actions,
                                    string[] reduceKeys)
        {
            var count = 0;

            Write(context, (indexWriter, analyzer, stats) =>
            {
                stats.Operation = IndexingWorkStats.Status.Reduce;
                var batchers    = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(name))
                                  .Where(x => x != null)
                                  .ToList();
                try
                {
                    // Remove the previous entries of every reduce key and notify the triggers about it.
                    foreach (var reduceKey in reduceKeys)
                    {
                        var entryKey = reduceKey;
                        indexWriter.DeleteDocuments(new Term(Abstractions.Data.Constants.ReduceKeyFieldName, entryKey.ToLowerInvariant()));
                        batchers.ApplyAndIgnoreAllErrors(
                            exception =>
                            {
                                logIndexing.WarnException(
                                    string.Format("Error when executed OnIndexEntryDeleted trigger for index '{0}', key: '{1}'",
                                                  name, entryKey),
                                    exception);
                                context.AddError(name,
                                                 entryKey,
                                                 exception.Message
                                                 );
                            },
                            trigger => trigger.OnIndexEntryDeleted(entryKey));
                    }

                    PropertyDescriptorCollection properties      = null;
                    var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(indexDefinition);
                    // The Lucene document and the reduce-key field are reused for every reduce output.
                    var luceneDoc      = new Document();
                    var reduceKeyField = new Field(Constants.ReduceKeyFieldName, "dummy",
                                                   Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS);
                    foreach (var doc in RobustEnumerationReduce(mappedResults, viewGenerator.ReduceDefinition, actions, context, stats))
                    {
                        count++;
                        float boost;
                        var fields = GetFields(anonymousObjectToLuceneDocumentConverter, doc, ref properties, out boost).ToList();

                        string reduceKeyAsString = ExtractReduceKey(viewGenerator, doc);
                        reduceKeyField.SetValue(reduceKeyAsString.ToLowerInvariant());

                        luceneDoc.GetFields().Clear();
                        luceneDoc.SetBoost(boost);
                        luceneDoc.Add(reduceKeyField);
                        foreach (var field in fields)
                        {
                            luceneDoc.Add(field);
                        }

                        batchers.ApplyAndIgnoreAllErrors(
                            exception =>
                            {
                                logIndexing.WarnException(
                                    string.Format("Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
                                                  name, reduceKeyAsString),
                                    exception);
                                context.AddError(name,
                                                 reduceKeyAsString,
                                                 exception.Message
                                                 );
                            },
                            trigger => trigger.OnIndexEntryCreated(reduceKeyAsString, luceneDoc));

                        LogIndexedDocument(reduceKeyAsString, luceneDoc);

                        AddDocumentToIndex(indexWriter, luceneDoc, analyzer);
                        stats.ReduceSuccesses++;
                    }
                }
                catch (Exception e)
                {
                    // Tell every batcher that the reduce failed, then rethrow the original exception.
                    batchers.ApplyAndIgnoreAllErrors(
                        ex =>
                        {
                            logIndexing.WarnException("Failed to notify index update trigger batcher about an error", ex);
                            context.AddError(name, null, ex.Message);
                        },
                        x => x.AnErrorOccured(e));
                    throw;
                }
                finally
                {
                    // Dispose the batchers on the failure path as well, so they are never leaked.
                    batchers.ApplyAndIgnoreAllErrors(
                        e =>
                        {
                            logIndexing.WarnException("Failed to dispose on index update trigger", e);
                            context.AddError(name, null, e.Message);
                        },
                        x => x.Dispose());
                }
                return(count + reduceKeys.Length);
            });
            logIndexing.Debug(() => string.Format("Reduce resulted in {0} entries for {1} for reduce keys: {2}", count, name,
                                                  string.Join(", ", reduceKeys)));
        }
Beispiel #34
0
		/// <summary>
		/// Runs the map definitions over a batch of documents and writes the resulting entries to the Lucene index,
		/// then records a performance stat that includes the load-document count and duration.
		/// </summary>
		/// <param name="viewGenerator">Supplies the map definitions executed over each partition of the batch.</param>
		/// <param name="batch">Documents to index, the per-document skip-delete flags and the highest etag seen before filtering.</param>
		/// <param name="actions">Storage accessor that receives the collected document references.</param>
		/// <param name="minimumTimestamp">Not read by this method.</param>
		public override void IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp)
		{
			var count = 0;
			var sourceCount = 0;
			var sw = Stopwatch.StartNew();
			var start = SystemTime.UtcNow;
			int loadDocumentCount = 0;
			long loadDocumentDuration = 0;
			Write((indexWriter, analyzer, stats) =>
			{
				var processedKeys = new HashSet<string>();
				var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(indexId))
					.Where(x => x != null)
					.ToList();
				try
				{
					var indexingPerfStats = RecordCurrentBatch("Current", batch.Docs.Count);
					batch.SetIndexingPerformance(indexingPerfStats);

					var docIdTerm = new Term(Constants.DocumentIdFieldName);
					// Phase 1: for every distinct document id, notify the triggers and delete its existing
					// index entries when required. Filtered documents are dropped only after this step.
					var documentsWrapped = batch.Docs.Select((doc, i) =>
					{
						Interlocked.Increment(ref sourceCount);
						if (doc.__document_id == null)
							throw new ArgumentException(
								string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));

						string documentId = doc.__document_id.ToString();
						// An id that was already seen in this batch is passed through without a second delete.
						if (processedKeys.Add(documentId) == false)
							return doc;

						InvokeOnIndexEntryDeletedOnAllBatchers(batchers, docIdTerm.CreateTerm(documentId.ToLowerInvariant()));

						if (batch.SkipDeleteFromIndex[i] == false ||
							context.ShouldRemoveFromIndex(documentId)) // maybe it is recently deleted?
							indexWriter.DeleteDocuments(docIdTerm.CreateTerm(documentId.ToLowerInvariant()));

						return doc;
					})
						.Where(x => x is FilteredDocument == false)
						.ToList();

					var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();
					var allReferenceEtags = new ConcurrentQueue<IDictionary<string, Etag>>();

					// Phase 2: map each partition and add the outputs to the index. Each partition gets its own
					// converter, reusable Lucene document and indexing scope.
					BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, (partition) =>
					{
						var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(context.Database, indexDefinition, viewGenerator, logIndexing);
						var luceneDoc = new Document();
						var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES,
														Field.Index.NOT_ANALYZED_NO_NORMS);

						using (CurrentIndexingScope.Current = new CurrentIndexingScope(context.Database, PublicName))
						{
							string currentDocId = null;
							int outputPerDocId = 0;
							Action<Exception, object> onErrorFunc;
							bool skipDocument = false;
							foreach (var doc in RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, stats, out onErrorFunc))
							{
								float boost;
								IndexingResult indexingResult;
								try
								{
									indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);
								}
								catch (Exception e)
								{
									// A failing conversion is reported for this output only; the rest continue.
									onErrorFunc(e, doc);
									continue;
								}

								// ReSharper disable once RedundantBoolCompare --> code clarity
								if (indexingResult.NewDocId == null || indexingResult.ShouldSkip != false)
								{
									continue;
								}
								// Outputs are counted per source document id; the counter and the skip flag
								// reset whenever the id changes.
								if (currentDocId != indexingResult.NewDocId)
								{
									currentDocId = indexingResult.NewDocId;
									outputPerDocId = 0;
									skipDocument = false;
								}
								if (skipDocument)
									continue;
								outputPerDocId++;
								if (EnsureValidNumberOfOutputsForDocument(currentDocId, outputPerDocId) == false)
								{
									// Drop this and all remaining outputs of the same document id.
									skipDocument = true;
									continue;
								}
								Interlocked.Increment(ref count);
								// Reuse the same Lucene document: clear it, then refill it for this output.
								luceneDoc.GetFields().Clear();
								luceneDoc.Boost = boost;
								documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
								luceneDoc.Add(documentIdField);
								foreach (var field in indexingResult.Fields)
								{
									luceneDoc.Add(field);
								}
								batchers.ApplyAndIgnoreAllErrors(
									exception =>
									{
										logIndexing.WarnException(
										string.Format(
											"Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
											indexId, indexingResult.NewDocId),
											exception);
										context.AddError(indexId,
															 indexingResult.NewDocId,
															 exception.Message,
															 "OnIndexEntryCreated Trigger"
												);
									},
									trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc));
								LogIndexedDocument(indexingResult.NewDocId, luceneDoc);
								AddDocumentToIndex(indexWriter, luceneDoc, analyzer);

								Interlocked.Increment(ref stats.IndexingSuccesses);
							}
							// Hand this partition's reference data and load-document totals back to the batch.
							allReferenceEtags.Enqueue(CurrentIndexingScope.Current.ReferencesEtags);
							allReferencedDocs.Enqueue(CurrentIndexingScope.Current.ReferencedDocuments);

							Interlocked.Add(ref loadDocumentCount, CurrentIndexingScope.Current.LoadDocumentCount);
							Interlocked.Add(ref loadDocumentDuration, CurrentIndexingScope.Current.LoadDocumentDuration.ElapsedMilliseconds);
						}
					});
					UpdateDocumentReferences(actions, allReferencedDocs, allReferenceEtags);
				}
				catch (Exception e)
				{
					// Tell every batcher that the batch failed, then rethrow the original exception.
					batchers.ApplyAndIgnoreAllErrors(
						ex =>
						{
							logIndexing.WarnException("Failed to notify index update trigger batcher about an error", ex);
							context.AddError(indexId, null, ex.Message, "AnErrorOccured Trigger");
						},
						x => x.AnErrorOccured(e));
					throw;
				}
				finally
				{
					// Batchers are disposed and the batch is marked completed on both success and failure.
					batchers.ApplyAndIgnoreAllErrors(
						e =>
						{
							logIndexing.WarnException("Failed to dispose on index update trigger", e);
							context.AddError(indexId, null, e.Message, "Dispose Trigger");
						},
						x => x.Dispose());
					BatchCompleted("Current");
				}
				return new IndexedItemsInfo(batch.HighestEtagBeforeFiltering)
				{
					ChangedDocs = sourceCount
				};
			});

			AddindexingPerformanceStat(new IndexingPerformanceStats
			{
				OutputCount = count,
				ItemsCount = sourceCount,
				InputCount = batch.Docs.Count,
				Duration = sw.Elapsed,
				Operation = "Index",
				Started = start,
				LoadDocumentCount = loadDocumentCount,
				LoadDocumentDurationMs = loadDocumentDuration 
			});
			logIndexing.Debug("Indexed {0} documents for {1}", count, indexId);
		}
Beispiel #35
0
		// This method may be called concurrently, by both the ReduceTask (for removal)
		// and by the ReducingExecuter (for add/modify). This is okay with us, since the 
		// Write() call is already handling locking properly
		// Rewrites the index entries for the supplied reduce keys inside a single Write() call:
		//  1. deletes every existing entry whose reduce-key field matches one of reduceKeys
		//     (lower-cased) and notifies the index update triggers of each deletion;
		//  2. enumerates the output of RobustEnumerationReduce over mappedResults and adds one
		//     Lucene document per result, notifying the triggers of each creation.
		// The trigger batchers are always disposed, even when the work above throws.
		//
		// viewGenerator - supplies the ReduceDefinition and is used to extract each result's reduce key.
		// mappedResults - the mapped results handed to RobustEnumerationReduce.
		// context       - provides the index update triggers and receives trigger errors.
		// actions       - storage accessor used for the index statistics counters.
		// reduceKeys    - the reduce keys whose existing index entries are removed first.
		public void ReduceDocuments(AbstractViewGenerator viewGenerator,
									IEnumerable<object> mappedResults,
									WorkContext context,
									IStorageActionsAccessor actions,
									string[] reduceKeys)
		{
			var count = 0;
			Write(context, (indexWriter, analyzer) =>
			{
				actions.Indexing.SetCurrentIndexStatsTo(name);
				var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(name))
					.Where(x => x != null)
					.ToList();
				try
				{
					// Phase 1: drop the stale entries for every reduce key being recomputed.
					foreach (var reduceKey in reduceKeys)
					{
						// Copy to a local so the lambdas below capture this iteration's value.
						var entryKey = reduceKey;
						indexWriter.DeleteDocuments(new Term(Abstractions.Data.Constants.ReduceKeyFieldName, entryKey.ToLowerInvariant()));
						batchers.ApplyAndIgnoreAllErrors(
							exception =>
							{
								logIndexing.WarnException(
									string.Format("Error when executed OnIndexEntryDeleted trigger for index '{0}', key: '{1}'",
												  name, entryKey),
									exception);
								context.AddError(name,
												 entryKey,
												 exception.Message
									);
							},
							trigger => trigger.OnIndexEntryDeleted(entryKey));
					}

					// Phase 2: index the fresh reduce results.
					PropertyDescriptorCollection properties = null;
					var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(indexDefinition);
					// A single Document and reduce-key Field instance are reused for every result;
					// the field list is cleared and the field value overwritten on each iteration.
					var luceneDoc = new Document();
					var reduceKeyField = new Field(Abstractions.Data.Constants.ReduceKeyFieldName, "dummy",
										  Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS);
					foreach (var doc in RobustEnumerationReduce(mappedResults, viewGenerator.ReduceDefinition, actions, context))
					{
						count++;
						var fields = GetFields(anonymousObjectToLuceneDocumentConverter, doc, ref properties).ToList();

						string reduceKeyAsString = ExtractReduceKey(viewGenerator, doc);
						// Stored lower-cased so it matches the lower-cased Term used for deletion above.
						reduceKeyField.SetValue(reduceKeyAsString.ToLowerInvariant());

						luceneDoc.GetFields().Clear();
						luceneDoc.Add(reduceKeyField);
						foreach (var field in fields)
						{
							luceneDoc.Add(field);
						}

						batchers.ApplyAndIgnoreAllErrors(
							exception =>
							{
								logIndexing.WarnException(
									string.Format("Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
												  name, reduceKeyAsString),
									exception);
								context.AddError(name,
												 reduceKeyAsString,
												 exception.Message
									);
							},
							trigger => trigger.OnIndexEntryCreated(reduceKeyAsString, luceneDoc));
						logIndexing.Debug("Reduce key {0} result in index {1} gave document: {2}", reduceKeyAsString, name, luceneDoc);
						AddDocumentToIndex(indexWriter, luceneDoc, analyzer);
						actions.Indexing.IncrementReduceSuccessIndexing();
					}
				}
				finally
				{
					// Dispose the batchers on both the success and the failure path so a throwing
					// reduce/delete/add does not leak them; dispose errors are logged, not rethrown.
					batchers.ApplyAndIgnoreAllErrors(
						e =>
						{
							logIndexing.WarnException("Failed to dispose on index update trigger", e);
							context.AddError(name, null, e.Message);
						},
						x => x.Dispose());
				}
				return true;
			});
			logIndexing.Debug(() => string.Format("Reduce resulted in {0} entries for {1} for reduce keys: {2}", count, name,
							  string.Join(", ", reduceKeys)));
		}