/// <summary>
/// Stores everything one reduce pass works with: the owning index, the view generator,
/// the mapped results grouped by bucket, the reduce level, and the work/storage contexts.
/// </summary>
/// <remarks>
/// Index update trigger batchers are created only when <paramref name="level"/> is 2;
/// for any other level this constructor does not assign <c>batchers</c>.
/// </remarks>
public ReduceDocuments(
    MapReduceIndex parent,
    AbstractViewGenerator viewGenerator,
    IEnumerable<IGrouping<int, object>> mappedResultsByBucket,
    int level,
    WorkContext context,
    IStorageActionsAccessor actions,
    HashSet<string> reduceKeys)
{
    this.parent = parent;
    name = this.parent.name;

    ViewGenerator = viewGenerator;
    MappedResultsByBucket = mappedResultsByBucket;
    Level = level;
    Context = context;
    Actions = actions;
    ReduceKeys = reduceKeys;

    anonymousObjectToLuceneDocumentConverter =
        new AnonymousObjectToLuceneDocumentConverter(this.parent.indexDefinition);

    if (Level != 2)
    {
        return;
    }

    // Triggers may decline to take part by returning a null batcher; those are dropped.
    batchers = Context.IndexUpdateTriggers
        .Select(trigger => trigger.CreateBatcher(name))
        .Where(batcher => batcher != null)
        .ToList();
}
/// <summary>
/// Converts one map output into an <see cref="IndexingResult"/>, unwrapping a
/// <see cref="BoostedValue"/> first when one is supplied.
/// </summary>
/// <param name="doc">The map output: either a boosted wrapper or the value itself.</param>
/// <param name="anonymousObjectToLuceneDocumentConverter">Converter used to build the Lucene fields.</param>
/// <param name="boost">Receives the wrapper's boost, or 1 when <paramref name="doc"/> is not boosted.</param>
/// <returns>The extracted index data for <paramref name="doc"/>.</returns>
private IndexingResult GetIndexingResult(object doc, AnonymousObjectToLuceneDocumentConverter anonymousObjectToLuceneDocumentConverter, out float boost)
{
    var wrapper = doc as BoostedValue;
    if (wrapper == null)
    {
        boost = 1;
    }
    else
    {
        doc = wrapper.Value;
        boost = wrapper.Boost;
    }

    var jsonDocument = doc as DynamicJsonObject;
    var result = jsonDocument != null
        ? ExtractIndexDataFromDocument(anonymousObjectToLuceneDocumentConverter, jsonDocument)
        : ExtractIndexDataFromDocument(anonymousObjectToLuceneDocumentConverter, doc);

    var isBoosted = Math.Abs(boost - 1) > float.Epsilon;
    if (isBoosted)
    {
        // A boost different from 1 requires norms to be kept on every field.
        foreach (var field in result.Fields)
        {
            field.OmitNorms = false;
        }
    }

    return result;
}
/// <summary>
/// Produces the stored Lucene fields for a single output, unwrapping a
/// <see cref="BoostedValue"/> when present.
/// </summary>
/// <param name="anonymousObjectToLuceneDocumentConverter">Converter used to build the fields.</param>
/// <param name="doc">The output to convert: boosted wrapper, dynamic JSON object or plain object.</param>
/// <param name="properties">Property descriptors for plain objects; filled in on first use when null.</param>
/// <param name="boost">Receives the wrapper's boost, or 1 when the output is not boosted.</param>
/// <returns>The fields; materialized into a list only when a boost other than 1 applies.</returns>
private IEnumerable<AbstractField> GetFields(AnonymousObjectToLuceneDocumentConverter anonymousObjectToLuceneDocumentConverter, object doc, ref PropertyDescriptorCollection properties, out float boost)
{
    var wrapper = doc as BoostedValue;
    if (wrapper == null)
    {
        boost = 1;
    }
    else
    {
        doc = wrapper.Value;
        boost = wrapper.Boost;
    }

    IEnumerable<AbstractField> produced;
    var jsonDocument = doc as IDynamicJsonObject;
    if (jsonDocument != null)
    {
        produced = anonymousObjectToLuceneDocumentConverter.Index(jsonDocument.Inner, Field.Store.YES);
    }
    else
    {
        if (properties == null)
        {
            properties = TypeDescriptor.GetProperties(doc);
        }
        produced = anonymousObjectToLuceneDocumentConverter.Index(doc, properties, Field.Store.YES);
    }

    var isBoosted = Math.Abs(boost - 1) > float.Epsilon;
    if (isBoosted == false)
    {
        return produced;
    }

    // Materialize once so the norms change is applied to the very instances that are returned.
    var materialized = produced.ToList();
    foreach (var field in materialized)
    {
        field.SetOmitNorms(false);
    }
    return materialized;
}
/// <summary>
/// Reads the <c>__document_id</c> property of <paramref name="doc"/> and converts the
/// object into Lucene fields (not stored).
/// </summary>
/// <param name="properties">Property descriptors of <paramref name="doc"/>; looked up via <see cref="TypeDescriptor"/> when null.</param>
/// <param name="doc">The object to index.</param>
/// <param name="newDocId">Receives the document id, or null when the property value is not a string.</param>
/// <returns>The fields produced by the converter.</returns>
private IEnumerable<AbstractField> ExtractIndexDataFromDocument(PropertyDescriptorCollection properties, object doc, out string newDocId)
{
    var descriptors = properties ?? TypeDescriptor.GetProperties(doc);

    var idDescriptor = descriptors.Find("__document_id", false);
    newDocId = idDescriptor.GetValue(doc) as string;

    return AnonymousObjectToLuceneDocumentConverter.Index(doc, descriptors, indexDefinition, Field.Store.NO);
}
/// <summary>
/// Builds the indexing result for a dynamic JSON document: its fields (not stored) and the
/// document id taken from the root parent (or the object itself).
/// </summary>
/// <param name="anonymousObjectToLuceneDocumentConverter">Converter used to build the fields.</param>
/// <param name="dynamicJsonObject">The dynamic JSON document to index.</param>
/// <returns>A result that is never marked as skipped; the id is null when the document id is a <see cref="DynamicNullObject"/>.</returns>
private IndexingResult ExtractIndexDataFromDocument(AnonymousObjectToLuceneDocumentConverter anonymousObjectToLuceneDocumentConverter, DynamicJsonObject dynamicJsonObject)
{
    var rawDocumentId = dynamicJsonObject.GetRootParentOrSelf().GetDocumentId();
    var inner = ((IDynamicJsonObject)dynamicJsonObject).Inner;

    var result = new IndexingResult();
    result.Fields = anonymousObjectToLuceneDocumentConverter.Index(inner, Field.Store.NO).ToList();
    result.NewDocId = rawDocumentId is DynamicNullObject ? null : (string)rawDocumentId;
    result.ShouldSkip = false;
    return result;
}
/// <summary>
/// Builds the indexing result for a dynamic JSON document using the index definition of
/// this index; fields are not stored.
/// </summary>
/// <param name="dynamicJsonObject">The dynamic JSON document to index.</param>
/// <returns>A result that is never marked as skipped; the id is null when the document id is a <see cref="DynamicNullObject"/>.</returns>
private IndexingResult ExtractIndexDataFromDocument(DynamicJsonObject dynamicJsonObject)
{
    var rawDocumentId = dynamicJsonObject.GetDocumentId();

    var result = new IndexingResult();
    result.Fields = AnonymousObjectToLuceneDocumentConverter.Index(dynamicJsonObject.Inner, indexDefinition, Field.Store.NO);

    if (rawDocumentId is DynamicNullObject)
    {
        result.NewDocId = null;
    }
    else
    {
        result.NewDocId = (string)rawDocumentId;
    }

    result.ShouldSkip = false;
    return result;
}
/// <summary>
/// Produces the stored Lucene fields for a single output, which is either a
/// <see cref="DynamicJsonObject"/> or a plain object described by <paramref name="properties"/>.
/// </summary>
/// <param name="anonymousObjectToLuceneDocumentConverter">Converter used to build the fields.</param>
/// <param name="doc">The output to convert.</param>
/// <param name="properties">Property descriptors for plain objects; filled in on first use when null.</param>
/// <returns>The fields produced by the converter.</returns>
private IEnumerable<AbstractField> GetFields(AnonymousObjectToLuceneDocumentConverter anonymousObjectToLuceneDocumentConverter, object doc, ref PropertyDescriptorCollection properties)
{
    var jsonDocument = doc as DynamicJsonObject;
    if (jsonDocument != null)
    {
        return anonymousObjectToLuceneDocumentConverter.Index(jsonDocument.Inner, indexDefinition, Field.Store.YES);
    }

    if (properties == null)
    {
        properties = TypeDescriptor.GetProperties(doc);
    }
    return anonymousObjectToLuceneDocumentConverter.Index(doc, properties, indexDefinition, Field.Store.YES);
}
/// <summary>
/// Builds the indexing result for a plain (non-JSON) map output, using property descriptors
/// cached per runtime type.
/// </summary>
/// <param name="anonymousObjectToLuceneDocumentConverter">Converter used to build the fields.</param>
/// <param name="doc">The object to index; its runtime type selects the cached descriptors.</param>
/// <returns>
/// The fields (not stored), the value of the document id property (null when it is not a
/// string), and a skip flag that is set when the object has more than one property but
/// produced no fields.
/// </returns>
private IndexingResult ExtractIndexDataFromDocument(AnonymousObjectToLuceneDocumentConverter anonymousObjectToLuceneDocumentConverter, object doc)
{
    Type type = doc.GetType();
    PropertyDescriptorCollection properties = propertyDescriptorCache.GetOrAdd(type, TypeDescriptor.GetProperties);

    var abstractFields = anonymousObjectToLuceneDocumentConverter.Index(doc, properties, Field.Store.NO).ToList();

    return new IndexingResult()
    {
        Fields = abstractFields,
        NewDocId = properties.Find(Constants.DocumentIdFieldName, false).GetValue(doc) as string,
        // we always have at least __document_id
        // (the comment sits on its own line so it cannot swallow the rest of the expression)
        ShouldSkip = properties.Count > 1 && abstractFields.Count == 0
    };
}
/// <summary>
/// Builds the indexing result for a plain (non-JSON) map output.
/// </summary>
/// <param name="properties">Property descriptors of <paramref name="doc"/>; looked up via <see cref="TypeDescriptor"/> when null.</param>
/// <param name="doc">The object to index.</param>
/// <returns>
/// The fields (not stored), the value of the document id property (null when it is not a
/// string), and a skip flag that is set when the object has more than one property but
/// produced no fields.
/// </returns>
private IndexingResult ExtractIndexDataFromDocument(PropertyDescriptorCollection properties, object doc)
{
    if (properties == null)
    {
        properties = TypeDescriptor.GetProperties(doc);
    }

    var abstractFields = AnonymousObjectToLuceneDocumentConverter.Index(doc, properties, indexDefinition, Field.Store.NO).ToList();

    return new IndexingResult()
    {
        Fields = abstractFields,
        NewDocId = properties.Find(Constants.DocumentIdFieldName, false).GetValue(doc) as string,
        // we always have at least __document_id
        // (the comment sits on its own line so it cannot swallow the rest of the expression)
        ShouldSkip = properties.Count > 1 && abstractFields.Count == 0
    };
}
/// <summary>
/// Runs <paramref name="documents"/> through the view's map definition and writes the
/// resulting entries to the Lucene index.
/// </summary>
/// <param name="viewGenerator">Supplies the map definition applied to the documents.</param>
/// <param name="documents">The source documents to map.</param>
/// <param name="context">Work context handed to the robust enumeration.</param>
/// <param name="actions">Storage actions used for the index statistics.</param>
/// <remarks>
/// Whenever the <c>__document_id</c> of a map output differs from the id of the last entry
/// written, the entries already indexed under the new id are deleted first. Outputs whose id
/// is null are counted (and reported as successes) but nothing is added for them. The write
/// callback returns true only if at least one entry was added.
/// </remarks>
public override void IndexDocuments(
    AbstractViewGenerator viewGenerator,
    IEnumerable<object> documents,
    WorkContext context,
    DocumentStorageActions actions)
{
    actions.SetCurrentIndexStatsTo(name);
    var count = 0;
    Write(indexWriter =>
    {
        string currentId = null;
        var converter = new AnonymousObjectToLuceneDocumentConverter();
        PropertyDescriptorCollection properties = null;
        foreach (var doc in RobustEnumeration(documents, viewGenerator.MapDefinition, actions, context))
        {
            count++;

            // Descriptors are resolved from the first output and reused for the rest of the batch.
            if (properties == null)
            {
                properties = TypeDescriptor.GetProperties(doc);
            }
            var newDocId = properties.Find("__document_id", false).GetValue(doc) as string;
            var fields = converter.Index(doc, properties, indexDefinition);

            // new document id, so delete all old values matching it
            // NOTE(review): this also runs when newDocId is null after a non-null id - confirm
            // that a Term with null text is acceptable to the writer.
            if (currentId != newDocId)
            {
                indexWriter.DeleteDocuments(new Term("__document_id", newDocId));
            }

            if (newDocId != null)
            {
                var luceneDoc = new Document();
                luceneDoc.Add(new Field("__document_id", newDocId, Field.Store.YES, Field.Index.UN_TOKENIZED));

                currentId = newDocId;
                CopyFieldsToDocumentButRemoveDuplicateValues(luceneDoc, fields);
                log.DebugFormat("Indexing document {0}", luceneDoc);
                indexWriter.AddDocument(luceneDoc);
            }

            actions.IncrementSuccessIndexing();
        }

        return currentId != null;
    });
    log.DebugFormat("Indexed {0} documents for {1}", count, name);
}
/// <summary>
/// Builds the indexing result for a dynamic JSON document; the document id is taken from
/// the root parent (or the object itself).
/// </summary>
/// <param name="anonymousObjectToLuceneDocumentConverter">Converter used to build the fields.</param>
/// <param name="dynamicJsonObject">The dynamic JSON document to index.</param>
/// <returns>A result that is never marked as skipped.</returns>
/// <remarks>
/// An <c>InvalidShapeException</c> raised while building the fields is rethrown wrapped
/// together with the document id.
/// </remarks>
private IndexingResult ExtractIndexDataFromDocument(AnonymousObjectToLuceneDocumentConverter anonymousObjectToLuceneDocumentConverter, DynamicJsonObject dynamicJsonObject)
{
    var rawDocumentId = dynamicJsonObject.GetRootParentOrSelf().GetDocumentId();

    string documentId;
    if (rawDocumentId is DynamicNullObject)
    {
        documentId = null;
    }
    else
    {
        documentId = (string)rawDocumentId;
    }

    List<AbstractField> fields;
    try
    {
        var inner = ((IDynamicJsonObject)dynamicJsonObject).Inner;
        fields = anonymousObjectToLuceneDocumentConverter.Index(inner, Field.Store.NO).ToList();
    }
    catch (InvalidShapeException e)
    {
        throw new InvalidSpatialShapException(e, documentId);
    }

    var result = new IndexingResult();
    result.Fields = fields;
    result.NewDocId = documentId;
    result.ShouldSkip = false;
    return result;
}
/// <summary>
/// Builds the indexing result for a plain (non-JSON) map output, using the property
/// accessor returned by <c>GetDocumentId</c>.
/// </summary>
/// <param name="anonymousObjectToLuceneDocumentConverter">Converter used to build the fields.</param>
/// <param name="doc">The object to index.</param>
/// <returns>
/// The fields (not stored), the document id, and a skip flag that is set when the object
/// has more than one property but produced no fields.
/// </returns>
/// <remarks>
/// An <c>InvalidShapeException</c> raised while building the fields is rethrown wrapped
/// together with the document id.
/// </remarks>
private IndexingResult ExtractIndexDataFromDocument(AnonymousObjectToLuceneDocumentConverter anonymousObjectToLuceneDocumentConverter, object doc)
{
    PropertyAccessor propertyAccessor;
    var newDocId = GetDocumentId(doc, out propertyAccessor);

    List<AbstractField> abstractFields;
    try
    {
        abstractFields = anonymousObjectToLuceneDocumentConverter.Index(doc, propertyAccessor, Field.Store.NO).ToList();
    }
    catch (InvalidShapeException e)
    {
        throw new InvalidSpatialShapException(e, newDocId);
    }

    return new IndexingResult
    {
        Fields = abstractFields,
        NewDocId = newDocId,
        // we always have at least __document_id
        // (the comment sits on its own line so it cannot swallow the rest of the expression)
        ShouldSkip = propertyAccessor.Properies.Count > 1 && abstractFields.Count == 0
    };
}
/// <summary>
/// Converts one map output into an <see cref="IndexingResult"/>, unwrapping a
/// <see cref="BoostedValue"/> first when one is supplied.
/// </summary>
/// <param name="doc">The map output: either a boosted wrapper or the value itself.</param>
/// <param name="anonymousObjectToLuceneDocumentConverter">Converter used to build the Lucene fields.</param>
/// <param name="boost">Receives the wrapper's boost, or 1 when <paramref name="doc"/> is not boosted.</param>
/// <returns>The extracted index data for <paramref name="doc"/>.</returns>
private IndexingResult GetIndexingResult(object doc, AnonymousObjectToLuceneDocumentConverter anonymousObjectToLuceneDocumentConverter, out float boost)
{
    boost = 1;

    var boostedValue = doc as BoostedValue;
    if (boostedValue != null)
    {
        doc = boostedValue.Value;
        boost = boostedValue.Boost;
    }

    IndexingResult indexingResult;

    var docAsDynamicJsonObject = doc as DynamicJsonObject;

    // The analyzer directive below must stay on a line of its own: a line comment placed
    // in front of the if statement on the same line would comment the statement out.
    // ReSharper disable once ConvertIfStatementToConditionalTernaryExpression
    if (docAsDynamicJsonObject != null)
    {
        indexingResult = ExtractIndexDataFromDocument(anonymousObjectToLuceneDocumentConverter, docAsDynamicJsonObject);
    }
    else
    {
        indexingResult = ExtractIndexDataFromDocument(anonymousObjectToLuceneDocumentConverter, doc);
    }

    // A boost different from 1 requires norms to be kept on every field.
    if (Math.Abs(boost - 1) > float.Epsilon)
    {
        foreach (var abstractField in indexingResult.Fields)
        {
            abstractField.OmitNorms = false;
        }
    }

    return indexingResult;
}
/// <summary>
/// Turns a single map output into an <see cref="IndexingResult"/>. A
/// <see cref="BoostedValue"/> is replaced by the value it wraps and its boost is reported.
/// </summary>
/// <param name="doc">The map output, possibly wrapped in a <see cref="BoostedValue"/>.</param>
/// <param name="anonymousObjectToLuceneDocumentConverter">Converter used to build the Lucene fields.</param>
/// <param name="boost">Receives 1 unless <paramref name="doc"/> carries its own boost.</param>
/// <returns>The extracted index data.</returns>
private IndexingResult GetIndexingResult(object doc, AnonymousObjectToLuceneDocumentConverter anonymousObjectToLuceneDocumentConverter, out float boost)
{
    boost = 1;

    var boosted = doc as BoostedValue;
    if (boosted != null)
    {
        doc = boosted.Value;
        boost = boosted.Boost;
    }

    IndexingResult extracted;
    var asJson = doc as DynamicJsonObject;
    if (asJson == null)
    {
        extracted = ExtractIndexDataFromDocument(anonymousObjectToLuceneDocumentConverter, doc);
    }
    else
    {
        extracted = ExtractIndexDataFromDocument(anonymousObjectToLuceneDocumentConverter, asJson);
    }

    var boostApplies = Math.Abs(boost - 1) > float.Epsilon;
    if (boostApplies == false)
    {
        return extracted;
    }

    // Norms must be kept for a non-default boost to be honoured.
    foreach (var field in extracted.Fields)
    {
        field.OmitNorms = false;
    }
    return extracted;
}
/// <summary>
/// Reports the document id of a dynamic JSON document and converts the document into
/// Lucene fields (not stored) using the index definition of this index.
/// </summary>
/// <param name="dynamicJsonObject">The dynamic JSON document to index.</param>
/// <param name="newDocId">Receives the value returned by <c>GetDocumentId</c>.</param>
/// <returns>The fields produced by the converter.</returns>
private IEnumerable<AbstractField> ExtractIndexDataFromDocument(DynamicJsonObject dynamicJsonObject, out string newDocId)
{
    var documentId = dynamicJsonObject.GetDocumentId();
    newDocId = documentId;

    IEnumerable<AbstractField> fields =
        AnonymousObjectToLuceneDocumentConverter.Index(dynamicJsonObject.Inner, indexDefinition, Field.Store.NO);
    return fields;
}
/// <summary>
/// Maps <paramref name="documents"/> with the view's map definitions and writes the
/// resulting entries into this index's Lucene writer.
/// </summary>
/// <remarks>
/// All work happens inside the callback passed to Write:
/// 1. Trigger batchers are created for this index (null batchers are dropped).
/// 2. The source sequence is wrapped in a Select that throws ArgumentException for a
///    document without __document_id and, the first time each id is seen, fires
///    OnIndexEntryDeleted on the batchers and deletes the entries stored under the
///    lower-cased id. The wrapper is a deferred LINQ query, so these side effects run as
///    the sequence is enumerated.
/// 3. Every output of RobustEnumerationIndex is converted by GetIndexingResult; outputs
///    that have a document id and are not flagged ShouldSkip are added to the index through
///    one reused Document and one reused id Field, after OnIndexEntryCreated has fired.
/// 4. The error callbacks handed to ApplyAndIgnoreAllErrors log a warning and record the
///    failure through context.AddError.
/// 5. Batchers are disposed after the loop; there is no try/finally here, so an exception
///    thrown earlier skips that disposal.
/// NOTE(review): count is incremented in the Select wrapper, once per map output and once
/// more per entry added, so the value returned and logged is not a plain document count -
/// confirm this is intended.
/// minimumTimestamp is not referenced in this method body.
/// </remarks>
public override void IndexDocuments(AbstractViewGenerator viewGenerator, IEnumerable<object> documents, WorkContext context, IStorageActionsAccessor actions, DateTime minimumTimestamp) { var count = 0; Write(context, (indexWriter, analyzer, stats) => { var processedKeys = new HashSet<string>(); var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(name)) .Where(x => x != null) .ToList(); var documentsWrapped = documents.Select((dynamic doc) => { if(doc.__document_id == null) throw new ArgumentException(string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc)); count++; string documentId = doc.__document_id.ToString(); if (processedKeys.Add(documentId) == false) return doc; batchers.ApplyAndIgnoreAllErrors( exception => { logIndexing.WarnException( string.Format("Error when executed OnIndexEntryDeleted trigger for index '{0}', key: '{1}'", name, documentId), exception); context.AddError(name, documentId, exception.Message ); }, trigger => trigger.OnIndexEntryDeleted(documentId)); indexWriter.DeleteDocuments(new Term(Constants.DocumentIdFieldName, documentId.ToLowerInvariant())); return doc; }); var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(indexDefinition); var luceneDoc = new Document(); var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES, Field.Index.ANALYZED_NO_NORMS); foreach (var doc in RobustEnumerationIndex(documentsWrapped, viewGenerator.MapDefinitions, actions, context, stats)) { count++; float boost; var indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost); if (indexingResult.NewDocId != null && indexingResult.ShouldSkip == false) { count += 1; luceneDoc.GetFields().Clear(); luceneDoc.SetBoost(boost); documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant()); luceneDoc.Add(documentIdField); foreach (var field in indexingResult.Fields) { luceneDoc.Add(field); } 
batchers.ApplyAndIgnoreAllErrors( exception => { logIndexing.WarnException( string.Format( "Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'", name, indexingResult.NewDocId), exception); context.AddError(name, indexingResult.NewDocId, exception.Message ); }, trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc)); LogIndexedDocument(indexingResult.NewDocId, luceneDoc); AddDocumentToIndex(indexWriter, luceneDoc, analyzer); } stats.IndexingSuccesses++; } batchers.ApplyAndIgnoreAllErrors( e => { logIndexing.WarnException("Failed to dispose on index update trigger", e); context.AddError(name, null, e.Message); }, x => x.Dispose()); return count; }); logIndexing.Debug("Indexed {0} documents for {1}", count, name); }
/// <summary>
/// Maps the documents of <paramref name="batch"/> with the view's map definitions and
/// writes the resulting entries into this index's Lucene writer, then records an
/// "Index" performance stat.
/// </summary>
/// <remarks>
/// Inside the callback passed to Write:
/// 1. batch.Docs is projected with its position i. Each document bumps sourceCount; a
///    missing __document_id throws ArgumentException. The first time an id is seen,
///    OnIndexEntryDeleted fires on the batchers, and the existing entries for the
///    lower-cased id are deleted unless batch.SkipDeleteFromIndex[i] is set and
///    context.ShouldRemoveFromIndex(documentId) is false. FilteredDocument instances are
///    then removed and the sequence is materialized with ToList, so all of this runs
///    before the mapping step starts.
/// 2. BackgroundTaskExecuter.Instance.ExecuteAllBuffered is handed the materialized list;
///    the callback creates its own converter, Document, id Field and CurrentIndexingScope
///    per partition (presumably partitions run concurrently, hence the Interlocked
///    counters - confirm against the executer).
/// 3. Outputs with a document id that are not flagged ShouldSkip are added to the index
///    after OnIndexEntryCreated has fired.
/// 4. Referenced documents queued by the indexing scopes are written with
///    actions.Indexing.UpdateDocumentReferences.
/// 5. On any exception the batchers are notified through AnErrorOccured and the exception
///    is rethrown; batchers are always disposed in the finally block.
/// The callback returns sourceCount. minimumTimestamp is not referenced in this method body.
/// </remarks>
public override void IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp) { var count = 0; var sourceCount = 0; var sw = Stopwatch.StartNew(); var start = SystemTime.UtcNow; Write((indexWriter, analyzer, stats) => { var processedKeys = new HashSet <string>(); var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(name)) .Where(x => x != null) .ToList(); try { var docIdTerm = new Term(Constants.DocumentIdFieldName); var documentsWrapped = batch.Docs.Select((doc, i) => { Interlocked.Increment(ref sourceCount); if (doc.__document_id == null) { throw new ArgumentException( string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc)); } string documentId = doc.__document_id.ToString(); if (processedKeys.Add(documentId) == false) { return(doc); } batchers.ApplyAndIgnoreAllErrors( exception => { logIndexing.WarnException( string.Format("Error when executed OnIndexEntryDeleted trigger for index '{0}', key: '{1}'", name, documentId), exception); context.AddError(name, documentId, exception.Message ); }, trigger => trigger.OnIndexEntryDeleted(documentId)); if (batch.SkipDeleteFromIndex[i] == false || context.ShouldRemoveFromIndex(documentId)) // maybe it is recently deleted? 
{ indexWriter.DeleteDocuments(docIdTerm.CreateTerm(documentId.ToLowerInvariant())); } return(doc); }) .Where(x => x is FilteredDocument == false) .ToList(); var allReferencedDocs = new ConcurrentQueue <IDictionary <string, HashSet <string> > >(); BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, (partition) => { var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(indexDefinition); var luceneDoc = new Document(); var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); using (CurrentIndexingScope.Current = new CurrentIndexingScope(LoadDocument, allReferencedDocs.Enqueue)) { foreach (var doc in RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, stats)) { float boost; var indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost); if (indexingResult.NewDocId != null && indexingResult.ShouldSkip == false) { Interlocked.Increment(ref count); luceneDoc.GetFields().Clear(); luceneDoc.Boost = boost; documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant()); luceneDoc.Add(documentIdField); foreach (var field in indexingResult.Fields) { luceneDoc.Add(field); } batchers.ApplyAndIgnoreAllErrors( exception => { logIndexing.WarnException( string.Format("Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'", name, indexingResult.NewDocId), exception); context.AddError(name, indexingResult.NewDocId, exception.Message ); }, trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc)); LogIndexedDocument(indexingResult.NewDocId, luceneDoc); AddDocumentToIndex(indexWriter, luceneDoc, analyzer); } Interlocked.Increment(ref stats.IndexingSuccesses); } } }); IDictionary <string, HashSet <string> > result; while (allReferencedDocs.TryDequeue(out result)) { foreach (var referencedDocument in result) { actions.Indexing.UpdateDocumentReferences(name, 
referencedDocument.Key, referencedDocument.Value); } } } catch (Exception e) { batchers.ApplyAndIgnoreAllErrors( ex => { logIndexing.WarnException("Failed to notify index update trigger batcher about an error", ex); context.AddError(name, null, ex.Message); }, x => x.AnErrorOccured(e)); throw; } finally { batchers.ApplyAndIgnoreAllErrors( e => { logIndexing.WarnException("Failed to dispose on index update trigger", e); context.AddError(name, null, e.Message); }, x => x.Dispose()); } return(sourceCount); }); AddindexingPerformanceStat(new IndexingPerformanceStats { OutputCount = count, InputCount = sourceCount, Duration = sw.Elapsed, Operation = "Index", Started = start }); logIndexing.Debug("Indexed {0} documents for {1}", count, name); }
/// <summary>
/// Maps <paramref name="documents"/> with the view's map definitions and adds the
/// resulting entries to this index through the writer supplied by Write.
/// </summary>
/// <remarks>
/// Steps performed inside the Write callback:
/// - Trigger batchers are created for this index; null batchers are dropped.
/// - documents is wrapped in a deferred Select. A document without __document_id throws
///   ArgumentException. The first occurrence of each id fires OnIndexEntryDeleted on the
///   batchers and deletes the entries stored under the lower-cased id; later occurrences
///   of the same id pass through untouched.
/// - Each output of RobustEnumerationIndex goes through GetIndexingResult. When it has a
///   document id and is not flagged ShouldSkip, a single reused Document is cleared,
///   given the boost, the id field and the output's fields, announced through
///   OnIndexEntryCreated and added to the index.
/// - Trigger failures reach the error callbacks, which log a warning and call
///   context.AddError.
/// - Batchers are disposed after the loop, outside any try/finally, so an earlier
///   exception skips that disposal.
/// NOTE(review): count is bumped in three places (Select wrapper, every map output, every
/// entry added); the returned and logged number therefore mixes inputs and outputs -
/// confirm this is intended.
/// minimumTimestamp is not referenced in this method body.
/// </remarks>
public override void IndexDocuments(AbstractViewGenerator viewGenerator, IEnumerable <object> documents, WorkContext context, IStorageActionsAccessor actions, DateTime minimumTimestamp) { var count = 0; Write(context, (indexWriter, analyzer, stats) => { var processedKeys = new HashSet <string>(); var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(name)) .Where(x => x != null) .ToList(); var documentsWrapped = documents.Select((dynamic doc) => { if (doc.__document_id == null) { throw new ArgumentException(string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc)); } count++; string documentId = doc.__document_id.ToString(); if (processedKeys.Add(documentId) == false) { return(doc); } batchers.ApplyAndIgnoreAllErrors( exception => { logIndexing.WarnException( string.Format("Error when executed OnIndexEntryDeleted trigger for index '{0}', key: '{1}'", name, documentId), exception); context.AddError(name, documentId, exception.Message ); }, trigger => trigger.OnIndexEntryDeleted(documentId)); indexWriter.DeleteDocuments(new Term(Constants.DocumentIdFieldName, documentId.ToLowerInvariant())); return(doc); }); var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(indexDefinition); var luceneDoc = new Document(); var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES, Field.Index.ANALYZED_NO_NORMS); foreach (var doc in RobustEnumerationIndex(documentsWrapped, viewGenerator.MapDefinitions, actions, context, stats)) { count++; float boost; var indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost); if (indexingResult.NewDocId != null && indexingResult.ShouldSkip == false) { count += 1; luceneDoc.GetFields().Clear(); luceneDoc.SetBoost(boost); documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant()); luceneDoc.Add(documentIdField); foreach (var field in indexingResult.Fields) { luceneDoc.Add(field); } 
batchers.ApplyAndIgnoreAllErrors( exception => { logIndexing.WarnException( string.Format("Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'", name, indexingResult.NewDocId), exception); context.AddError(name, indexingResult.NewDocId, exception.Message ); }, trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc)); LogIndexedDocument(indexingResult.NewDocId, luceneDoc); AddDocumentToIndex(indexWriter, luceneDoc, analyzer); } stats.IndexingSuccesses++; } batchers.ApplyAndIgnoreAllErrors( e => { logIndexing.WarnException("Failed to dispose on index update trigger", e); context.AddError(name, null, e.Message); }, x => x.Dispose()); return(count); }); logIndexing.Debug("Indexed {0} documents for {1}", count, name); }
/// <summary>
/// Converts one map output into an <see cref="IndexingResult"/>, unwrapping a
/// <see cref="BoostedValue"/> first when one is supplied.
/// </summary>
/// <param name="doc">The map output: either a boosted wrapper or the value itself.</param>
/// <param name="anonymousObjectToLuceneDocumentConverter">Converter used to build the Lucene fields.</param>
/// <param name="boost">Receives the wrapper's boost, or 1 when <paramref name="doc"/> is not boosted.</param>
/// <returns>The extracted index data for <paramref name="doc"/>.</returns>
private IndexingResult GetIndexingResult(object doc, AnonymousObjectToLuceneDocumentConverter anonymousObjectToLuceneDocumentConverter, out float boost)
{
    boost = 1;

    var boostedValue = doc as BoostedValue;
    if (boostedValue != null)
    {
        doc = boostedValue.Value;
        boost = boostedValue.Boost;
    }

    IndexingResult indexingResult;

    var docAsDynamicJsonObject = doc as DynamicJsonObject;

    // Keep the analyzer directive on its own line; sharing a line with the if statement
    // would turn the statement itself into comment text.
    // ReSharper disable once ConvertIfStatementToConditionalTernaryExpression
    if (docAsDynamicJsonObject != null)
    {
        indexingResult = ExtractIndexDataFromDocument(anonymousObjectToLuceneDocumentConverter, docAsDynamicJsonObject);
    }
    else
    {
        indexingResult = ExtractIndexDataFromDocument(anonymousObjectToLuceneDocumentConverter, doc);
    }

    // Norms are kept on every field whenever the boost differs from the default of 1.
    if (Math.Abs(boost - 1) > float.Epsilon)
    {
        foreach (var abstractField in indexingResult.Fields)
        {
            abstractField.OmitNorms = false;
        }
    }

    return indexingResult;
}
/// <summary>
/// Maps the documents of <paramref name="batch"/> with the view's map definitions, writes
/// the resulting entries into this index's Lucene writer and records an "Index"
/// performance stat.
/// </summary>
/// <remarks>
/// Inside the callback passed to Write:
/// 1. RecordCurrentBatch("Current", ...) is called first and BatchCompleted("Current") in
///    the finally block.
/// 2. batch.Docs is projected with its position i. Each document bumps sourceCount; a
///    missing __document_id throws ArgumentException. The first time an id is seen,
///    OnIndexEntryDeleted fires on the batchers, and the existing entries for the
///    lower-cased id are deleted unless batch.SkipDeleteFromIndex[i] is set and
///    context.ShouldRemoveFromIndex(documentId) is false. FilteredDocument instances are
///    removed and the result is materialized with ToList before mapping starts.
/// 3. BackgroundTaskExecuter.Instance.ExecuteAllBuffered receives the list; the callback
///    builds its own converter, Document, id Field and CurrentIndexingScope per partition
///    (presumably partitions run concurrently, hence the Interlocked counters - confirm).
/// 4. Outputs with a document id that are not flagged ShouldSkip are added to the index
///    after OnIndexEntryCreated has fired.
/// 5. Queued document references are persisted with UpdateDocumentReferences, and every
///    child key is mapped to its referencing document key in
///    context.ReferencingDocumentsByChildKeysWhichMightNeedReindexing_SimpleIndex.
/// 6. On any exception the batchers are notified through AnErrorOccured and the exception
///    is rethrown; batchers are always disposed in the finally block.
/// The callback returns an IndexedItemsInfo carrying sourceCount and
/// batch.HighestEtagInBatch. minimumTimestamp is not referenced in this method body.
/// </remarks>
public override void IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp) { var count = 0; var sourceCount = 0; var sw = Stopwatch.StartNew(); var start = SystemTime.UtcNow; Write((indexWriter, analyzer, stats) => { var processedKeys = new HashSet<string>(); var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(name)) .Where(x => x != null) .ToList(); try { RecordCurrentBatch("Current", batch.Docs.Count); var docIdTerm = new Term(Constants.DocumentIdFieldName); var documentsWrapped = batch.Docs.Select((doc, i) => { Interlocked.Increment(ref sourceCount); if (doc.__document_id == null) throw new ArgumentException( string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc)); string documentId = doc.__document_id.ToString(); if (processedKeys.Add(documentId) == false) return doc; batchers.ApplyAndIgnoreAllErrors( exception => { logIndexing.WarnException( string.Format("Error when executed OnIndexEntryDeleted trigger for index '{0}', key: '{1}'", name, documentId), exception); context.AddError(name, documentId, exception.Message, "OnIndexEntryDeleted Trigger" ); }, trigger => trigger.OnIndexEntryDeleted(documentId)); if (batch.SkipDeleteFromIndex[i] == false || context.ShouldRemoveFromIndex(documentId)) // maybe it is recently deleted? 
indexWriter.DeleteDocuments(docIdTerm.CreateTerm(documentId.ToLowerInvariant())); return doc; }) .Where(x => x is FilteredDocument == false) .ToList(); var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>(); BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, (partition) => { var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(context.Database, indexDefinition, viewGenerator); var luceneDoc = new Document(); var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); using (CurrentIndexingScope.Current = new CurrentIndexingScope(LoadDocument, allReferencedDocs.Enqueue)) { foreach (var doc in RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, stats)) { float boost; var indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost); if (indexingResult.NewDocId != null && indexingResult.ShouldSkip == false) { Interlocked.Increment(ref count); luceneDoc.GetFields().Clear(); luceneDoc.Boost = boost; documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant()); luceneDoc.Add(documentIdField); foreach (var field in indexingResult.Fields) { luceneDoc.Add(field); } batchers.ApplyAndIgnoreAllErrors( exception => { logIndexing.WarnException( string.Format("Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'", name, indexingResult.NewDocId), exception); context.AddError(name, indexingResult.NewDocId, exception.Message, "OnIndexEntryCreated Trigger" ); }, trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc)); LogIndexedDocument(indexingResult.NewDocId, luceneDoc); AddDocumentToIndex(indexWriter, luceneDoc, analyzer); } Interlocked.Increment(ref stats.IndexingSuccesses); } } }); var dic = context.ReferencingDocumentsByChildKeysWhichMightNeedReindexing_SimpleIndex; IDictionary<string, HashSet<string>> result; while 
(allReferencedDocs.TryDequeue(out result)) { foreach (var referencedDocument in result) { actions.Indexing.UpdateDocumentReferences(name, referencedDocument.Key, referencedDocument.Value); foreach (var childDocumentKey in referencedDocument.Value) { dic.GetOrAdd(childDocumentKey, k => new ConcurrentBag<string>()).Add(referencedDocument.Key); } } } } catch (Exception e) { batchers.ApplyAndIgnoreAllErrors( ex => { logIndexing.WarnException("Failed to notify index update trigger batcher about an error", ex); context.AddError(name, null, ex.Message, "AnErrorOccured Trigger"); }, x => x.AnErrorOccured(e)); throw; } finally { batchers.ApplyAndIgnoreAllErrors( e => { logIndexing.WarnException("Failed to dispose on index update trigger", e); context.AddError(name, null, e.Message, "Dispose Trigger"); }, x => x.Dispose()); BatchCompleted("Current"); } return new IndexedItemsInfo { ChangedDocs = sourceCount, HighestETag = batch.HighestEtagInBatch }; }); AddindexingPerformanceStat(new IndexingPerformanceStats { OutputCount = count, ItemsCount = sourceCount, InputCount = batch.Docs.Count, Duration = sw.Elapsed, Operation = "Index", Started = start }); logIndexing.Debug("Indexed {0} documents for {1}", count, name); }
/// <summary>
/// Replaces the index entries stored for <paramref name="reduceKey"/> with the output of
/// the view's reduce definition applied to <paramref name="mappedResults"/>.
/// </summary>
/// <param name="viewGenerator">Supplies the group-by field and the reduce definition.</param>
/// <param name="mappedResults">The mapped results belonging to the reduce key.</param>
/// <param name="context">Work context handed to the robust enumeration.</param>
/// <param name="actions">Storage actions used for the index statistics.</param>
/// <param name="reduceKey">The reduce key whose entries are rebuilt.</param>
public void ReduceDocuments(AbstractViewGenerator viewGenerator, IEnumerable<object> mappedResults, WorkContext context, DocumentStorageActions actions, string reduceKey)
{
    actions.SetCurrentIndexStatsTo(name);

    var count = 0;
    Write(indexWriter =>
    {
        // Drop whatever was previously indexed under this reduce key.
        var previousEntries = new Term(viewGenerator.GroupByField, reduceKey);
        indexWriter.DeleteDocuments(previousEntries);

        var converter = new AnonymousObjectToLuceneDocumentConverter();
        PropertyDescriptorCollection properties = null;

        foreach (var doc in RobustEnumeration(mappedResults, viewGenerator.ReduceDefinition, actions, context))
        {
            count++;

            // Descriptors are resolved from the first result and reused afterwards.
            properties = properties ?? TypeDescriptor.GetProperties(doc);

            var fields = converter.Index(doc, properties, indexDefinition);
            var luceneDoc = new Document();
            foreach (var field in fields)
            {
                luceneDoc.Add(field);
            }

            indexWriter.AddDocument(luceneDoc);
            actions.IncrementSuccessIndexing();
        }

        return true;
    });

    log.DebugFormat("Reduce resulted in {0} entires for {1} for reduce key {2}", count, name, reduceKey);
}
/// <summary>
/// Maps the documents of <paramref name="batch"/> with the view's map definitions, writes
/// the resulting entries into this index's Lucene writer and returns the performance
/// record of the batch.
/// </summary>
/// <param name="viewGenerator">Supplies the map definitions.</param>
/// <param name="batch">The documents to index, with their per-document delete hints.</param>
/// <param name="actions">Storage accessor; used for commit timing and document references.</param>
/// <param name="minimumTimestamp">Not referenced in this method body.</param>
/// <param name="token">Checked on entry, for every source document and for every map output.</param>
/// <returns>The performance record obtained from <c>RecordCurrentBatch</c>.</returns>
/// <remarks>
/// On any exception inside the write callback the trigger batchers are notified through
/// <c>AnErrorOccured</c> and the exception is rethrown; batchers are always disposed.
/// </remarks>
public override IndexingPerformanceStats IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp, CancellationToken token)
{
    token.ThrowIfCancellationRequested();

    var count = 0;
    var sourceCount = 0;
    var writeToIndexStats = new List<PerformanceStats>();

    IndexingPerformanceStats performance = null;
    var performanceStats = new List<BasePerformanceStats>();

    // Time the storage commit through the accessor's before/after events.
    var storageCommitDuration = new Stopwatch();

    actions.BeforeStorageCommit += storageCommitDuration.Start;

    actions.AfterStorageCommit += () =>
    {
        storageCommitDuration.Stop();

        performanceStats.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds));
    };

    Write((indexWriter, analyzer, stats) =>
    {
        var processedKeys = new HashSet<string>();
        var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(indexId))
            .Where(x => x != null)
            .ToList();

        try
        {
            performance = RecordCurrentBatch("Current", "Index", batch.Docs.Count);

            var deleteExistingDocumentsDuration = new Stopwatch();

            var docIdTerm = new Term(Constants.DocumentIdFieldName);
            var documentsWrapped = batch.Docs.Select((doc, i) =>
            {
                token.ThrowIfCancellationRequested();

                // Counted once per source document. Incrementing outside this lambda would
                // leave sourceCount at 1 regardless of the batch size.
                Interlocked.Increment(ref sourceCount);

                if (doc.__document_id == null)
                {
                    throw new ArgumentException(
                        string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));
                }

                string documentId = doc.__document_id.ToString();
                if (processedKeys.Add(documentId) == false)
                {
                    // Already handled this id in the current batch.
                    return doc;
                }

                InvokeOnIndexEntryDeletedOnAllBatchers(batchers, docIdTerm.CreateTerm(documentId.ToLowerInvariant()));

                if (batch.SkipDeleteFromIndex[i] == false ||
                    context.ShouldRemoveFromIndex(documentId)) // maybe it is recently deleted?
                {
                    using (StopwatchScope.For(deleteExistingDocumentsDuration))
                    {
                        indexWriter.DeleteDocuments(docIdTerm.CreateTerm(documentId.ToLowerInvariant()));
                    }
                }

                return doc;
            })
            .Where(x => x is FilteredDocument == false)
            .ToList();

            performanceStats.Add(new PerformanceStats
            {
                Name = IndexingOperation.Lucene_DeleteExistingDocument,
                DurationMs = deleteExistingDocumentsDuration.ElapsedMilliseconds
            });

            var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();
            var allReferenceEtags = new ConcurrentQueue<IDictionary<string, Etag>>();

            var parallelOperations = new ConcurrentQueue<ParallelBatchStats>();

            var parallelProcessingStart = SystemTime.UtcNow;
            context.Database.MappingThreadPool.ExecuteBatch(documentsWrapped, (IEnumerator<dynamic> partition) =>
            {
                token.ThrowIfCancellationRequested();
                var parallelStats = new ParallelBatchStats
                {
                    StartDelay = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds
                };

                // Each partition works with its own converter, document and id field instances.
                var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(context.Database, indexDefinition, viewGenerator, logIndexing);
                var luceneDoc = new Document();
                var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES,
                                                Field.Index.NOT_ANALYZED_NO_NORMS);

                using (CurrentIndexingScope.Current = new CurrentIndexingScope(context.Database, PublicName))
                {
                    string currentDocId = null;
                    int outputPerDocId = 0;
                    Action<Exception, object> onErrorFunc;
                    bool skipDocument = false;

                    var linqExecutionDuration = new Stopwatch();
                    var addDocumentDutation = new Stopwatch();
                    var convertToLuceneDocumentDuration = new Stopwatch();

                    foreach (var doc in RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, stats, out onErrorFunc, linqExecutionDuration))
                    {
                        token.ThrowIfCancellationRequested();

                        float boost;
                        IndexingResult indexingResult;
                        using (StopwatchScope.For(convertToLuceneDocumentDuration))
                        {
                            try
                            {
                                indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);
                            }
                            catch (Exception e)
                            {
                                // A failed conversion is reported and the output skipped.
                                onErrorFunc(e, doc);
                                continue;
                            }
                        }

                        // ReSharper disable once RedundantBoolCompare --> code clarity
                        if (indexingResult.NewDocId == null || indexingResult.ShouldSkip != false)
                        {
                            continue;
                        }

                        // Outputs are tracked per document id; the counters reset when the id changes.
                        if (currentDocId != indexingResult.NewDocId)
                        {
                            currentDocId = indexingResult.NewDocId;
                            outputPerDocId = 0;
                            skipDocument = false;
                        }

                        if (skipDocument)
                        {
                            continue;
                        }

                        outputPerDocId++;
                        if (EnsureValidNumberOfOutputsForDocument(currentDocId, outputPerDocId) == false)
                        {
                            // Ignore the remaining outputs of this document id.
                            skipDocument = true;
                            continue;
                        }

                        Interlocked.Increment(ref count);

                        using (StopwatchScope.For(convertToLuceneDocumentDuration))
                        {
                            luceneDoc.GetFields().Clear();
                            luceneDoc.Boost = boost;
                            documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
                            luceneDoc.Add(documentIdField);
                            foreach (var field in indexingResult.Fields)
                            {
                                luceneDoc.Add(field);
                            }
                        }

                        batchers.ApplyAndIgnoreAllErrors(
                            exception =>
                            {
                                logIndexing.WarnException(
                                    string.Format(
                                        "Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
                                        PublicName, indexingResult.NewDocId),
                                    exception);
                                context.AddError(
                                    indexId,
                                    PublicName,
                                    indexingResult.NewDocId,
                                    exception,
                                    "OnIndexEntryCreated Trigger");
                            },
                            trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc));
                        LogIndexedDocument(indexingResult.NewDocId, luceneDoc);

                        using (StopwatchScope.For(addDocumentDutation))
                        {
                            AddDocumentToIndex(indexWriter, luceneDoc, analyzer);
                        }

                        Interlocked.Increment(ref stats.IndexingSuccesses);
                    }

                    allReferenceEtags.Enqueue(CurrentIndexingScope.Current.ReferencesEtags);
                    allReferencedDocs.Enqueue(CurrentIndexingScope.Current.ReferencedDocuments);

                    parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.LoadDocument, CurrentIndexingScope.Current.LoadDocumentDuration.ElapsedMilliseconds));
                    parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Linq_MapExecution, linqExecutionDuration.ElapsedMilliseconds));
                    parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Lucene_ConvertToLuceneDocument, convertToLuceneDocumentDuration.ElapsedMilliseconds));
                    parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Lucene_AddDocument, addDocumentDutation.ElapsedMilliseconds));

                    parallelOperations.Enqueue(parallelStats);
                }
            }, description: string.Format("Mapping index {0} from Etag {1} to Etag {2}", this.PublicName, this.GetLastEtagFromStats(), batch.HighestEtagBeforeFiltering));

            performanceStats.Add(new ParallelPerformanceStats
            {
                NumberOfThreads = parallelOperations.Count,
                DurationMs = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds,
                BatchedOperations = parallelOperations.ToList()
            });

            var updateDocumentReferencesDuration = new Stopwatch();
            using (StopwatchScope.For(updateDocumentReferencesDuration))
            {
                UpdateDocumentReferences(actions, allReferencedDocs, allReferenceEtags);
            }
            performanceStats.Add(PerformanceStats.From(IndexingOperation.UpdateDocumentReferences, updateDocumentReferencesDuration.ElapsedMilliseconds));
        }
        catch (Exception e)
        {
            batchers.ApplyAndIgnoreAllErrors(
                ex =>
                {
                    logIndexing.WarnException("Failed to notify index update trigger batcher about an error in " + PublicName, ex);
                    context.AddError(indexId, PublicName, null, ex, "AnErrorOccured Trigger");
                },
                x => x.AnErrorOccured(e));
            throw;
        }
        finally
        {
            batchers.ApplyAndIgnoreAllErrors(
                e =>
                {
                    logIndexing.WarnException("Failed to dispose on index update trigger in " + PublicName, e);
                    context.AddError(indexId, PublicName, null, e, "Dispose Trigger");
                },
                x => x.Dispose());
        }

        return new IndexedItemsInfo(batch.HighestEtagBeforeFiltering)
        {
            ChangedDocs = sourceCount
        };
    }, writeToIndexStats);

    performanceStats.AddRange(writeToIndexStats);

    InitializeIndexingPerformanceCompleteDelegate(performance, sourceCount, count, performanceStats);

    if (logIndexing.IsDebugEnabled)
    {
        logIndexing.Debug("Indexed {0} documents for {1}", count, PublicName);
    }

    return performance;
}
/// <summary>
/// Converts a single reduce output into stored Lucene fields.
/// </summary>
/// <param name="anonymousObjectToLuceneDocumentConverter">Converter that produces the fields.</param>
/// <param name="doc">The reduce output; either a <c>DynamicJsonObject</c> or a plain object.</param>
/// <param name="properties">
/// Property descriptors for plain objects. Filled in on first use (when null) and
/// handed back to the caller so the lookup is not repeated for the next item.
/// </param>
/// <returns>The fields produced by the converter, stored with <c>Field.Store.YES</c>.</returns>
private IEnumerable<AbstractField> GetFields(AnonymousObjectToLuceneDocumentConverter anonymousObjectToLuceneDocumentConverter, object doc, ref PropertyDescriptorCollection properties)
{
    var jsonDoc = doc as DynamicJsonObject;
    if (jsonDoc != null)
    {
        // JSON-backed results are indexed straight from their inner JSON representation.
        return anonymousObjectToLuceneDocumentConverter.Index(jsonDoc.Inner, indexDefinition, Field.Store.YES);
    }

    if (properties == null)
    {
        properties = TypeDescriptor.GetProperties(doc);
    }

    return anonymousObjectToLuceneDocumentConverter.Index(doc, properties, indexDefinition, Field.Store.YES);
}
/// <summary>
/// Builds the indexing result for a plain (non JSON-backed) map output.
/// </summary>
/// <param name="anonymousObjectToLuceneDocumentConverter">Converter that produces the Lucene fields.</param>
/// <param name="doc">The map output to convert.</param>
/// <returns>The fields, the document id read via reflection, and the skip flag.</returns>
/// <exception cref="InvalidSpatialShapeException">
/// Thrown in place of an <c>InvalidShapeException</c> raised during conversion, carrying the document id.
/// </exception>
private IndexingResult ExtractIndexDataFromDocument(AnonymousObjectToLuceneDocumentConverter anonymousObjectToLuceneDocumentConverter, object doc)
{
    PropertyDescriptorCollection descriptors;
    var documentId = GetDocumentIdByReflection(doc, out descriptors);

    List<AbstractField> convertedFields;
    try
    {
        convertedFields = anonymousObjectToLuceneDocumentConverter
            .Index(doc, descriptors, Field.Store.NO)
            .ToList();
    }
    catch (InvalidShapeException e)
    {
        throw new InvalidSpatialShapeException(e, documentId);
    }

    // we always have at least __document_id, so "more than one property but no fields"
    // means the output had content that produced nothing to index
    var nothingToIndex = descriptors.Count > 1 && convertedFields.Count == 0;

    return new IndexingResult
    {
        Fields = convertedFields,
        NewDocId = documentId,
        ShouldSkip = nothingToIndex
    };
}
/// <summary>
/// Runs the map definitions of <paramref name="viewGenerator"/> over the documents in
/// <paramref name="batch"/> and writes the resulting entries to the Lucene index.
/// </summary>
/// <param name="viewGenerator">Supplies the map definitions that are executed.</param>
/// <param name="batch">Documents to index together with their per-document skip-delete flags.</param>
/// <param name="actions">Storage accessor passed on to <c>UpdateDocumentReferences</c>.</param>
/// <param name="minimumTimestamp">Not read by this implementation.</param>
public override void IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp)
{
    var count = 0;
    var sourceCount = 0;
    var sw = Stopwatch.StartNew();
    var start = SystemTime.UtcNow;
    Write((indexWriter, analyzer, stats) =>
    {
        var processedKeys = new HashSet<string>();
        var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(indexId))
            .Where(x => x != null)
            .ToList();

        try
        {
            RecordCurrentBatch("Current", batch.Docs.Count);
            var docIdTerm = new Term(Constants.DocumentIdFieldName);

            // First pass: notify triggers and remove the existing index entries once per
            // distinct document id, then drop the filtered documents.
            var documentsWrapped = batch.Docs.Select((doc, i) =>
            {
                Interlocked.Increment(ref sourceCount);
                if (doc.__document_id == null)
                {
                    throw new ArgumentException(
                        string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));
                }

                string documentId = doc.__document_id.ToString();
                if (processedKeys.Add(documentId) == false)
                {
                    return doc;
                }

                InvokeOnIndexEntryDeletedOnAllBatchers(batchers, docIdTerm.CreateTerm(documentId.ToLowerInvariant()));

                if (batch.SkipDeleteFromIndex[i] == false ||
                    context.ShouldRemoveFromIndex(documentId)) // maybe it is recently deleted?
                {
                    indexWriter.DeleteDocuments(docIdTerm.CreateTerm(documentId.ToLowerInvariant()));
                }

                return doc;
            })
                .Where(x => x is FilteredDocument == false)
                .ToList();

            var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();
            var allReferenceEtags = new ConcurrentQueue<IDictionary<string, Etag>>();

            BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, (partition) =>
            {
                var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(context.Database, indexDefinition, viewGenerator, logIndexing);
                // The Lucene document and its id field are reused for every output of this partition.
                var luceneDoc = new Document();
                var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES,
                                                Field.Index.NOT_ANALYZED_NO_NORMS);

                using (CurrentIndexingScope.Current = new CurrentIndexingScope(context.Database, PublicName))
                {
                    string currentDocId = null;
                    int outputPerDocId = 0;
                    Action<Exception, object> onErrorFunc;
                    foreach (var doc in RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, stats, out onErrorFunc))
                    {
                        float boost;
                        IndexingResult indexingResult;

                        // Convert exactly once. catch (Exception) also covers
                        // InvalidSpatialShapeException, which was previously handled by a
                        // separate, identical handler followed by a second conversion.
                        try
                        {
                            indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);
                        }
                        catch (Exception e)
                        {
                            onErrorFunc(e, doc);
                            continue;
                        }

                        // ReSharper disable once RedundantBoolCompare --> code clarity
                        if (indexingResult.NewDocId == null || indexingResult.ShouldSkip != false)
                        {
                            continue;
                        }

                        // Outputs are counted per source document; a new id restarts the count.
                        if (currentDocId != indexingResult.NewDocId)
                        {
                            currentDocId = indexingResult.NewDocId;
                            outputPerDocId = 0;
                        }

                        outputPerDocId++;
                        EnsureValidNumberOfOutputsForDocument(currentDocId, outputPerDocId);

                        Interlocked.Increment(ref count);
                        luceneDoc.GetFields().Clear();
                        luceneDoc.Boost = boost;
                        documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
                        luceneDoc.Add(documentIdField);
                        foreach (var field in indexingResult.Fields)
                        {
                            luceneDoc.Add(field);
                        }

                        batchers.ApplyAndIgnoreAllErrors(
                            exception =>
                            {
                                logIndexing.WarnException(
                                    string.Format(
                                        "Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
                                        indexId, indexingResult.NewDocId),
                                    exception);
                                context.AddError(indexId, indexingResult.NewDocId, exception.Message, "OnIndexEntryCreated Trigger");
                            },
                            trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc));

                        LogIndexedDocument(indexingResult.NewDocId, luceneDoc);
                        AddDocumentToIndex(indexWriter, luceneDoc, analyzer);
                        Interlocked.Increment(ref stats.IndexingSuccesses);
                    }

                    allReferenceEtags.Enqueue(CurrentIndexingScope.Current.ReferencesEtags);
                    allReferencedDocs.Enqueue(CurrentIndexingScope.Current.ReferencedDocuments);
                }
            });

            UpdateDocumentReferences(actions, allReferencedDocs, allReferenceEtags);
        }
        catch (Exception e)
        {
            batchers.ApplyAndIgnoreAllErrors(
                ex =>
                {
                    logIndexing.WarnException("Failed to notify index update trigger batcher about an error", ex);
                    context.AddError(indexId, null, ex.Message, "AnErrorOccured Trigger");
                },
                x => x.AnErrorOccured(e));
            throw;
        }
        finally
        {
            batchers.ApplyAndIgnoreAllErrors(
                e =>
                {
                    logIndexing.WarnException("Failed to dispose on index update trigger", e);
                    context.AddError(indexId, null, e.Message, "Dispose Trigger");
                },
                x => x.Dispose());
            BatchCompleted("Current");
        }

        return new IndexedItemsInfo(batch.HighestEtagBeforeFiltering)
        {
            ChangedDocs = sourceCount
        };
    });

    AddindexingPerformanceStat(new IndexingPerformanceStats
    {
        OutputCount = count,
        ItemsCount = sourceCount,
        InputCount = batch.Docs.Count,
        Duration = sw.Elapsed,
        Operation = "Index",
        Started = start
    });
    logIndexing.Debug("Indexed {0} documents for {1}", count, indexId);
}
/// <summary>
/// Builds the indexing result for a JSON-backed map output.
/// </summary>
/// <param name="anonymousObjectToLuceneDocumentConverter">Converter that produces the Lucene fields.</param>
/// <param name="dynamicJsonObject">The map output to convert.</param>
/// <returns>
/// The converted fields and the id taken from the root parent (or the object itself);
/// the id is null when the lookup yields a <c>DynamicNullObject</c>. Never marked as skipped.
/// </returns>
private IndexingResult ExtractIndexDataFromDocument(AnonymousObjectToLuceneDocumentConverter anonymousObjectToLuceneDocumentConverter, DynamicJsonObject dynamicJsonObject)
{
    var rawDocumentId = dynamicJsonObject.GetRootParentOrSelf().GetDocumentId();

    var innerJson = ((IDynamicJsonObject)dynamicJsonObject).Inner;
    var convertedFields = anonymousObjectToLuceneDocumentConverter.Index(innerJson, Field.Store.NO).ToList();

    string documentId;
    if (rawDocumentId is DynamicNullObject)
    {
        documentId = null;
    }
    else
    {
        documentId = (string)rawDocumentId;
    }

    return new IndexingResult
    {
        Fields = convertedFields,
        NewDocId = documentId,
        ShouldSkip = false
    };
}
/// <summary>
/// Runs the map definitions of <paramref name="viewGenerator"/> over the documents in
/// <paramref name="batch"/>, writes the resulting entries to the Lucene index and
/// collects timing information for each phase.
/// </summary>
/// <param name="viewGenerator">Supplies the map definitions that are executed.</param>
/// <param name="batch">Documents to index together with their per-document skip-delete flags.</param>
/// <param name="actions">Storage accessor; its commit events are timed and it is passed to <c>UpdateDocumentReferences</c>.</param>
/// <param name="minimumTimestamp">Not read by this implementation.</param>
/// <param name="token">Checked on entry, per source document, per partition and per map output.</param>
/// <returns>The performance record obtained from <c>RecordCurrentBatch</c>, with its completion callback set.</returns>
public override IndexingPerformanceStats IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp, CancellationToken token)
{
    token.ThrowIfCancellationRequested();

    var count = 0;
    var sourceCount = 0;
    var writeToIndexStats = new List<PerformanceStats>();

    IndexingPerformanceStats performance = null;
    var performanceStats = new List<BasePerformanceStats>();

    // Time the storage commit via the accessor's before/after commit events.
    var storageCommitDuration = new Stopwatch();

    actions.BeforeStorageCommit += storageCommitDuration.Start;

    actions.AfterStorageCommit += () =>
    {
        storageCommitDuration.Stop();

        performanceStats.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds));
    };

    Write((indexWriter, analyzer, stats) =>
    {
        var processedKeys = new HashSet<string>();
        var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(indexId))
            .Where(x => x != null)
            .ToList();

        try
        {
            performance = RecordCurrentBatch("Current", "Index", batch.Docs.Count);

            var deleteExistingDocumentsDuration = new Stopwatch();
            var docIdTerm = new Term(Constants.DocumentIdFieldName);

            // First pass: notify triggers and remove the existing index entries once per
            // distinct document id, then drop the filtered documents.
            var documentsWrapped = batch.Docs.Select((doc, i) =>
            {
                token.ThrowIfCancellationRequested();

                Interlocked.Increment(ref sourceCount);
                if (doc.__document_id == null)
                    throw new ArgumentException(
                        string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));

                string documentId = doc.__document_id.ToString();
                if (processedKeys.Add(documentId) == false)
                    return doc;

                InvokeOnIndexEntryDeletedOnAllBatchers(batchers, docIdTerm.CreateTerm(documentId.ToLowerInvariant()));

                if (batch.SkipDeleteFromIndex[i] == false ||
                    context.ShouldRemoveFromIndex(documentId)) // maybe it is recently deleted?
                {
                    using (StopwatchScope.For(deleteExistingDocumentsDuration))
                    {
                        indexWriter.DeleteDocuments(docIdTerm.CreateTerm(documentId.ToLowerInvariant()));
                    }
                }

                return doc;
            })
                .Where(x => x is FilteredDocument == false)
                .ToList();

            performanceStats.Add(new PerformanceStats
            {
                Name = IndexingOperation.Lucene_DeleteExistingDocument,
                DurationMs = deleteExistingDocumentsDuration.ElapsedMilliseconds
            });

            var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();
            var allReferenceEtags = new ConcurrentQueue<IDictionary<string, Etag>>();

            var parallelOperations = new ConcurrentQueue<ParallelBatchStats>();

            var parallelProcessingStart = SystemTime.UtcNow;

            BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, (partition) =>
            {
                token.ThrowIfCancellationRequested();

                var parallelStats = new ParallelBatchStats
                {
                    StartDelay = (long) (SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds
                };

                var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(context.Database, indexDefinition, viewGenerator, logIndexing);
                // The Lucene document and its id field are reused for every output of this partition.
                var luceneDoc = new Document();
                var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES,
                                                Field.Index.NOT_ANALYZED_NO_NORMS);

                using (CurrentIndexingScope.Current = new CurrentIndexingScope(context.Database, PublicName))
                {
                    string currentDocId = null;
                    int outputPerDocId = 0;
                    Action<Exception, object> onErrorFunc;
                    bool skipDocument = false;

                    var linqExecutionDuration = new Stopwatch();
                    var addDocumentDuration = new Stopwatch();
                    var convertToLuceneDocumentDuration = new Stopwatch();

                    foreach (var doc in RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, stats, out onErrorFunc, linqExecutionDuration))
                    {
                        token.ThrowIfCancellationRequested();

                        float boost;
                        IndexingResult indexingResult;
                        using (StopwatchScope.For(convertToLuceneDocumentDuration))
                        {
                            try
                            {
                                indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);
                            }
                            catch (Exception e)
                            {
                                onErrorFunc(e, doc);
                                continue;
                            }
                        }

                        // ReSharper disable once RedundantBoolCompare --> code clarity
                        if (indexingResult.NewDocId == null || indexingResult.ShouldSkip != false)
                        {
                            continue;
                        }

                        // Outputs are counted per source document; a new id restarts the
                        // count and clears the skip flag.
                        if (currentDocId != indexingResult.NewDocId)
                        {
                            currentDocId = indexingResult.NewDocId;
                            outputPerDocId = 0;
                            skipDocument = false;
                        }

                        if (skipDocument)
                            continue;

                        outputPerDocId++;
                        if (EnsureValidNumberOfOutputsForDocument(currentDocId, outputPerDocId) == false)
                        {
                            // Remaining outputs of this source document are ignored.
                            skipDocument = true;
                            continue;
                        }

                        Interlocked.Increment(ref count);

                        using (StopwatchScope.For(convertToLuceneDocumentDuration))
                        {
                            luceneDoc.GetFields().Clear();
                            luceneDoc.Boost = boost;
                            documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
                            luceneDoc.Add(documentIdField);
                            foreach (var field in indexingResult.Fields)
                            {
                                luceneDoc.Add(field);
                            }
                        }

                        batchers.ApplyAndIgnoreAllErrors(
                            exception =>
                            {
                                logIndexing.WarnException(
                                    string.Format(
                                        "Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
                                        PublicName, indexingResult.NewDocId),
                                    exception);
                                context.AddError(
                                    indexId,
                                    PublicName,
                                    indexingResult.NewDocId,
                                    exception,
                                    "OnIndexEntryCreated Trigger");
                            },
                            trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc));

                        LogIndexedDocument(indexingResult.NewDocId, luceneDoc);

                        using (StopwatchScope.For(addDocumentDuration))
                        {
                            AddDocumentToIndex(indexWriter, luceneDoc, analyzer);
                        }

                        Interlocked.Increment(ref stats.IndexingSuccesses);
                    }

                    allReferenceEtags.Enqueue(CurrentIndexingScope.Current.ReferencesEtags);
                    allReferencedDocs.Enqueue(CurrentIndexingScope.Current.ReferencedDocuments);

                    parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.LoadDocument, CurrentIndexingScope.Current.LoadDocumentDuration.ElapsedMilliseconds));
                    parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Linq_MapExecution, linqExecutionDuration.ElapsedMilliseconds));
                    parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Lucene_ConvertToLuceneDocument, convertToLuceneDocumentDuration.ElapsedMilliseconds));
                    parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Lucene_AddDocument, addDocumentDuration.ElapsedMilliseconds));

                    // Exactly one entry per partition: NumberOfThreads below is derived
                    // from the queue length, so a second enqueue would double it.
                    parallelOperations.Enqueue(parallelStats);
                }
            });

            performanceStats.Add(new ParallelPerformanceStats
            {
                NumberOfThreads = parallelOperations.Count,
                DurationMs = (long) (SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds,
                BatchedOperations = parallelOperations.ToList()
            });

            var updateDocumentReferencesDuration = new Stopwatch();
            using (StopwatchScope.For(updateDocumentReferencesDuration))
            {
                UpdateDocumentReferences(actions, allReferencedDocs, allReferenceEtags);
            }
            performanceStats.Add(PerformanceStats.From(IndexingOperation.UpdateDocumentReferences, updateDocumentReferencesDuration.ElapsedMilliseconds));
        }
        catch (Exception e)
        {
            batchers.ApplyAndIgnoreAllErrors(
                ex =>
                {
                    logIndexing.WarnException("Failed to notify index update trigger batcher about an error in " + PublicName, ex);
                    context.AddError(indexId, PublicName, null, ex, "AnErrorOccured Trigger");
                },
                x => x.AnErrorOccured(e));
            throw;
        }
        finally
        {
            batchers.ApplyAndIgnoreAllErrors(
                e =>
                {
                    logIndexing.WarnException("Failed to dispose on index update trigger in " + PublicName, e);
                    context.AddError(indexId, PublicName, null, e, "Dispose Trigger");
                },
                x => x.Dispose());
        }

        return new IndexedItemsInfo(batch.HighestEtagBeforeFiltering)
        {
            ChangedDocs = sourceCount
        };
    }, writeToIndexStats);

    performanceStats.AddRange(writeToIndexStats);

    performance.OnCompleted = () => BatchCompleted("Current", "Index", sourceCount, count, performanceStats);

    logIndexing.Debug("Indexed {0} documents for {1}", count, PublicName);

    return performance;
}
/// <summary>
/// Builds the indexing result for a plain (non JSON-backed) map output, using the
/// per-type property descriptor cache.
/// </summary>
/// <param name="anonymousObjectToLuceneDocumentConverter">Converter that produces the Lucene fields.</param>
/// <param name="doc">The map output to convert.</param>
/// <returns>The fields, the value of the document-id property (null when it is not a string), and the skip flag.</returns>
private IndexingResult ExtractIndexDataFromDocument(AnonymousObjectToLuceneDocumentConverter anonymousObjectToLuceneDocumentConverter, object doc)
{
    // Descriptors are looked up once per runtime type and then served from the cache.
    PropertyDescriptorCollection descriptors = propertyDescriptorCache.GetOrAdd(
        doc.GetType(),
        docType => TypeDescriptor.GetProperties(docType));

    var convertedFields = anonymousObjectToLuceneDocumentConverter
        .Index(doc, descriptors, Field.Store.NO)
        .ToList();

    var documentId = descriptors.Find(Constants.DocumentIdFieldName, false).GetValue(doc) as string;

    // we always have at least __document_id
    var nothingToIndex = descriptors.Count > 1 && convertedFields.Count == 0;

    return new IndexingResult()
    {
        Fields = convertedFields,
        NewDocId = documentId,
        ShouldSkip = nothingToIndex
    };
}
/// <summary>
/// Creates the Lucene fields for a single name/value pair using a throwaway index definition.
/// </summary>
/// <param name="name">Field name.</param>
/// <param name="value">Value to convert.</param>
/// <param name="stored">When true the fields use <c>Field.Store.YES</c>, otherwise <c>Field.Store.NO</c>.</param>
/// <param name="indexed">When true the field is registered as Analyzed, otherwise as NotAnalyzed.</param>
/// <returns>The fields produced by the converter.</returns>
protected IEnumerable<AbstractField> CreateField(string name, object value, bool stored = false, bool indexed = true)
{
    FieldIndexing indexingMode;
    if (indexed)
    {
        indexingMode = FieldIndexing.Analyzed;
    }
    else
    {
        indexingMode = FieldIndexing.NotAnalyzed;
    }

    var definition = new IndexDefinition();
    definition.Indexes[name] = indexingMode;

    var storage = stored ? Field.Store.YES : Field.Store.NO;
    var converter = new AnonymousObjectToLuceneDocumentConverter(definition);
    return converter.CreateFields(name, value, storage);
}
/// <summary>
/// Captures everything needed to run one reduce step for the owning map/reduce index.
/// </summary>
/// <param name="parent">The owning index; its id, context and definition are read here.</param>
/// <param name="viewGenerator">View generator stored for the reduce step and given to the converter.</param>
/// <param name="mappedResultsByBucket">Mapped results grouped by bucket.</param>
/// <param name="level">Reduce level; trigger batchers are only created for level 2.</param>
/// <param name="context">Work context supplying the index update triggers.</param>
/// <param name="actions">Storage accessor stored for the reduce step.</param>
/// <param name="reduceKeys">Reduce keys stored for the reduce step.</param>
/// <param name="inputCount">Input count stored for the reduce step.</param>
public ReduceDocuments(MapReduceIndex parent, AbstractViewGenerator viewGenerator, IEnumerable<IGrouping<int, object>> mappedResultsByBucket, int level, WorkContext context, IStorageActionsAccessor actions, HashSet<string> reduceKeys, int inputCount)
{
    this.parent = parent;
    this.inputCount = inputCount;
    indexId = this.parent.indexId;

    ViewGenerator = viewGenerator;
    MappedResultsByBucket = mappedResultsByBucket;
    Level = level;
    Context = context;
    Actions = actions;
    ReduceKeys = reduceKeys;

    anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(
        this.parent.context.Database,
        this.parent.indexDefinition,
        ViewGenerator,
        logIndexing);

    if (Level != 2)
    {
        // Batchers are left unassigned for every level other than 2.
        return;
    }

    batchers = Context.IndexUpdateTriggers
        .Select(trigger => trigger.CreateBatcher(indexId))
        .Where(batcher => batcher != null)
        .ToList();
}
/// <summary>
/// Runs the map definitions of <paramref name="viewGenerator"/> over the documents in
/// <paramref name="batch"/> and writes the resulting entries to the Lucene index.
/// </summary>
/// <param name="viewGenerator">Supplies the map definitions that are executed.</param>
/// <param name="batch">Documents to index together with their per-document skip-delete flags.</param>
/// <param name="context">Work context supplying triggers and error reporting.</param>
/// <param name="actions">Storage accessor handed to the robust enumeration.</param>
/// <param name="minimumTimestamp">Not read by this implementation.</param>
public override void IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, WorkContext context, IStorageActionsAccessor actions, DateTime minimumTimestamp)
{
    var count = 0;
    var sourceCount = 0;
    var elapsed = Stopwatch.StartNew();
    var startedAt = SystemTime.UtcNow;

    Write(context, (indexWriter, analyzer, stats) =>
    {
        var seenDocumentIds = new HashSet<string>();
        var batchers = context.IndexUpdateTriggers
            .Select(trigger => trigger.CreateBatcher(name))
            .Where(batcher => batcher != null)
            .ToList();

        try
        {
            var docIdTerm = new Term(Constants.DocumentIdFieldName);

            // First pass: fire the delete triggers and remove existing entries once per
            // distinct document id, then drop the filtered documents.
            var docsToIndex = batch.Docs.Select((doc, i) =>
            {
                Interlocked.Increment(ref sourceCount);
                if (doc.__document_id == null)
                {
                    throw new ArgumentException(
                        string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));
                }

                string documentId = doc.__document_id.ToString();
                if (!seenDocumentIds.Add(documentId))
                {
                    return doc;
                }

                batchers.ApplyAndIgnoreAllErrors(
                    exception =>
                    {
                        logIndexing.WarnException(
                            string.Format("Error when executed OnIndexEntryDeleted trigger for index '{0}', key: '{1}'",
                                          name, documentId),
                            exception);
                        context.AddError(name, documentId, exception.Message);
                    },
                    trigger => trigger.OnIndexEntryDeleted(documentId));

                if (!batch.SkipDeleteFromIndex[i])
                {
                    indexWriter.DeleteDocuments(docIdTerm.CreateTerm(documentId.ToLowerInvariant()));
                }

                return doc;
            })
                .Where(candidate => !(candidate is FilteredDocument))
                .ToList();

            BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, docsToIndex, (partition) =>
            {
                var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(indexDefinition);
                // The Lucene document and its id field are reused for every output of this partition.
                var luceneDoc = new Document();
                var documentIdField = new Field(
                    Constants.DocumentIdFieldName,
                    "dummy",
                    Field.Store.YES,
                    Field.Index.NOT_ANALYZED_NO_NORMS);

                foreach (var doc in RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, actions, stats))
                {
                    float boost;
                    var indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);

                    if (indexingResult.NewDocId != null && indexingResult.ShouldSkip == false)
                    {
                        Interlocked.Increment(ref count);

                        luceneDoc.GetFields().Clear();
                        luceneDoc.Boost = boost;
                        documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
                        luceneDoc.Add(documentIdField);
                        foreach (var field in indexingResult.Fields)
                        {
                            luceneDoc.Add(field);
                        }

                        batchers.ApplyAndIgnoreAllErrors(
                            exception =>
                            {
                                logIndexing.WarnException(
                                    string.Format("Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
                                                  name, indexingResult.NewDocId),
                                    exception);
                                context.AddError(name, indexingResult.NewDocId, exception.Message);
                            },
                            trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc));

                        LogIndexedDocument(indexingResult.NewDocId, luceneDoc);
                        AddDocumentToIndex(indexWriter, luceneDoc, analyzer);
                    }

                    // Counted for every enumerated output, including the ones not written.
                    Interlocked.Increment(ref stats.IndexingSuccesses);
                }
            });
        }
        catch (Exception e)
        {
            batchers.ApplyAndIgnoreAllErrors(
                ex =>
                {
                    logIndexing.WarnException("Failed to notify index update trigger batcher about an error", ex);
                    context.AddError(name, null, ex.Message);
                },
                x => x.AnErrorOccured(e));
            throw;
        }
        finally
        {
            batchers.ApplyAndIgnoreAllErrors(
                e =>
                {
                    logIndexing.WarnException("Failed to dispose on index update trigger", e);
                    context.AddError(name, null, e.Message);
                },
                x => x.Dispose());
        }

        return sourceCount;
    });

    var performanceStat = new IndexingPerformanceStats
    {
        OutputCount = count,
        InputCount = sourceCount,
        Duration = elapsed.Elapsed,
        Operation = "Index",
        Started = startedAt
    };
    AddindexingPerformanceStat(performanceStat);

    logIndexing.Debug("Indexed {0} documents for {1}", count, name);
}
/// <summary>
/// Builds the indexing result for a JSON-backed map output.
/// </summary>
/// <param name="anonymousObjectToLuceneDocumentConverter">Converter that produces the Lucene fields.</param>
/// <param name="dynamicJsonObject">The map output to convert.</param>
/// <returns>
/// The converted fields and the id taken from the root parent (or the object itself);
/// the id is null when the lookup yields a <c>DynamicNullObject</c>. Never marked as skipped.
/// </returns>
/// <exception cref="InvalidSpatialShapeException">
/// Thrown in place of an <c>InvalidShapeException</c> raised during conversion, carrying the document id.
/// </exception>
private IndexingResult ExtractIndexDataFromDocument(AnonymousObjectToLuceneDocumentConverter anonymousObjectToLuceneDocumentConverter, DynamicJsonObject dynamicJsonObject)
{
    var rawDocumentId = dynamicJsonObject.GetRootParentOrSelf().GetDocumentId();

    string documentId;
    if (rawDocumentId is DynamicNullObject)
    {
        documentId = null;
    }
    else
    {
        documentId = (string)rawDocumentId;
    }

    List<AbstractField> convertedFields;
    try
    {
        var innerJson = ((IDynamicJsonObject)dynamicJsonObject).Inner;
        convertedFields = anonymousObjectToLuceneDocumentConverter.Index(innerJson, Field.Store.NO).ToList();
    }
    catch (InvalidShapeException e)
    {
        throw new InvalidSpatialShapeException(e, documentId);
    }

    return new IndexingResult
    {
        Fields = convertedFields,
        NewDocId = documentId,
        ShouldSkip = false
    };
}
// This method may be called concurrently, by both the ReduceTask (for removal)
// and by the ReducingExecuter (for add/modify). This is okay with us, since the
// Write() call is already handling locking properly
/// <summary>
/// Replaces the index entries for <paramref name="reduceKeys"/> with the results of
/// running the reduce definition over <paramref name="mappedResults"/>.
/// </summary>
/// <param name="viewGenerator">Supplies the reduce definition and is used to extract each reduce key.</param>
/// <param name="mappedResults">Mapped results fed into the reduce definition.</param>
/// <param name="context">Work context supplying triggers and error reporting.</param>
/// <param name="actions">Storage accessor handed to the robust enumeration.</param>
/// <param name="reduceKeys">Keys whose existing entries are deleted before the new ones are written.</param>
public void ReduceDocuments(AbstractViewGenerator viewGenerator, IEnumerable<object> mappedResults, WorkContext context, IStorageActionsAccessor actions, string[] reduceKeys)
{
    var count = 0;
    Write(context, (indexWriter, analyzer, stats) =>
    {
        stats.Operation = IndexingWorkStats.Status.Reduce;
        var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(name))
            .Where(x => x != null)
            .ToList();

        try
        {
            foreach (var reduceKey in reduceKeys)
            {
                // Copy so the error lambda below captures this iteration's key.
                var entryKey = reduceKey;
                indexWriter.DeleteDocuments(new Term(Abstractions.Data.Constants.ReduceKeyFieldName, entryKey.ToLowerInvariant()));
                batchers.ApplyAndIgnoreAllErrors(
                    exception =>
                    {
                        logIndexing.WarnException(
                            string.Format("Error when executed OnIndexEntryDeleted trigger for index '{0}', key: '{1}'",
                                          name, entryKey),
                            exception);
                        context.AddError(name, entryKey, exception.Message);
                    },
                    trigger => trigger.OnIndexEntryDeleted(entryKey));
            }

            PropertyDescriptorCollection properties = null;
            var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(indexDefinition);
            // The Lucene document and its reduce-key field are reused for every reduce output.
            var luceneDoc = new Document();
            var reduceKeyField = new Field(Constants.ReduceKeyFieldName, "dummy",
                                           Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS);

            foreach (var doc in RobustEnumerationReduce(mappedResults, viewGenerator.ReduceDefinition, actions, context, stats))
            {
                count++;
                float boost;
                var fields = GetFields(anonymousObjectToLuceneDocumentConverter, doc, ref properties, out boost).ToList();

                string reduceKeyAsString = ExtractReduceKey(viewGenerator, doc);
                reduceKeyField.SetValue(reduceKeyAsString.ToLowerInvariant());

                luceneDoc.GetFields().Clear();
                luceneDoc.SetBoost(boost);
                luceneDoc.Add(reduceKeyField);
                foreach (var field in fields)
                {
                    luceneDoc.Add(field);
                }

                batchers.ApplyAndIgnoreAllErrors(
                    exception =>
                    {
                        logIndexing.WarnException(
                            string.Format("Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
                                          name, reduceKeyAsString),
                            exception);
                        context.AddError(name, reduceKeyAsString, exception.Message);
                    },
                    trigger => trigger.OnIndexEntryCreated(reduceKeyAsString, luceneDoc));

                LogIndexedDocument(reduceKeyAsString, luceneDoc);

                AddDocumentToIndex(indexWriter, luceneDoc, analyzer);
                stats.ReduceSuccesses++;
            }

            return count + reduceKeys.Length;
        }
        finally
        {
            // Dispose the batchers even when the reduce fails part-way, so they are
            // not leaked on the error path.
            batchers.ApplyAndIgnoreAllErrors(
                e =>
                {
                    logIndexing.WarnException("Failed to dispose on index update trigger", e);
                    context.AddError(name, null, e.Message);
                },
                x => x.Dispose());
        }
    });
    logIndexing.Debug(() => string.Format("Reduce resulted in {0} entries for {1} for reduce keys: {2}", count, name, string.Join(", ", reduceKeys)));
}
/// <summary>
/// Runs the map definitions of <paramref name="viewGenerator"/> over the documents in
/// <paramref name="batch"/>, writes the resulting entries to the Lucene index and
/// records load-document counters alongside the usual performance figures.
/// </summary>
/// <param name="viewGenerator">Supplies the map definitions that are executed.</param>
/// <param name="batch">Documents to index together with their per-document skip-delete flags.</param>
/// <param name="actions">Storage accessor passed on to <c>UpdateDocumentReferences</c>.</param>
/// <param name="minimumTimestamp">Not read by this implementation.</param>
public override void IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp)
{
    var count = 0;
    var sourceCount = 0;
    var elapsed = Stopwatch.StartNew();
    var startedAt = SystemTime.UtcNow;
    int loadDocumentCount = 0;
    long loadDocumentDuration = 0;

    Write((indexWriter, analyzer, stats) =>
    {
        var seenDocumentIds = new HashSet<string>();
        var batchers = context.IndexUpdateTriggers
            .Select(trigger => trigger.CreateBatcher(indexId))
            .Where(batcher => batcher != null)
            .ToList();

        try
        {
            var indexingPerfStats = RecordCurrentBatch("Current", batch.Docs.Count);
            batch.SetIndexingPerformance(indexingPerfStats);

            var docIdTerm = new Term(Constants.DocumentIdFieldName);

            // First pass: notify triggers and remove the existing index entries once per
            // distinct document id, then drop the filtered documents.
            var docsToIndex = batch.Docs.Select((doc, i) =>
            {
                Interlocked.Increment(ref sourceCount);
                if (doc.__document_id == null)
                {
                    throw new ArgumentException(
                        string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));
                }

                string documentId = doc.__document_id.ToString();
                if (!seenDocumentIds.Add(documentId))
                {
                    return doc;
                }

                InvokeOnIndexEntryDeletedOnAllBatchers(batchers, docIdTerm.CreateTerm(documentId.ToLowerInvariant()));

                if (batch.SkipDeleteFromIndex[i] == false ||
                    context.ShouldRemoveFromIndex(documentId)) // maybe it is recently deleted?
                {
                    indexWriter.DeleteDocuments(docIdTerm.CreateTerm(documentId.ToLowerInvariant()));
                }

                return doc;
            })
                .Where(candidate => !(candidate is FilteredDocument))
                .ToList();

            var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();
            var allReferenceEtags = new ConcurrentQueue<IDictionary<string, Etag>>();

            BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, docsToIndex, (partition) =>
            {
                var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(context.Database, indexDefinition, viewGenerator, logIndexing);
                // The Lucene document and its id field are reused for every output of this partition.
                var luceneDoc = new Document();
                var documentIdField = new Field(
                    Constants.DocumentIdFieldName,
                    "dummy",
                    Field.Store.YES,
                    Field.Index.NOT_ANALYZED_NO_NORMS);

                using (CurrentIndexingScope.Current = new CurrentIndexingScope(context.Database, PublicName))
                {
                    string currentDocId = null;
                    int outputPerDocId = 0;
                    Action<Exception, object> onErrorFunc;
                    bool skipDocument = false;

                    foreach (var doc in RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, stats, out onErrorFunc))
                    {
                        float boost;
                        IndexingResult indexingResult;
                        try
                        {
                            indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);
                        }
                        catch (Exception e)
                        {
                            onErrorFunc(e, doc);
                            continue;
                        }

                        // ReSharper disable once RedundantBoolCompare --> code clarity
                        if (indexingResult.NewDocId == null || indexingResult.ShouldSkip != false)
                        {
                            continue;
                        }

                        // Outputs are counted per source document; a new id restarts the
                        // count and clears the skip flag.
                        if (currentDocId != indexingResult.NewDocId)
                        {
                            currentDocId = indexingResult.NewDocId;
                            outputPerDocId = 0;
                            skipDocument = false;
                        }

                        if (skipDocument)
                        {
                            continue;
                        }

                        outputPerDocId++;
                        if (EnsureValidNumberOfOutputsForDocument(currentDocId, outputPerDocId) == false)
                        {
                            // Remaining outputs of this source document are ignored.
                            skipDocument = true;
                            continue;
                        }

                        Interlocked.Increment(ref count);

                        luceneDoc.GetFields().Clear();
                        luceneDoc.Boost = boost;
                        documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
                        luceneDoc.Add(documentIdField);
                        foreach (var field in indexingResult.Fields)
                        {
                            luceneDoc.Add(field);
                        }

                        batchers.ApplyAndIgnoreAllErrors(
                            exception =>
                            {
                                logIndexing.WarnException(
                                    string.Format(
                                        "Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
                                        indexId, indexingResult.NewDocId),
                                    exception);
                                context.AddError(indexId, indexingResult.NewDocId, exception.Message, "OnIndexEntryCreated Trigger");
                            },
                            trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc));

                        LogIndexedDocument(indexingResult.NewDocId, luceneDoc);
                        AddDocumentToIndex(indexWriter, luceneDoc, analyzer);
                        Interlocked.Increment(ref stats.IndexingSuccesses);
                    }

                    var scope = CurrentIndexingScope.Current;
                    allReferenceEtags.Enqueue(scope.ReferencesEtags);
                    allReferencedDocs.Enqueue(scope.ReferencedDocuments);
                    Interlocked.Add(ref loadDocumentCount, scope.LoadDocumentCount);
                    Interlocked.Add(ref loadDocumentDuration, scope.LoadDocumentDuration.ElapsedMilliseconds);
                }
            });

            UpdateDocumentReferences(actions, allReferencedDocs, allReferenceEtags);
        }
        catch (Exception e)
        {
            batchers.ApplyAndIgnoreAllErrors(
                ex =>
                {
                    logIndexing.WarnException("Failed to notify index update trigger batcher about an error", ex);
                    context.AddError(indexId, null, ex.Message, "AnErrorOccured Trigger");
                },
                x => x.AnErrorOccured(e));
            throw;
        }
        finally
        {
            batchers.ApplyAndIgnoreAllErrors(
                e =>
                {
                    logIndexing.WarnException("Failed to dispose on index update trigger", e);
                    context.AddError(indexId, null, e.Message, "Dispose Trigger");
                },
                x => x.Dispose());
            BatchCompleted("Current");
        }

        return new IndexedItemsInfo(batch.HighestEtagBeforeFiltering)
        {
            ChangedDocs = sourceCount
        };
    });

    var performanceStat = new IndexingPerformanceStats
    {
        OutputCount = count,
        ItemsCount = sourceCount,
        InputCount = batch.Docs.Count,
        Duration = elapsed.Elapsed,
        Operation = "Index",
        Started = startedAt,
        LoadDocumentCount = loadDocumentCount,
        LoadDocumentDurationMs = loadDocumentDuration
    };
    AddindexingPerformanceStat(performanceStat);

    logIndexing.Debug("Indexed {0} documents for {1}", count, indexId);
}
// This method may be called concurrently, by both the ReduceTask (for removal)
// and by the ReducingExecuter (for add/modify). This is okay with us, since the
// Write() call is already handling locking properly
/// <summary>
/// Replaces the index entries for <paramref name="reduceKeys"/> with the results of
/// running the reduce definition over <paramref name="mappedResults"/>.
/// </summary>
/// <param name="viewGenerator">Supplies the reduce definition and is used to extract each reduce key.</param>
/// <param name="mappedResults">Mapped results fed into the reduce definition.</param>
/// <param name="context">Work context supplying triggers and error reporting.</param>
/// <param name="actions">Storage accessor used for the index stats and handed to the robust enumeration.</param>
/// <param name="reduceKeys">Keys whose existing entries are deleted before the new ones are written.</param>
public void ReduceDocuments(AbstractViewGenerator viewGenerator, IEnumerable<object> mappedResults, WorkContext context, IStorageActionsAccessor actions, string[] reduceKeys)
{
    var count = 0;
    Write(context, (indexWriter, analyzer) =>
    {
        actions.Indexing.SetCurrentIndexStatsTo(name);
        var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(name))
            .Where(x => x != null)
            .ToList();

        try
        {
            foreach (var reduceKey in reduceKeys)
            {
                // Copy so the error lambda below captures this iteration's key.
                var entryKey = reduceKey;
                indexWriter.DeleteDocuments(new Term(Abstractions.Data.Constants.ReduceKeyFieldName, entryKey.ToLowerInvariant()));
                batchers.ApplyAndIgnoreAllErrors(
                    exception =>
                    {
                        logIndexing.WarnException(
                            string.Format("Error when executed OnIndexEntryDeleted trigger for index '{0}', key: '{1}'",
                                          name, entryKey),
                            exception);
                        context.AddError(name, entryKey, exception.Message);
                    },
                    trigger => trigger.OnIndexEntryDeleted(entryKey));
            }

            PropertyDescriptorCollection properties = null;
            var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(indexDefinition);
            // The Lucene document and its reduce-key field are reused for every reduce output.
            var luceneDoc = new Document();
            var reduceKeyField = new Field(Abstractions.Data.Constants.ReduceKeyFieldName, "dummy",
                                           Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS);

            foreach (var doc in RobustEnumerationReduce(mappedResults, viewGenerator.ReduceDefinition, actions, context))
            {
                count++;
                var fields = GetFields(anonymousObjectToLuceneDocumentConverter, doc, ref properties).ToList();

                string reduceKeyAsString = ExtractReduceKey(viewGenerator, doc);
                reduceKeyField.SetValue(reduceKeyAsString.ToLowerInvariant());

                luceneDoc.GetFields().Clear();
                luceneDoc.Add(reduceKeyField);
                foreach (var field in fields)
                {
                    luceneDoc.Add(field);
                }

                batchers.ApplyAndIgnoreAllErrors(
                    exception =>
                    {
                        logIndexing.WarnException(
                            string.Format("Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
                                          name, reduceKeyAsString),
                            exception);
                        context.AddError(name, reduceKeyAsString, exception.Message);
                    },
                    trigger => trigger.OnIndexEntryCreated(reduceKeyAsString, luceneDoc));

                logIndexing.Debug("Reduce key {0} result in index {1} gave document: {2}", reduceKeyAsString, name, luceneDoc);

                AddDocumentToIndex(indexWriter, luceneDoc, analyzer);
                actions.Indexing.IncrementReduceSuccessIndexing();
            }

            return true;
        }
        finally
        {
            // Dispose the batchers even when the reduce fails part-way, so they are
            // not leaked on the error path.
            batchers.ApplyAndIgnoreAllErrors(
                e =>
                {
                    logIndexing.WarnException("Failed to dispose on index update trigger", e);
                    context.AddError(name, null, e.Message);
                },
                x => x.Dispose());
        }
    });
    logIndexing.Debug(() => string.Format("Reduce resulted in {0} entries for {1} for reduce keys: {2}", count, name, string.Join(", ", reduceKeys)));
}