public SetValue ( System.String value_Renamed ) : void

value_Renamed | System.String | The new string value to assign to this field.
return | void |
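For orientation before the harvested examples below, here is a minimal, hypothetical usage sketch. It assumes the Lucene.Net 2.9-era API that the examples themselves use (RAMDirectory, IndexWriter, StandardAnalyzer); the names dir, writer, field, and doc are illustrative and not part of the SetValue API.

    // Minimal sketch: reuse a single Field instance across documents by
    // replacing its value before each AddDocument call.
    var dir = new RAMDirectory();
    var writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

    var field = new Field("id", "id1", Field.Store.YES, Field.Index.NOT_ANALYZED);
    var doc = new Document();
    doc.Add(field);

    writer.AddDocument(doc);   // indexed with id = "id1"
    field.SetValue("id2");     // change the field's value in place
    writer.AddDocument(doc);   // indexed with id = "id2"
    writer.Close();

This avoids allocating a new Field per document, which is the pattern every example below follows.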
public virtual void TestFieldSetValue() { Field field = new Field("id", "id1", Field.Store.YES, Field.Index.NOT_ANALYZED); Document doc = new Document(); doc.Add(field); doc.Add(new Field("keyword", "test", Field.Store.YES, Field.Index.NOT_ANALYZED)); RAMDirectory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); writer.AddDocument(doc); field.SetValue("id2"); writer.AddDocument(doc); field.SetValue("id3"); writer.AddDocument(doc); writer.Close(); Searcher searcher = new IndexSearcher(dir); Query query = new TermQuery(new Term("keyword", "test")); // ensure that queries return expected results without DateFilter first ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(3, hits.Length); int result = 0; for (int i = 0; i < 3; i++) { Document doc2 = searcher.Doc(hits[i].doc); Field f = doc2.GetField("id"); if (f.StringValue().Equals("id1")) { result |= 1; } else if (f.StringValue().Equals("id2")) { result |= 2; } else if (f.StringValue().Equals("id3")) { result |= 4; } else { Assert.Fail("unexpected id field"); } } searcher.Close(); dir.Close(); Assert.AreEqual(7, result, "did not see all IDs"); }
public virtual void TestMultiValueSource()
{
    Directory dir = new MockRAMDirectory();
    IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
    Document doc = new Document();
    Field f = new Field("field", "", Field.Store.NO, Field.Index.NOT_ANALYZED);
    doc.Add(f);

    // Commit after every document so the index ends up with multiple segments.
    for (int i = 0; i < 17; i++)
    {
        f.SetValue("" + i);
        w.AddDocument(doc);
        w.Commit();
    }

    IndexReader r = w.GetReader();
    w.Close();
    Assert.IsTrue(r.GetSequentialSubReaders().Length > 1);

    ValueSource s1 = new IntFieldSource("field");
    DocValues v1 = s1.GetValues(r);
    DocValues v2 = new MultiValueSource(s1).GetValues(r);
    for (int i = 0; i < r.MaxDoc(); i++)
    {
        Assert.AreEqual(v1.IntVal(i), i);
        Assert.AreEqual(v2.IntVal(i), i);
    }

    Lucene.Net.Search.FieldCache_Fields.DEFAULT.PurgeAllCaches();
    r.Close();
    dir.Close();
}
public virtual void TestFieldSetValueChangeBinary() { Field field1 = new Field("field1", new byte[0], Field.Store.YES); Field field2 = new Field("field2", "", Field.Store.YES, Field.Index.ANALYZED); Assert.Throws <ArgumentException>(() => field1.SetValue("abc"), "did not hit expected exception"); Assert.Throws <ArgumentException>(() => field2.SetValue(new byte[0]), "did not hit expected exception"); }
public virtual void TestFieldSetValueChangeBinary() { Field field1 = new Field("field1", new byte[0], Field.Store.YES); Field field2 = new Field("field2", "", Field.Store.YES, Field.Index.ANALYZED); try { field1.SetValue("abc"); Assert.Fail("did not hit expected exception"); } catch (System.ArgumentException iae) { // expected } try { field2.SetValue(new byte[0]); Assert.Fail("did not hit expected exception"); } catch (System.ArgumentException iae) { // expected } }
public override void IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp)
{
    var count = 0;
    var sourceCount = 0;
    var sw = Stopwatch.StartNew();
    var start = SystemTime.UtcNow;
    int loadDocumentCount = 0;
    long loadDocumentDuration = 0;
    Write((indexWriter, analyzer, stats) =>
    {
        var processedKeys = new HashSet<string>();
        var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(indexId))
            .Where(x => x != null)
            .ToList();
        try
        {
            var indexingPerfStats = RecordCurrentBatch("Current", batch.Docs.Count);
            batch.SetIndexingPerformance(indexingPerfStats);

            var docIdTerm = new Term(Constants.DocumentIdFieldName);
            var documentsWrapped = batch.Docs.Select((doc, i) =>
            {
                Interlocked.Increment(ref sourceCount);
                if (doc.__document_id == null)
                    throw new ArgumentException(
                        string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));

                string documentId = doc.__document_id.ToString();
                if (processedKeys.Add(documentId) == false)
                    return doc;

                InvokeOnIndexEntryDeletedOnAllBatchers(batchers, docIdTerm.CreateTerm(documentId.ToLowerInvariant()));

                if (batch.SkipDeleteFromIndex[i] == false ||
                    context.ShouldRemoveFromIndex(documentId)) // maybe it is recently deleted?
                    indexWriter.DeleteDocuments(docIdTerm.CreateTerm(documentId.ToLowerInvariant()));

                return doc;
            })
            .Where(x => x is FilteredDocument == false)
            .ToList();

            var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();
            var allReferenceEtags = new ConcurrentQueue<IDictionary<string, Etag>>();

            BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, (partition) =>
            {
                var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(context.Database, indexDefinition, viewGenerator, logIndexing);
                var luceneDoc = new Document();
                var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);

                using (CurrentIndexingScope.Current = new CurrentIndexingScope(context.Database, PublicName))
                {
                    string currentDocId = null;
                    int outputPerDocId = 0;
                    Action<Exception, object> onErrorFunc;
                    bool skipDocument = false;

                    foreach (var doc in RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, stats, out onErrorFunc))
                    {
                        float boost;
                        IndexingResult indexingResult;
                        try
                        {
                            indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);
                        }
                        catch (Exception e)
                        {
                            onErrorFunc(e, doc);
                            continue;
                        }

                        // ReSharper disable once RedundantBoolCompare --> code clarity
                        if (indexingResult.NewDocId == null || indexingResult.ShouldSkip != false)
                        {
                            continue;
                        }
                        if (currentDocId != indexingResult.NewDocId)
                        {
                            currentDocId = indexingResult.NewDocId;
                            outputPerDocId = 0;
                            skipDocument = false;
                        }
                        if (skipDocument)
                            continue;
                        outputPerDocId++;
                        if (EnsureValidNumberOfOutputsForDocument(currentDocId, outputPerDocId) == false)
                        {
                            skipDocument = true;
                            continue;
                        }
                        Interlocked.Increment(ref count);

                        // Reuse one Document and id Field per partition; SetValue swaps in each new document id.
                        luceneDoc.GetFields().Clear();
                        luceneDoc.Boost = boost;
                        documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
                        luceneDoc.Add(documentIdField);
                        foreach (var field in indexingResult.Fields)
                        {
                            luceneDoc.Add(field);
                        }

                        batchers.ApplyAndIgnoreAllErrors(
                            exception =>
                            {
                                logIndexing.WarnException(
                                    string.Format("Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
                                                  indexId, indexingResult.NewDocId),
                                    exception);
                                context.AddError(indexId, indexingResult.NewDocId, exception.Message, "OnIndexEntryCreated Trigger");
                            },
                            trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc));

                        LogIndexedDocument(indexingResult.NewDocId, luceneDoc);
                        AddDocumentToIndex(indexWriter, luceneDoc, analyzer);
                        Interlocked.Increment(ref stats.IndexingSuccesses);
                    }

                    allReferenceEtags.Enqueue(CurrentIndexingScope.Current.ReferencesEtags);
                    allReferencedDocs.Enqueue(CurrentIndexingScope.Current.ReferencedDocuments);
                    Interlocked.Add(ref loadDocumentCount, CurrentIndexingScope.Current.LoadDocumentCount);
                    Interlocked.Add(ref loadDocumentDuration, CurrentIndexingScope.Current.LoadDocumentDuration.ElapsedMilliseconds);
                }
            });

            UpdateDocumentReferences(actions, allReferencedDocs, allReferenceEtags);
        }
        catch (Exception e)
        {
            batchers.ApplyAndIgnoreAllErrors(
                ex =>
                {
                    logIndexing.WarnException("Failed to notify index update trigger batcher about an error", ex);
                    context.AddError(indexId, null, ex.Message, "AnErrorOccured Trigger");
                },
                x => x.AnErrorOccured(e));
            throw;
        }
        finally
        {
            batchers.ApplyAndIgnoreAllErrors(
                e =>
                {
                    logIndexing.WarnException("Failed to dispose on index update trigger", e);
                    context.AddError(indexId, null, e.Message, "Dispose Trigger");
                },
                x => x.Dispose());
            BatchCompleted("Current");
        }

        return new IndexedItemsInfo(batch.HighestEtagBeforeFiltering)
        {
            ChangedDocs = sourceCount
        };
    });

    AddindexingPerformanceStat(new IndexingPerformanceStats
    {
        OutputCount = count,
        ItemsCount = sourceCount,
        InputCount = batch.Docs.Count,
        Duration = sw.Elapsed,
        Operation = "Index",
        Started = start,
        LoadDocumentCount = loadDocumentCount,
        LoadDocumentDurationMs = loadDocumentDuration
    });
    logIndexing.Debug("Indexed {0} documents for {1}", count, indexId);
}
public virtual void TestFieldSetValueChangeBinary() { Field field1 = new Field("field1", new byte[0], Field.Store.YES); Field field2 = new Field("field2", "", Field.Store.YES, Field.Index.ANALYZED); try { field1.SetValue("abc"); Assert.Fail("did not hit expected exception"); } catch (System.ArgumentException iae) { // expected } try { field2.SetValue(new byte[0]); Assert.Fail("did not hit expected exception"); } catch (System.ArgumentException iae) { // expected } }
public virtual void TestFieldSetValue() { Field field = new Field("id", "id1", Field.Store.YES, Field.Index.NOT_ANALYZED); Document doc = new Document(); doc.Add(field); doc.Add(new Field("keyword", "test", Field.Store.YES, Field.Index.NOT_ANALYZED)); RAMDirectory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); writer.AddDocument(doc); field.SetValue("id2"); writer.AddDocument(doc); field.SetValue("id3"); writer.AddDocument(doc); writer.Close(); Searcher searcher = new IndexSearcher(dir); Query query = new TermQuery(new Term("keyword", "test")); // ensure that queries return expected results without DateFilter first ScoreDoc[] hits = searcher.Search(query, null, 1000).scoreDocs; Assert.AreEqual(3, hits.Length); int result = 0; for (int i = 0; i < 3; i++) { Document doc2 = searcher.Doc(hits[i].doc); Field f = doc2.GetField("id"); if (f.StringValue().Equals("id1")) result |= 1; else if (f.StringValue().Equals("id2")) result |= 2; else if (f.StringValue().Equals("id3")) result |= 4; else Assert.Fail("unexpected id field"); } searcher.Close(); dir.Close(); Assert.AreEqual(7, result, "did not see all IDs"); }
public override void IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp)
{
    var count = 0;
    var sourceCount = 0;
    var sw = Stopwatch.StartNew();
    var start = SystemTime.UtcNow;
    Write((indexWriter, analyzer, stats) =>
    {
        var processedKeys = new HashSet<string>();
        var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(name))
            .Where(x => x != null)
            .ToList();
        try
        {
            RecordCurrentBatch("Current", batch.Docs.Count);
            var docIdTerm = new Term(Constants.DocumentIdFieldName);
            var documentsWrapped = batch.Docs.Select((doc, i) =>
            {
                Interlocked.Increment(ref sourceCount);
                if (doc.__document_id == null)
                    throw new ArgumentException(
                        string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));

                string documentId = doc.__document_id.ToString();
                if (processedKeys.Add(documentId) == false)
                    return doc;

                batchers.ApplyAndIgnoreAllErrors(
                    exception =>
                    {
                        logIndexing.WarnException(
                            string.Format("Error when executed OnIndexEntryDeleted trigger for index '{0}', key: '{1}'",
                                          name, documentId),
                            exception);
                        context.AddError(name, documentId, exception.Message, "OnIndexEntryDeleted Trigger");
                    },
                    trigger => trigger.OnIndexEntryDeleted(documentId));

                if (batch.SkipDeleteFromIndex[i] == false ||
                    context.ShouldRemoveFromIndex(documentId)) // maybe it is recently deleted?
                    indexWriter.DeleteDocuments(docIdTerm.CreateTerm(documentId.ToLowerInvariant()));

                return doc;
            })
            .Where(x => x is FilteredDocument == false)
            .ToList();

            var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();

            BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, (partition) =>
            {
                var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(context.Database, indexDefinition, viewGenerator);
                var luceneDoc = new Document();
                var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);

                using (CurrentIndexingScope.Current = new CurrentIndexingScope(LoadDocument, allReferencedDocs.Enqueue))
                {
                    foreach (var doc in RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, stats))
                    {
                        float boost;
                        var indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);
                        if (indexingResult.NewDocId != null && indexingResult.ShouldSkip == false)
                        {
                            Interlocked.Increment(ref count);
                            luceneDoc.GetFields().Clear();
                            luceneDoc.Boost = boost;
                            documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
                            luceneDoc.Add(documentIdField);
                            foreach (var field in indexingResult.Fields)
                            {
                                luceneDoc.Add(field);
                            }
                            batchers.ApplyAndIgnoreAllErrors(
                                exception =>
                                {
                                    logIndexing.WarnException(
                                        string.Format("Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
                                                      name, indexingResult.NewDocId),
                                        exception);
                                    context.AddError(name, indexingResult.NewDocId, exception.Message, "OnIndexEntryCreated Trigger");
                                },
                                trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc));
                            LogIndexedDocument(indexingResult.NewDocId, luceneDoc);
                            AddDocumentToIndex(indexWriter, luceneDoc, analyzer);
                        }
                        Interlocked.Increment(ref stats.IndexingSuccesses);
                    }
                }
            });

            var dic = context.ReferencingDocumentsByChildKeysWhichMightNeedReindexing_SimpleIndex;
            IDictionary<string, HashSet<string>> result;
            while (allReferencedDocs.TryDequeue(out result))
            {
                foreach (var referencedDocument in result)
                {
                    actions.Indexing.UpdateDocumentReferences(name, referencedDocument.Key, referencedDocument.Value);
                    foreach (var childDocumentKey in referencedDocument.Value)
                    {
                        dic.GetOrAdd(childDocumentKey, k => new ConcurrentBag<string>()).Add(referencedDocument.Key);
                    }
                }
            }
        }
        catch (Exception e)
        {
            batchers.ApplyAndIgnoreAllErrors(
                ex =>
                {
                    logIndexing.WarnException("Failed to notify index update trigger batcher about an error", ex);
                    context.AddError(name, null, ex.Message, "AnErrorOccured Trigger");
                },
                x => x.AnErrorOccured(e));
            throw;
        }
        finally
        {
            batchers.ApplyAndIgnoreAllErrors(
                e =>
                {
                    logIndexing.WarnException("Failed to dispose on index update trigger", e);
                    context.AddError(name, null, e.Message, "Dispose Trigger");
                },
                x => x.Dispose());
            BatchCompleted("Current");
        }

        return new IndexedItemsInfo
        {
            ChangedDocs = sourceCount,
            HighestETag = batch.HighestEtagInBatch
        };
    });

    AddindexingPerformanceStat(new IndexingPerformanceStats
    {
        OutputCount = count,
        ItemsCount = sourceCount,
        InputCount = batch.Docs.Count,
        Duration = sw.Elapsed,
        Operation = "Index",
        Started = start
    });
    logIndexing.Debug("Indexed {0} documents for {1}", count, name);
}
public override void IndexDocuments(AbstractViewGenerator viewGenerator, IEnumerable<object> documents, WorkContext context, IStorageActionsAccessor actions, DateTime minimumTimestamp)
{
    var count = 0;
    Write(context, (indexWriter, analyzer, stats) =>
    {
        var processedKeys = new HashSet<string>();
        var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(name))
            .Where(x => x != null)
            .ToList();
        var documentsWrapped = documents.Select((dynamic doc) =>
        {
            if (doc.__document_id == null)
                throw new ArgumentException(string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));

            count++;
            string documentId = doc.__document_id.ToString();
            if (processedKeys.Add(documentId) == false)
                return doc;

            batchers.ApplyAndIgnoreAllErrors(
                exception =>
                {
                    logIndexing.WarnException(
                        string.Format("Error when executed OnIndexEntryDeleted trigger for index '{0}', key: '{1}'",
                                      name, documentId),
                        exception);
                    context.AddError(name, documentId, exception.Message);
                },
                trigger => trigger.OnIndexEntryDeleted(documentId));
            indexWriter.DeleteDocuments(new Term(Constants.DocumentIdFieldName, documentId.ToLowerInvariant()));
            return doc;
        });

        var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(indexDefinition);
        var luceneDoc = new Document();
        var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES, Field.Index.ANALYZED_NO_NORMS);
        foreach (var doc in RobustEnumerationIndex(documentsWrapped, viewGenerator.MapDefinitions, actions, context, stats))
        {
            count++;
            float boost;
            var indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);
            if (indexingResult.NewDocId != null && indexingResult.ShouldSkip == false)
            {
                count += 1;
                luceneDoc.GetFields().Clear();
                luceneDoc.SetBoost(boost);
                documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
                luceneDoc.Add(documentIdField);
                foreach (var field in indexingResult.Fields)
                {
                    luceneDoc.Add(field);
                }
                batchers.ApplyAndIgnoreAllErrors(
                    exception =>
                    {
                        logIndexing.WarnException(
                            string.Format("Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
                                          name, indexingResult.NewDocId),
                            exception);
                        context.AddError(name, indexingResult.NewDocId, exception.Message);
                    },
                    trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc));
                LogIndexedDocument(indexingResult.NewDocId, luceneDoc);
                AddDocumentToIndex(indexWriter, luceneDoc, analyzer);
            }
            stats.IndexingSuccesses++;
        }
        batchers.ApplyAndIgnoreAllErrors(
            e =>
            {
                logIndexing.WarnException("Failed to dispose on index update trigger", e);
                context.AddError(name, null, e.Message);
            },
            x => x.Dispose());
        return count;
    });
    logIndexing.Debug("Indexed {0} documents for {1}", count, name);
}
public override IndexingPerformanceStats IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp, CancellationToken token)
{
    token.ThrowIfCancellationRequested();

    var count = 0;
    var sourceCount = 0;
    var writeToIndexStats = new List<PerformanceStats>();

    IndexingPerformanceStats performance = null;
    var performanceStats = new List<BasePerformanceStats>();

    var storageCommitDuration = new Stopwatch();

    actions.BeforeStorageCommit += storageCommitDuration.Start;
    actions.AfterStorageCommit += () =>
    {
        storageCommitDuration.Stop();
        performanceStats.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds));
    };

    Write((indexWriter, analyzer, stats) =>
    {
        var processedKeys = new HashSet<string>();
        var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(indexId))
            .Where(x => x != null)
            .ToList();

        try
        {
            performance = RecordCurrentBatch("Current", "Index", batch.Docs.Count);

            var deleteExistingDocumentsDuration = new Stopwatch();

            var docIdTerm = new Term(Constants.DocumentIdFieldName);
            var documentsWrapped = batch.Docs.Select((doc, i) =>
            {
                token.ThrowIfCancellationRequested();

                Interlocked.Increment(ref sourceCount);
                if (doc.__document_id == null)
                    throw new ArgumentException(
                        string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));

                string documentId = doc.__document_id.ToString();
                if (processedKeys.Add(documentId) == false)
                    return doc;

                InvokeOnIndexEntryDeletedOnAllBatchers(batchers, docIdTerm.CreateTerm(documentId.ToLowerInvariant()));

                if (batch.SkipDeleteFromIndex[i] == false ||
                    context.ShouldRemoveFromIndex(documentId)) // maybe it is recently deleted?
                {
                    using (StopwatchScope.For(deleteExistingDocumentsDuration))
                    {
                        indexWriter.DeleteDocuments(docIdTerm.CreateTerm(documentId.ToLowerInvariant()));
                    }
                }

                return doc;
            })
            .Where(x => x is FilteredDocument == false)
            .ToList();

            performanceStats.Add(new PerformanceStats
            {
                Name = IndexingOperation.Lucene_DeleteExistingDocument,
                DurationMs = deleteExistingDocumentsDuration.ElapsedMilliseconds
            });

            var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();
            var allReferenceEtags = new ConcurrentQueue<IDictionary<string, Etag>>();

            var parallelOperations = new ConcurrentQueue<ParallelBatchStats>();

            var parallelProcessingStart = SystemTime.UtcNow;

            BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, (partition) =>
            {
                token.ThrowIfCancellationRequested();
                var parallelStats = new ParallelBatchStats
                {
                    StartDelay = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds
                };

                var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(context.Database, indexDefinition, viewGenerator, logIndexing);
                var luceneDoc = new Document();
                var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);

                using (CurrentIndexingScope.Current = new CurrentIndexingScope(context.Database, PublicName))
                {
                    string currentDocId = null;
                    int outputPerDocId = 0;
                    Action<Exception, object> onErrorFunc;
                    bool skipDocument = false;

                    var linqExecutionDuration = new Stopwatch();
                    var addDocumentDuration = new Stopwatch();
                    var convertToLuceneDocumentDuration = new Stopwatch();

                    foreach (var doc in RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, stats, out onErrorFunc, linqExecutionDuration))
                    {
                        token.ThrowIfCancellationRequested();

                        float boost;
                        IndexingResult indexingResult;
                        using (StopwatchScope.For(convertToLuceneDocumentDuration))
                        {
                            try
                            {
                                indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);
                            }
                            catch (Exception e)
                            {
                                onErrorFunc(e, doc);
                                continue;
                            }
                        }

                        // ReSharper disable once RedundantBoolCompare --> code clarity
                        if (indexingResult.NewDocId == null || indexingResult.ShouldSkip != false)
                        {
                            continue;
                        }
                        if (currentDocId != indexingResult.NewDocId)
                        {
                            currentDocId = indexingResult.NewDocId;
                            outputPerDocId = 0;
                            skipDocument = false;
                        }
                        if (skipDocument)
                            continue;
                        outputPerDocId++;
                        if (EnsureValidNumberOfOutputsForDocument(currentDocId, outputPerDocId) == false)
                        {
                            skipDocument = true;
                            continue;
                        }
                        Interlocked.Increment(ref count);

                        using (StopwatchScope.For(convertToLuceneDocumentDuration))
                        {
                            luceneDoc.GetFields().Clear();
                            luceneDoc.Boost = boost;
                            documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
                            luceneDoc.Add(documentIdField);
                            foreach (var field in indexingResult.Fields)
                            {
                                luceneDoc.Add(field);
                            }
                        }

                        batchers.ApplyAndIgnoreAllErrors(
                            exception =>
                            {
                                logIndexing.WarnException(
                                    string.Format("Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
                                                  PublicName, indexingResult.NewDocId),
                                    exception);
                                context.AddError(indexId, PublicName, indexingResult.NewDocId, exception, "OnIndexEntryCreated Trigger");
                            },
                            trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc));

                        LogIndexedDocument(indexingResult.NewDocId, luceneDoc);

                        using (StopwatchScope.For(addDocumentDuration))
                        {
                            AddDocumentToIndex(indexWriter, luceneDoc, analyzer);
                        }

                        Interlocked.Increment(ref stats.IndexingSuccesses);
                    }

                    allReferenceEtags.Enqueue(CurrentIndexingScope.Current.ReferencesEtags);
                    allReferencedDocs.Enqueue(CurrentIndexingScope.Current.ReferencedDocuments);

                    parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.LoadDocument, CurrentIndexingScope.Current.LoadDocumentDuration.ElapsedMilliseconds));
                    parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Linq_MapExecution, linqExecutionDuration.ElapsedMilliseconds));
                    parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Lucene_ConvertToLuceneDocument, convertToLuceneDocumentDuration.ElapsedMilliseconds));
                    parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Lucene_AddDocument, addDocumentDuration.ElapsedMilliseconds));
                    parallelOperations.Enqueue(parallelStats);
                }
            });

            performanceStats.Add(new ParallelPerformanceStats
            {
                NumberOfThreads = parallelOperations.Count,
                DurationMs = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds,
                BatchedOperations = parallelOperations.ToList()
            });

            var updateDocumentReferencesDuration = new Stopwatch();
            using (StopwatchScope.For(updateDocumentReferencesDuration))
            {
                UpdateDocumentReferences(actions, allReferencedDocs, allReferenceEtags);
            }
            performanceStats.Add(PerformanceStats.From(IndexingOperation.UpdateDocumentReferences, updateDocumentReferencesDuration.ElapsedMilliseconds));
        }
        catch (Exception e)
        {
            batchers.ApplyAndIgnoreAllErrors(
                ex =>
                {
                    logIndexing.WarnException("Failed to notify index update trigger batcher about an error in " + PublicName, ex);
                    context.AddError(indexId, PublicName, null, ex, "AnErrorOccured Trigger");
                },
                x => x.AnErrorOccured(e));
            throw;
        }
        finally
        {
            batchers.ApplyAndIgnoreAllErrors(
                e =>
                {
                    logIndexing.WarnException("Failed to dispose on index update trigger in " + PublicName, e);
                    context.AddError(indexId, PublicName, null, e, "Dispose Trigger");
                },
                x => x.Dispose());
        }

        return new IndexedItemsInfo(batch.HighestEtagBeforeFiltering)
        {
            ChangedDocs = sourceCount
        };
    }, writeToIndexStats);

    performanceStats.AddRange(writeToIndexStats);

    performance.OnCompleted = () => BatchCompleted("Current", "Index", sourceCount, count, performanceStats);

    logIndexing.Debug("Indexed {0} documents for {1}", count, PublicName);

    return performance;
}
public override void IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, WorkContext context, IStorageActionsAccessor actions, DateTime minimumTimestamp)
{
    var count = 0;
    var sourceCount = 0;
    var sw = Stopwatch.StartNew();
    var start = SystemTime.UtcNow;
    Write(context, (indexWriter, analyzer, stats) =>
    {
        var processedKeys = new HashSet<string>();
        var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(name))
            .Where(x => x != null)
            .ToList();
        try
        {
            var docIdTerm = new Term(Constants.DocumentIdFieldName);
            var documentsWrapped = batch.Docs.Select((doc, i) =>
            {
                Interlocked.Increment(ref sourceCount);
                if (doc.__document_id == null)
                    throw new ArgumentException(
                        string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));

                string documentId = doc.__document_id.ToString();
                if (processedKeys.Add(documentId) == false)
                    return doc;

                batchers.ApplyAndIgnoreAllErrors(
                    exception =>
                    {
                        logIndexing.WarnException(
                            string.Format("Error when executed OnIndexEntryDeleted trigger for index '{0}', key: '{1}'",
                                          name, documentId),
                            exception);
                        context.AddError(name, documentId, exception.Message);
                    },
                    trigger => trigger.OnIndexEntryDeleted(documentId));

                if (batch.SkipDeleteFromIndex[i] == false)
                    indexWriter.DeleteDocuments(docIdTerm.CreateTerm(documentId.ToLowerInvariant()));

                return doc;
            })
            .Where(x => x is FilteredDocument == false)
            .ToList();

            BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, (partition) =>
            {
                var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(indexDefinition);
                var luceneDoc = new Document();
                var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
                foreach (var doc in RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, actions, stats))
                {
                    float boost;
                    var indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);
                    if (indexingResult.NewDocId != null && indexingResult.ShouldSkip == false)
                    {
                        Interlocked.Increment(ref count);
                        luceneDoc.GetFields().Clear();
                        luceneDoc.Boost = boost;
                        documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
                        luceneDoc.Add(documentIdField);
                        foreach (var field in indexingResult.Fields)
                        {
                            luceneDoc.Add(field);
                        }
                        batchers.ApplyAndIgnoreAllErrors(
                            exception =>
                            {
                                logIndexing.WarnException(
                                    string.Format("Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
                                                  name, indexingResult.NewDocId),
                                    exception);
                                context.AddError(name, indexingResult.NewDocId, exception.Message);
                            },
                            trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc));
                        LogIndexedDocument(indexingResult.NewDocId, luceneDoc);
                        AddDocumentToIndex(indexWriter, luceneDoc, analyzer);
                    }
                    Interlocked.Increment(ref stats.IndexingSuccesses);
                }
            });
        }
        catch (Exception e)
        {
            batchers.ApplyAndIgnoreAllErrors(
                ex =>
                {
                    logIndexing.WarnException("Failed to notify index update trigger batcher about an error", ex);
                    context.AddError(name, null, ex.Message);
                },
                x => x.AnErrorOccured(e));
            throw;
        }
        finally
        {
            batchers.ApplyAndIgnoreAllErrors(
                e =>
                {
                    logIndexing.WarnException("Failed to dispose on index update trigger", e);
                    context.AddError(name, null, e.Message);
                },
                x => x.Dispose());
        }
        return sourceCount;
    });
    AddindexingPerformanceStat(new IndexingPerformanceStats
    {
        OutputCount = count,
        InputCount = sourceCount,
        Duration = sw.Elapsed,
        Operation = "Index",
        Started = start
    });
    logIndexing.Debug("Indexed {0} documents for {1}", count, name);
}
public virtual void TestFieldSetValueChangeBinary() { Field field1 = new Field("field1", new byte[0], Field.Store.YES); Field field2 = new Field("field2", "", Field.Store.YES, Field.Index.ANALYZED); Assert.Throws<ArgumentException>(() => field1.SetValue("abc"), "did not hit expected exception"); Assert.Throws<ArgumentException>(() => field2.SetValue(new byte[0]), "did not hit expected exception"); }
// This method may be called concurrently, by both the ReduceTask (for removal)
// and by the ReducingExecuter (for add/modify). This is okay with us, since the
// Write() call is already handling locking properly
public void ReduceDocuments(AbstractViewGenerator viewGenerator, IEnumerable<object> mappedResults, WorkContext context, IStorageActionsAccessor actions, string[] reduceKeys)
{
    var count = 0;
    Write(context, (indexWriter, analyzer) =>
    {
        actions.Indexing.SetCurrentIndexStatsTo(name);
        var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(name))
            .Where(x => x != null)
            .ToList();
        foreach (var reduceKey in reduceKeys)
        {
            var entryKey = reduceKey;
            indexWriter.DeleteDocuments(new Term(Abstractions.Data.Constants.ReduceKeyFieldName, entryKey.ToLowerInvariant()));
            batchers.ApplyAndIgnoreAllErrors(
                exception =>
                {
                    logIndexing.WarnException(
                        string.Format("Error when executed OnIndexEntryDeleted trigger for index '{0}', key: '{1}'",
                                      name, entryKey),
                        exception);
                    context.AddError(name, entryKey, exception.Message);
                },
                trigger => trigger.OnIndexEntryDeleted(entryKey));
        }
        PropertyDescriptorCollection properties = null;
        var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(indexDefinition);
        var luceneDoc = new Document();
        var reduceKeyField = new Field(Abstractions.Data.Constants.ReduceKeyFieldName, "dummy", Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS);
        foreach (var doc in RobustEnumerationReduce(mappedResults, viewGenerator.ReduceDefinition, actions, context))
        {
            count++;
            var fields = GetFields(anonymousObjectToLuceneDocumentConverter, doc, ref properties).ToList();
            string reduceKeyAsString = ExtractReduceKey(viewGenerator, doc);
            reduceKeyField.SetValue(reduceKeyAsString.ToLowerInvariant());
            luceneDoc.GetFields().Clear();
            luceneDoc.Add(reduceKeyField);
            foreach (var field in fields)
            {
                luceneDoc.Add(field);
            }
            batchers.ApplyAndIgnoreAllErrors(
                exception =>
                {
                    logIndexing.WarnException(
                        string.Format("Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
                                      name, reduceKeyAsString),
                        exception);
                    context.AddError(name, reduceKeyAsString, exception.Message);
                },
                trigger => trigger.OnIndexEntryCreated(reduceKeyAsString, luceneDoc));
            logIndexing.Debug("Reduce key {0} result in index {1} gave document: {2}", reduceKeyAsString, name, luceneDoc);
            AddDocumentToIndex(indexWriter, luceneDoc, analyzer);
            actions.Indexing.IncrementReduceSuccessIndexing();
        }
        batchers.ApplyAndIgnoreAllErrors(
            e =>
            {
                logIndexing.WarnException("Failed to dispose on index update trigger", e);
                context.AddError(name, null, e.Message);
            },
            x => x.Dispose());
        return true;
    });
    logIndexing.Debug(() => string.Format("Reduce resulted in {0} entries for {1} for reduce keys: {2}",
        count, name, string.Join(", ", reduceKeys)));
}