SetValue() public method

Expert: change the value of this field. This can be used during indexing to re-use a single Field instance and improve indexing speed by avoiding the GC cost of allocating and reclaiming Field instances. Typically a single Document instance is re-used as well. This helps most on small documents.

Each Field instance should only be used once within a single Document instance. See ImproveIndexingSpeed for details.

public SetValue ( System.String value_Renamed ) : void
value_Renamed   System.String   The new value for the field.
return  void
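
A minimal sketch of the reuse pattern, distilled from the tests below (idField, doc and writer are illustrative names; writer is assumed to be an already-open IndexWriter):

    Field idField = new Field("id", "id1", Field.Store.YES, Field.Index.NOT_ANALYZED);
    Document doc = new Document();
    doc.Add(idField);

    writer.AddDocument(doc);    // indexes the document with value "id1"
    idField.SetValue("id2");    // change the value on the same Field instance...
    writer.AddDocument(doc);    // ...and re-add the same Document, now indexed with "id2"

Example #1 shows this pattern in a complete test. Examples #3 and #4 show that SetValue throws an ArgumentException when the new value's type does not match the field's original type (for instance, setting a string value on a binary field).
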
Example #1
        public virtual void  TestFieldSetValue()
        {
            Field    field = new Field("id", "id1", Field.Store.YES, Field.Index.NOT_ANALYZED);
            Document doc   = new Document();

            doc.Add(field);
            doc.Add(new Field("keyword", "test", Field.Store.YES, Field.Index.NOT_ANALYZED));

            RAMDirectory dir    = new RAMDirectory();
            IndexWriter  writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            writer.AddDocument(doc);
            field.SetValue("id2");
            writer.AddDocument(doc);
            field.SetValue("id3");
            writer.AddDocument(doc);
            writer.Close();

            Searcher searcher = new IndexSearcher(dir);

            Query query = new TermQuery(new Term("keyword", "test"));

            // ensure that queries return expected results without DateFilter first
            ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(3, hits.Length);
            int result = 0;

            for (int i = 0; i < 3; i++)
            {
                Document doc2 = searcher.Doc(hits[i].doc);
                Field    f    = doc2.GetField("id");
                if (f.StringValue().Equals("id1"))
                {
                    result |= 1;
                }
                else if (f.StringValue().Equals("id2"))
                {
                    result |= 2;
                }
                else if (f.StringValue().Equals("id3"))
                {
                    result |= 4;
                }
                else
                {
                    Assert.Fail("unexpected id field");
                }
            }
            searcher.Close();
            dir.Close();
            Assert.AreEqual(7, result, "did not see all IDs");
        }
Example #2
		public virtual void  TestMultiValueSource()
		{
			Directory dir = new MockRAMDirectory();
			IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
			Document doc = new Document();
			Field f = new Field("field", "", Field.Store.NO, Field.Index.NOT_ANALYZED);
			doc.Add(f);
			
			// Re-use one Field/Document pair: SetValue changes the stored value each iteration, and the
			// per-document Commit() produces multiple segments (verified below via GetSequentialSubReaders).
			for (int i = 0; i < 17; i++)
			{
				f.SetValue("" + i);
				w.AddDocument(doc);
				w.Commit();
			}
			
			IndexReader r = w.GetReader();
			w.Close();
			
			Assert.IsTrue(r.GetSequentialSubReaders().Length > 1);
			
			ValueSource s1 = new IntFieldSource("field");
			DocValues v1 = s1.GetValues(r);
			DocValues v2 = new MultiValueSource(s1).GetValues(r);
			
			for (int i = 0; i < r.MaxDoc(); i++)
			{
				Assert.AreEqual(v1.IntVal(i), i);
				Assert.AreEqual(v2.IntVal(i), i);
			}
			
			Lucene.Net.Search.FieldCache_Fields.DEFAULT.PurgeAllCaches();
			
			r.Close();
			dir.Close();
		}
Example #3
        public virtual void  TestFieldSetValueChangeBinary()
        {
            Field field1 = new Field("field1", new byte[0], Field.Store.YES);
            Field field2 = new Field("field2", "", Field.Store.YES, Field.Index.ANALYZED);

            Assert.Throws <ArgumentException>(() => field1.SetValue("abc"), "did not hit expected exception");
            Assert.Throws <ArgumentException>(() => field2.SetValue(new byte[0]), "did not hit expected exception");
        }
Example #4
        public virtual void  TestFieldSetValueChangeBinary()
        {
            Field field1 = new Field("field1", new byte[0], Field.Store.YES);
            Field field2 = new Field("field2", "", Field.Store.YES, Field.Index.ANALYZED);

            try
            {
                field1.SetValue("abc");
                Assert.Fail("did not hit expected exception");
            }
            catch (System.ArgumentException iae)
            {
                // expected
            }
            try
            {
                field2.SetValue(new byte[0]);
                Assert.Fail("did not hit expected exception");
            }
            catch (System.ArgumentException iae)
            {
                // expected
            }
        }
Example #5
		public override void IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp)
		{
			var count = 0;
			var sourceCount = 0;
			var sw = Stopwatch.StartNew();
			var start = SystemTime.UtcNow;
			int loadDocumentCount = 0;
			long loadDocumentDuration = 0;
			Write((indexWriter, analyzer, stats) =>
			{
				var processedKeys = new HashSet<string>();
				var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(indexId))
					.Where(x => x != null)
					.ToList();
				try
				{
					var indexingPerfStats = RecordCurrentBatch("Current", batch.Docs.Count);
					batch.SetIndexingPerformance(indexingPerfStats);

					var docIdTerm = new Term(Constants.DocumentIdFieldName);
					var documentsWrapped = batch.Docs.Select((doc, i) =>
					{
						Interlocked.Increment(ref sourceCount);
						if (doc.__document_id == null)
							throw new ArgumentException(
								string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));

						string documentId = doc.__document_id.ToString();
						if (processedKeys.Add(documentId) == false)
							return doc;

						InvokeOnIndexEntryDeletedOnAllBatchers(batchers, docIdTerm.CreateTerm(documentId.ToLowerInvariant()));

						if (batch.SkipDeleteFromIndex[i] == false ||
							context.ShouldRemoveFromIndex(documentId)) // maybe it is recently deleted?
							indexWriter.DeleteDocuments(docIdTerm.CreateTerm(documentId.ToLowerInvariant()));

						return doc;
					})
						.Where(x => x is FilteredDocument == false)
						.ToList();

					var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();
					var allReferenceEtags = new ConcurrentQueue<IDictionary<string, Etag>>();

					BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, (partition) =>
					{
						var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(context.Database, indexDefinition, viewGenerator, logIndexing);
						var luceneDoc = new Document();
						var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES,
														Field.Index.NOT_ANALYZED_NO_NORMS);

						using (CurrentIndexingScope.Current = new CurrentIndexingScope(context.Database, PublicName))
						{
							string currentDocId = null;
							int outputPerDocId = 0;
							Action<Exception, object> onErrorFunc;
							bool skipDocument = false;
							foreach (var doc in RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, stats, out onErrorFunc))
							{
								float boost;
								IndexingResult indexingResult;
								try
								{
									indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);
								}
								catch (Exception e)
								{
									onErrorFunc(e, doc);
									continue;
								}

								// ReSharper disable once RedundantBoolCompare --> code clarity
								if (indexingResult.NewDocId == null || indexingResult.ShouldSkip != false)
								{
									continue;
								}
								if (currentDocId != indexingResult.NewDocId)
								{
									currentDocId = indexingResult.NewDocId;
									outputPerDocId = 0;
									skipDocument = false;
								}
								if (skipDocument)
									continue;
								outputPerDocId++;
								if (EnsureValidNumberOfOutputsForDocument(currentDocId, outputPerDocId) == false)
								{
									skipDocument = true;
									continue;
								}
								Interlocked.Increment(ref count);
								luceneDoc.GetFields().Clear();
								luceneDoc.Boost = boost;
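								// documentIdField is allocated once per partition (above) with a dummy value;
								// SetValue re-uses that single Field instance for every output document instead of allocating a new one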
								documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
								luceneDoc.Add(documentIdField);
								foreach (var field in indexingResult.Fields)
								{
									luceneDoc.Add(field);
								}
								batchers.ApplyAndIgnoreAllErrors(
									exception =>
									{
										logIndexing.WarnException(
										string.Format(
											"Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
											indexId, indexingResult.NewDocId),
											exception);
										context.AddError(indexId,
															 indexingResult.NewDocId,
															 exception.Message,
															 "OnIndexEntryCreated Trigger"
												);
									},
									trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc));
								LogIndexedDocument(indexingResult.NewDocId, luceneDoc);
								AddDocumentToIndex(indexWriter, luceneDoc, analyzer);

								Interlocked.Increment(ref stats.IndexingSuccesses);
							}
							allReferenceEtags.Enqueue(CurrentIndexingScope.Current.ReferencesEtags);
							allReferencedDocs.Enqueue(CurrentIndexingScope.Current.ReferencedDocuments);

							Interlocked.Add(ref loadDocumentCount, CurrentIndexingScope.Current.LoadDocumentCount);
							Interlocked.Add(ref loadDocumentDuration, CurrentIndexingScope.Current.LoadDocumentDuration.ElapsedMilliseconds);
						}
					});
					UpdateDocumentReferences(actions, allReferencedDocs, allReferenceEtags);
				}
				catch (Exception e)
				{
					batchers.ApplyAndIgnoreAllErrors(
						ex =>
						{
							logIndexing.WarnException("Failed to notify index update trigger batcher about an error", ex);
							context.AddError(indexId, null, ex.Message, "AnErrorOccured Trigger");
						},
						x => x.AnErrorOccured(e));
					throw;
				}
				finally
				{
					batchers.ApplyAndIgnoreAllErrors(
						e =>
						{
							logIndexing.WarnException("Failed to dispose on index update trigger", e);
							context.AddError(indexId, null, e.Message, "Dispose Trigger");
						},
						x => x.Dispose());
					BatchCompleted("Current");
				}
				return new IndexedItemsInfo(batch.HighestEtagBeforeFiltering)
				{
					ChangedDocs = sourceCount
				};
			});

			AddindexingPerformanceStat(new IndexingPerformanceStats
			{
				OutputCount = count,
				ItemsCount = sourceCount,
				InputCount = batch.Docs.Count,
				Duration = sw.Elapsed,
				Operation = "Index",
				Started = start,
				LoadDocumentCount = loadDocumentCount,
				LoadDocumentDurationMs = loadDocumentDuration 
			});
			logIndexing.Debug("Indexed {0} documents for {1}", count, indexId);
		}
Example #6
		public virtual void  TestFieldSetValue()
		{
			
			Field field = new Field("id", "id1", Field.Store.YES, Field.Index.NOT_ANALYZED);
			Document doc = new Document();
			doc.Add(field);
			doc.Add(new Field("keyword", "test", Field.Store.YES, Field.Index.NOT_ANALYZED));
			
			RAMDirectory dir = new RAMDirectory();
			IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
			writer.AddDocument(doc);
			field.SetValue("id2");
			writer.AddDocument(doc);
			field.SetValue("id3");
			writer.AddDocument(doc);
			writer.Close();
			
			Searcher searcher = new IndexSearcher(dir);
			
			Query query = new TermQuery(new Term("keyword", "test"));
			
			// ensure that queries return expected results without DateFilter first
			ScoreDoc[] hits = searcher.Search(query, null, 1000).scoreDocs;
			Assert.AreEqual(3, hits.Length);
			int result = 0;
			for (int i = 0; i < 3; i++)
			{
				Document doc2 = searcher.Doc(hits[i].doc);
				Field f = doc2.GetField("id");
				if (f.StringValue().Equals("id1"))
					result |= 1;
				else if (f.StringValue().Equals("id2"))
					result |= 2;
				else if (f.StringValue().Equals("id3"))
					result |= 4;
				else
					Assert.Fail("unexpected id field");
			}
			searcher.Close();
			dir.Close();
			Assert.AreEqual(7, result, "did not see all IDs");
		}
Example #7
        public override void IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp)
        {
            var count = 0;
            var sourceCount = 0;
            var sw = Stopwatch.StartNew();
            var start = SystemTime.UtcNow;
            Write((indexWriter, analyzer, stats) =>
            {
                var processedKeys = new HashSet<string>();
                var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(name))
                    .Where(x => x != null)
                    .ToList();
                try
                {
                    RecordCurrentBatch("Current", batch.Docs.Count);
                    var docIdTerm = new Term(Constants.DocumentIdFieldName);
                    var documentsWrapped = batch.Docs.Select((doc, i) =>
                    {
                        Interlocked.Increment(ref sourceCount);
                        if (doc.__document_id == null)
                            throw new ArgumentException(
                                string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));

                        string documentId = doc.__document_id.ToString();
                        if (processedKeys.Add(documentId) == false)
                            return doc;
                        batchers.ApplyAndIgnoreAllErrors(
                            exception =>
                            {
                                logIndexing.WarnException(
                                    string.Format("Error when executed OnIndexEntryDeleted trigger for index '{0}', key: '{1}'",
                                                  name, documentId),
                                    exception);
                                context.AddError(name,
                                                 documentId,
                                                 exception.Message,
                                                 "OnIndexEntryDeleted Trigger"
                                    );
                            },
                            trigger => trigger.OnIndexEntryDeleted(documentId));
                        if (batch.SkipDeleteFromIndex[i] == false ||
                            context.ShouldRemoveFromIndex(documentId)) // maybe it is recently deleted?
                            indexWriter.DeleteDocuments(docIdTerm.CreateTerm(documentId.ToLowerInvariant()));

                        return doc;
                    })
                        .Where(x => x is FilteredDocument == false)
                        .ToList();

                    var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();

                    BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, (partition) =>
                    {
						var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(context.Database, indexDefinition, viewGenerator);
                        var luceneDoc = new Document();
                        var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES,
                                                        Field.Index.NOT_ANALYZED_NO_NORMS);

                        using (CurrentIndexingScope.Current = new CurrentIndexingScope(LoadDocument, allReferencedDocs.Enqueue))
                        {
                            foreach (var doc in RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, stats))
                            {
                                float boost;
                                var indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);

                                if (indexingResult.NewDocId != null && indexingResult.ShouldSkip == false)
                                {
                                    Interlocked.Increment(ref count);
                                    luceneDoc.GetFields().Clear();
                                    luceneDoc.Boost = boost;
                                    documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
                                    luceneDoc.Add(documentIdField);
                                    foreach (var field in indexingResult.Fields)
                                    {
                                        luceneDoc.Add(field);
                                    }
                                    batchers.ApplyAndIgnoreAllErrors(
                                        exception =>
                                        {
                                            logIndexing.WarnException(
                                                string.Format("Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
                                                              name, indexingResult.NewDocId),
                                                exception);
                                            context.AddError(name,
                                                             indexingResult.NewDocId,
                                                             exception.Message,
                                                             "OnIndexEntryCreated Trigger"
                                                );
                                        },
                                        trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc));
                                    LogIndexedDocument(indexingResult.NewDocId, luceneDoc);
                                    AddDocumentToIndex(indexWriter, luceneDoc, analyzer);
                                }

                                Interlocked.Increment(ref stats.IndexingSuccesses);
                            }
                        }
                    });

                    var dic = context.ReferencingDocumentsByChildKeysWhichMightNeedReindexing_SimpleIndex;
                    IDictionary<string, HashSet<string>> result;
                    while (allReferencedDocs.TryDequeue(out result))
                    {
                        foreach (var referencedDocument in result)
                        {
                            actions.Indexing.UpdateDocumentReferences(name, referencedDocument.Key, referencedDocument.Value);
                            foreach (var childDocumentKey in referencedDocument.Value)
                            {
                                dic.GetOrAdd(childDocumentKey, k => new ConcurrentBag<string>()).Add(referencedDocument.Key);
                            }
                        }
                    }

                }
                catch (Exception e)
                {
                    batchers.ApplyAndIgnoreAllErrors(
                        ex =>
                        {
                            logIndexing.WarnException("Failed to notify index update trigger batcher about an error", ex);
                            context.AddError(name, null, ex.Message, "AnErrorOccured Trigger");
                        },
                        x => x.AnErrorOccured(e));
                    throw;
                }
                finally
                {
                    batchers.ApplyAndIgnoreAllErrors(
                        e =>
                        {
                            logIndexing.WarnException("Failed to dispose on index update trigger", e);
                            context.AddError(name, null, e.Message, "Dispose Trigger");
                        },
                        x => x.Dispose());
                    BatchCompleted("Current");
                }
                return new IndexedItemsInfo
                {
                    ChangedDocs = sourceCount,
                    HighestETag = batch.HighestEtagInBatch
                };
            });

            AddindexingPerformanceStat(new IndexingPerformanceStats
            {
                OutputCount = count,
                ItemsCount = sourceCount,
                InputCount = batch.Docs.Count,
                Duration = sw.Elapsed,
                Operation = "Index",
                Started = start
            });
            logIndexing.Debug("Indexed {0} documents for {1}", count, name);
        }
Example #8
		public override void IndexDocuments(AbstractViewGenerator viewGenerator, IEnumerable<object> documents, WorkContext context, IStorageActionsAccessor actions, DateTime minimumTimestamp)
		{
			var count = 0;
			Write(context, (indexWriter, analyzer, stats) =>
			{
				var processedKeys = new HashSet<string>();
				var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(name))
					.Where(x => x != null)
					.ToList();
				var documentsWrapped = documents.Select((dynamic doc) =>
				{
					if(doc.__document_id == null)
						throw new ArgumentException(string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));

					count++;
					string documentId = doc.__document_id.ToString();
					if (processedKeys.Add(documentId) == false)
						return doc;
					batchers.ApplyAndIgnoreAllErrors(
						exception =>
						{
							logIndexing.WarnException(
								string.Format("Error when executed OnIndexEntryDeleted trigger for index '{0}', key: '{1}'",
												   name, documentId),
								exception);
							context.AddError(name,
											 documentId,
											 exception.Message
								);
						},
						trigger => trigger.OnIndexEntryDeleted(documentId));
					indexWriter.DeleteDocuments(new Term(Constants.DocumentIdFieldName, documentId.ToLowerInvariant()));
					return doc;
				});
				var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(indexDefinition);
				var luceneDoc = new Document();
				var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES, Field.Index.ANALYZED_NO_NORMS);
				foreach (var doc in RobustEnumerationIndex(documentsWrapped, viewGenerator.MapDefinitions, actions, context, stats))
				{
					count++;

					float boost;
					var indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);

					if (indexingResult.NewDocId != null && indexingResult.ShouldSkip == false)
					{
						count += 1;
						luceneDoc.GetFields().Clear();
						luceneDoc.SetBoost(boost);
						documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
						luceneDoc.Add(documentIdField);
						foreach (var field in indexingResult.Fields)
						{
							luceneDoc.Add(field);
						}
						batchers.ApplyAndIgnoreAllErrors(
							exception =>
							{
								logIndexing.WarnException(
									string.Format( "Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
													   name, indexingResult.NewDocId),
									exception);
								context.AddError(name,
												 indexingResult.NewDocId,
												 exception.Message
									);
							},
							trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc));
						LogIndexedDocument(indexingResult.NewDocId, luceneDoc);
						AddDocumentToIndex(indexWriter, luceneDoc, analyzer);
					}

					stats.IndexingSuccesses++;
				}
				batchers.ApplyAndIgnoreAllErrors(
					e =>
					{
						logIndexing.WarnException("Failed to dispose on index update trigger", e);
						context.AddError(name, null, e.Message);
					},
					x => x.Dispose());
				return count;
			});
			logIndexing.Debug("Indexed {0} documents for {1}", count, name);
		}
Example #9
		public override IndexingPerformanceStats IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp, CancellationToken token)
		{
			token.ThrowIfCancellationRequested();

			var count = 0;
			var sourceCount = 0;
			var writeToIndexStats = new List<PerformanceStats>();

			IndexingPerformanceStats performance = null;
			var performanceStats = new List<BasePerformanceStats>();

			var storageCommitDuration = new Stopwatch();

			actions.BeforeStorageCommit += storageCommitDuration.Start;

			actions.AfterStorageCommit += () =>
			{
				storageCommitDuration.Stop();

				performanceStats.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds));
			};

			Write((indexWriter, analyzer, stats) =>
			{
				var processedKeys = new HashSet<string>();
				var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(indexId))
					.Where(x => x != null)
					.ToList();

				try
				{
					performance = RecordCurrentBatch("Current", "Index", batch.Docs.Count);

					var deleteExistingDocumentsDuration = new Stopwatch();
					var docIdTerm = new Term(Constants.DocumentIdFieldName);
					var documentsWrapped = batch.Docs.Select((doc, i) =>
					{
						token.ThrowIfCancellationRequested();

						Interlocked.Increment(ref sourceCount);
						if (doc.__document_id == null)
							throw new ArgumentException(
								string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));

						string documentId = doc.__document_id.ToString();
						if (processedKeys.Add(documentId) == false)
							return doc;

						InvokeOnIndexEntryDeletedOnAllBatchers(batchers, docIdTerm.CreateTerm(documentId.ToLowerInvariant()));

						if (batch.SkipDeleteFromIndex[i] == false ||
						    context.ShouldRemoveFromIndex(documentId)) // maybe it is recently deleted?
						{
							using (StopwatchScope.For(deleteExistingDocumentsDuration))
							{
								indexWriter.DeleteDocuments(docIdTerm.CreateTerm(documentId.ToLowerInvariant()));
							}
						}

						return doc;
					})
					.Where(x => x is FilteredDocument == false)
					.ToList();

					performanceStats.Add(new PerformanceStats
					{
						Name = IndexingOperation.Lucene_DeleteExistingDocument,
						DurationMs = deleteExistingDocumentsDuration.ElapsedMilliseconds
					});

					var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();
					var allReferenceEtags = new ConcurrentQueue<IDictionary<string, Etag>>();

					var parallelOperations = new ConcurrentQueue<ParallelBatchStats>();

					var parallelProcessingStart = SystemTime.UtcNow;

					BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, (partition) =>
					{
						token.ThrowIfCancellationRequested();
						var parallelStats = new ParallelBatchStats
						{
							StartDelay = (long) (SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds
						};

						var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(context.Database, indexDefinition, viewGenerator, logIndexing);
						var luceneDoc = new Document();
						var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES,
														Field.Index.NOT_ANALYZED_NO_NORMS);

						using (CurrentIndexingScope.Current = new CurrentIndexingScope(context.Database, PublicName))
						{
							string currentDocId = null;
							int outputPerDocId = 0;
							Action<Exception, object> onErrorFunc;
							bool skipDocument = false;

							var linqExecutionDuration = new Stopwatch();
							var addDocumentDutation = new Stopwatch();
							var convertToLuceneDocumentDuration = new Stopwatch();

							foreach (var doc in RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, stats, out onErrorFunc, linqExecutionDuration))
							{
								token.ThrowIfCancellationRequested();

								float boost;
								IndexingResult indexingResult;
								using (StopwatchScope.For(convertToLuceneDocumentDuration))
								{
									try
									{

										indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);
									}
									catch (Exception e)
									{
										onErrorFunc(e, doc);
										continue;
									}
								}

								// ReSharper disable once RedundantBoolCompare --> code clarity
								if (indexingResult.NewDocId == null || indexingResult.ShouldSkip != false)
								{
									continue;
								}
								if (currentDocId != indexingResult.NewDocId)
								{
									currentDocId = indexingResult.NewDocId;
									outputPerDocId = 0;
									skipDocument = false;
								}
								if (skipDocument)
									continue;
								outputPerDocId++;
								if (EnsureValidNumberOfOutputsForDocument(currentDocId, outputPerDocId) == false)
								{
									skipDocument = true;
									continue;
								}
								Interlocked.Increment(ref count);

								using (StopwatchScope.For(convertToLuceneDocumentDuration))
								{
									luceneDoc.GetFields().Clear();
									luceneDoc.Boost = boost;
									documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
									luceneDoc.Add(documentIdField);
									foreach (var field in indexingResult.Fields)
									{
										luceneDoc.Add(field);
									}
								}

								batchers.ApplyAndIgnoreAllErrors(
									exception =>
									{
										logIndexing.WarnException(
										string.Format(
											"Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
											PublicName, indexingResult.NewDocId),
											exception);
										context.AddError(
											indexId,
											PublicName,
											indexingResult.NewDocId,
											exception,
											"OnIndexEntryCreated Trigger");
									},
									trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc));
								LogIndexedDocument(indexingResult.NewDocId, luceneDoc);

								using (StopwatchScope.For(addDocumentDutation))
								{
									AddDocumentToIndex(indexWriter, luceneDoc, analyzer);
								}

								Interlocked.Increment(ref stats.IndexingSuccesses);
							}
							allReferenceEtags.Enqueue(CurrentIndexingScope.Current.ReferencesEtags);
							allReferencedDocs.Enqueue(CurrentIndexingScope.Current.ReferencedDocuments);

							parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.LoadDocument, CurrentIndexingScope.Current.LoadDocumentDuration.ElapsedMilliseconds));
							parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Linq_MapExecution, linqExecutionDuration.ElapsedMilliseconds));
							parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Lucene_ConvertToLuceneDocument, convertToLuceneDocumentDuration.ElapsedMilliseconds));
							parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Lucene_AddDocument, addDocumentDutation.ElapsedMilliseconds));
							parallelOperations.Enqueue(parallelStats);
						}
					});

					performanceStats.Add(new ParallelPerformanceStats
					{
						NumberOfThreads = parallelOperations.Count,
						DurationMs = (long) (SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds,
						BatchedOperations = parallelOperations.ToList()
					});

					var updateDocumentReferencesDuration = new Stopwatch();
					using (StopwatchScope.For(updateDocumentReferencesDuration))
					{
						UpdateDocumentReferences(actions, allReferencedDocs, allReferenceEtags);
					}
					performanceStats.Add(PerformanceStats.From(IndexingOperation.UpdateDocumentReferences, updateDocumentReferencesDuration.ElapsedMilliseconds));
				}
				catch (Exception e)
				{
					batchers.ApplyAndIgnoreAllErrors(
						ex =>
						{
							logIndexing.WarnException("Failed to notify index update trigger batcher about an error in " + PublicName, ex);
							context.AddError(indexId, PublicName, null, ex, "AnErrorOccured Trigger");
						},
						x => x.AnErrorOccured(e));
					throw;
				}
				finally
				{
					batchers.ApplyAndIgnoreAllErrors(
						e =>
						{
							logIndexing.WarnException("Failed to dispose on index update trigger in " + PublicName, e);
							context.AddError(indexId, PublicName, null, e, "Dispose Trigger");
						},
						x => x.Dispose());
				}
				return new IndexedItemsInfo(batch.HighestEtagBeforeFiltering)
				{
					ChangedDocs = sourceCount
				};
			}, writeToIndexStats);

			performanceStats.AddRange(writeToIndexStats);

			performance.OnCompleted = () => BatchCompleted("Current", "Index", sourceCount, count, performanceStats);

			logIndexing.Debug("Indexed {0} documents for {1}", count, PublicName);

			return performance;
		}
Example #10
		public override void IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, WorkContext context, IStorageActionsAccessor actions, DateTime minimumTimestamp)
		{
			var count = 0;
			var sourceCount = 0;
			var sw = Stopwatch.StartNew();
			var start = SystemTime.UtcNow;
			Write(context, (indexWriter, analyzer, stats) =>
			{
				var processedKeys = new HashSet<string>();
				var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(name))
					.Where(x => x != null)
					.ToList();
				try
				{
					var docIdTerm = new Term(Constants.DocumentIdFieldName);
					var documentsWrapped = batch.Docs.Select((doc,i) =>
					{
						Interlocked.Increment(ref sourceCount);
						if (doc.__document_id == null)
							throw new ArgumentException(
								string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));

						string documentId = doc.__document_id.ToString();
						if (processedKeys.Add(documentId) == false)
							return doc;
						batchers.ApplyAndIgnoreAllErrors(
							exception =>
							{
								logIndexing.WarnException(
									string.Format("Error when executed OnIndexEntryDeleted trigger for index '{0}', key: '{1}'",
									              name, documentId),
									exception);
								context.AddError(name,
								                 documentId,
								                 exception.Message
									);
							},
							trigger => trigger.OnIndexEntryDeleted(documentId));
						if(batch.SkipDeleteFromIndex[i] == false)
							indexWriter.DeleteDocuments(docIdTerm.CreateTerm(documentId.ToLowerInvariant()));
						return doc;
					})
						.Where(x => x is FilteredDocument == false)
						.ToList();


					BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, (partition) =>
					{
						var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(indexDefinition);
						var luceneDoc = new Document();
						var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES,
						                                Field.Index.NOT_ANALYZED_NO_NORMS);

						foreach (var doc in RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, actions, stats))
						{
							float boost;
							var indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);

							if (indexingResult.NewDocId != null && indexingResult.ShouldSkip == false)
							{
								Interlocked.Increment(ref count);
								luceneDoc.GetFields().Clear();
								luceneDoc.Boost = boost;
								documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
								luceneDoc.Add(documentIdField);
								foreach (var field in indexingResult.Fields)
								{
									luceneDoc.Add(field);
								}
								batchers.ApplyAndIgnoreAllErrors(
									exception =>
									{
										logIndexing.WarnException(
											string.Format("Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
											              name, indexingResult.NewDocId),
											exception);
										context.AddError(name,
										                 indexingResult.NewDocId,
										                 exception.Message
											);
									},
									trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc));
								LogIndexedDocument(indexingResult.NewDocId, luceneDoc);
								AddDocumentToIndex(indexWriter, luceneDoc, analyzer);
							}

							Interlocked.Increment(ref stats.IndexingSuccesses);
						}
					});
				}
				catch(Exception e)
				{
					batchers.ApplyAndIgnoreAllErrors(
						ex =>
						{
							logIndexing.WarnException("Failed to notify index update trigger batcher about an error", ex);
							context.AddError(name, null, ex.Message);
						},
						x => x.AnErrorOccured(e));
					throw;
				}
				finally
				{
					batchers.ApplyAndIgnoreAllErrors(
						e =>
						{
							logIndexing.WarnException("Failed to dispose on index update trigger", e);
							context.AddError(name, null, e.Message);
						},
						x => x.Dispose());
				}
				return sourceCount;
			});
			AddindexingPerformanceStat(new IndexingPerformanceStats
			{
				OutputCount = count,
				InputCount = sourceCount,
				Duration = sw.Elapsed,
				Operation = "Index",
				Started = start
			});
			logIndexing.Debug("Indexed {0} documents for {1}", count, name);
		}
Example #11
		// This method may be called concurrently, by both the ReduceTask (for removal)
		// and by the ReducingExecuter (for add/modify). This is okay with us, since the 
		// Write() call is already handling locking properly
		public void ReduceDocuments(AbstractViewGenerator viewGenerator,
									IEnumerable<object> mappedResults,
									WorkContext context,
									IStorageActionsAccessor actions,
									string[] reduceKeys)
		{
			var count = 0;
			Write(context, (indexWriter, analyzer) =>
			{
				actions.Indexing.SetCurrentIndexStatsTo(name);
				var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(name))
					.Where(x => x != null)
					.ToList();
				foreach (var reduceKey in reduceKeys)
				{
					var entryKey = reduceKey;
					indexWriter.DeleteDocuments(new Term(Abstractions.Data.Constants.ReduceKeyFieldName, entryKey.ToLowerInvariant()));
					batchers.ApplyAndIgnoreAllErrors(
						exception =>
						{
							logIndexing.WarnException(
								string.Format("Error when executed OnIndexEntryDeleted trigger for index '{0}', key: '{1}'",
								              name, entryKey),
								exception);
							context.AddError(name,
							                 entryKey,
							                 exception.Message
								);
						},
						trigger => trigger.OnIndexEntryDeleted(entryKey));
				}
				PropertyDescriptorCollection properties = null;
				var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(indexDefinition);
				var luceneDoc = new Document();
				var reduceKeyField = new Field(Abstractions.Data.Constants.ReduceKeyFieldName, "dummy",
				                      Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS);
				foreach (var doc in RobustEnumerationReduce(mappedResults, viewGenerator.ReduceDefinition, actions, context))
				{
					count++;
					var fields = GetFields(anonymousObjectToLuceneDocumentConverter, doc, ref properties).ToList();

					string reduceKeyAsString = ExtractReduceKey(viewGenerator, doc);
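					// reduceKeyField is allocated once (above) with a dummy value; SetValue re-uses it for each reduce key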
					reduceKeyField.SetValue(reduceKeyAsString.ToLowerInvariant());

					luceneDoc.GetFields().Clear();
					luceneDoc.Add(reduceKeyField);
					foreach (var field in fields)
					{
						luceneDoc.Add(field);
					}

					batchers.ApplyAndIgnoreAllErrors(
						exception =>
						{
							logIndexing.WarnException(
								string.Format("Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
								              name, reduceKeyAsString),
								exception);
							context.AddError(name,
							                 reduceKeyAsString,
							                 exception.Message
								);
						},
						trigger => trigger.OnIndexEntryCreated(reduceKeyAsString, luceneDoc));
					logIndexing.Debug("Reduce key {0} result in index {1} gave document: {2}", reduceKeyAsString, name, luceneDoc);
					AddDocumentToIndex(indexWriter, luceneDoc, analyzer);
					actions.Indexing.IncrementReduceSuccessIndexing();
				}
				batchers.ApplyAndIgnoreAllErrors(
					e =>
					{
						logIndexing.WarnException("Failed to dispose on index update trigger", e);
						context.AddError(name, null, e.Message);
					},
					x => x.Dispose());
				return true;
			});
			logIndexing.Debug(() => string.Format("Reduce resulted in {0} entries for {1} for reduce keys: {2}", count, name,
							  string.Join(", ", reduceKeys)));
		}