Example #1
        public void TestRollbackIntegrityWithBufferFlush()
        {
            Directory dir = new MockRAMDirectory();
            IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
            for (int i = 0; i < 5; i++)
            {
                Document doc = new Document();
                doc.Add(new Field("pk", i.ToString(), Field.Store.YES, Field.Index.ANALYZED_NO_NORMS));
                w.AddDocument(doc);
            }
            w.Close();

            // If buffer size is small enough to cause a flush, errors ensue...
            w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
            w.SetMaxBufferedDocs(2);

            Term pkTerm = new Term("pk", "");
            for (int i = 0; i < 3; i++)
            {
                Document doc = new Document();
                String value = i.ToString();
                doc.Add(new Field("pk", value, Field.Store.YES, Field.Index.ANALYZED_NO_NORMS));
                doc.Add(new Field("text", "foo", Field.Store.YES, Field.Index.ANALYZED_NO_NORMS));
                w.UpdateDocument(pkTerm.CreateTerm(value), doc);
            }
            w.Rollback();

            IndexReader r = IndexReader.Open(dir, true);
            Assert.AreEqual(5, r.NumDocs(), "index should contain same number of docs post rollback");
            r.Close();
            dir.Close();
        }
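
All of these examples revolve around Term.CreateTerm: build a template Term once for a field, then derive per-value terms from it, which reuses the template's interned field name instead of re-interning it on every new Term(field, text). A minimal sketch of the idea, assuming the classic Lucene.NET 2.x/3.x Term API:

    Term template = new Term("pk", "");   // fix the field once
    Term t0 = template.CreateTerm("0");   // equivalent to new Term("pk", "0")
    Term t1 = template.CreateTerm("1");   // same field, new text

In the test above this pattern feeds UpdateDocument, which deletes any document matching the pk term and buffers the replacement; Rollback() then discards everything done since the writer was opened, so the reader still sees the original 5 documents.
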
Example #2
		/// <summary>
		/// Builds the search index from the messages received during synchronization
		/// </summary>
		public static int ProcessResponseMessages(IEnumerable<MessageSearchInfo> messages)
		{
			if (messages == null)
				throw new ArgumentNullException("messages");

			int addedCount;

			var indexPath = GetIndexDir();

			// Clean up: delete any previously indexed versions of these messages
			if (IndexReader.IndexExists(indexPath))
			{
				var reader = IndexReader.Open(indexPath, false);
				var baseTerm = new Term("mid");

				try
				{
					foreach (var msg in messages)
					{
						// Delete by the message id term ("mid") so it matches how documents are keyed.
						var term = baseTerm.CreateTerm(msg.MessageID.ToString());
						reader.DeleteDocuments(term);
					}
				}
				finally
				{
					reader.Close();
				}
			}

			// Add the new documents
			var writer = CreateIndexWriter();
			try
			{
				var count = 0;
				foreach (var msg in messages)
				{
					// Forums with id 0 and 58 are trash forums
					if (msg.ForumID == 0 || msg.ForumID == 58)
						continue;
					writer.AddDocument(
						CreateDocument(
							msg.MessageID.ToString(),
							msg.ForumID.ToString(),
							FormatDate(msg.MessageDate),
							msg.Subject,
							msg.UserID.ToString(),
							msg.UserNick,
							msg.MessageBody));
					count++;
				}
				addedCount = count;
			}
			finally
			{
				writer.Close();
			}

			return addedCount;
		}
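
This example pairs the two halves of a reindex: a read-write IndexReader (IndexReader.Open(indexPath, false)) deletes the old copy of each message by its "mid" term, then an IndexWriter re-adds the refreshed documents, skipping the two trash forums. Deleting through the reader is the older reader-based delete API; the term text must match exactly, which is why the delete is keyed on the same message id that was written when the document was created.
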
Example #3
            /// <summary>Increments the enumeration to the next element.  True if one exists. </summary>
            public override bool Next()
            {
                // if a current term exists, the actual enum is initialized:
                // try change to next term, if no such term exists, fall-through
                if (currentTerm != null)
                {
                    System.Diagnostics.Debug.Assert(actualEnum != null);
                    if (actualEnum.Next())
                    {
                        currentTerm = actualEnum.Term;
                        if (TermCompare(currentTerm))
                        {
                            return true;
                        }
                    }
                }
                // if all above fails, we go forward to the next enum,
                // if one is available
                currentTerm = null;
                while (rangeBounds.Count >= 2)
                {
                    // close the current enum and read next bounds
                    if (actualEnum != null)
                    {
                        actualEnum.Close();
                        actualEnum = null;
                    }
                    string lowerBound = rangeBounds.First.Value;
                    rangeBounds.RemoveFirst();
                    this.currentUpperBound = rangeBounds.First.Value;
                    rangeBounds.RemoveFirst();
                    // create a new enum
                    actualEnum  = reader.Terms(termTemplate.CreateTerm(lowerBound));
                    currentTerm = actualEnum.Term;
                    if (currentTerm != null && TermCompare(currentTerm))
                    {
                        return true;
                    }
                    // clear the current term for next iteration
                    currentTerm = null;
                }

                // no more sub-range enums available
                System.Diagnostics.Debug.Assert(rangeBounds.Count == 0 && currentTerm == null);
                return false;
            }
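
This enumerator (it appears to be the sub-range TermEnum behind Lucene's numeric range queries) consumes rangeBounds two entries at a time: CreateTerm(lowerBound) seeds reader.Terms at the start of each sub-range, and TermCompare decides whether the positioned term is still inside currentUpperBound. The underlying primitive is that reader.Terms(term) positions a TermEnum at the first term >= its argument; a minimal scan over a single field might look like this (a sketch, assuming the classic TermEnum property API used above):

    TermEnum te = reader.Terms(new Term("pk", ""));   // seek to the first "pk" term
    try
    {
        do
        {
            Term t = te.Term;
            if (t == null || t.Field != "pk")          // ran past the field
                break;
            Console.WriteLine(t.Text);
        } while (te.Next());
    }
    finally
    {
        te.Close();
    }
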
Example #4
		public override void IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp)
		{
			var count = 0;
			var sourceCount = 0;
			var sw = Stopwatch.StartNew();
			var start = SystemTime.UtcNow;
			int loadDocumentCount = 0;
			long loadDocumentDuration = 0;
			Write((indexWriter, analyzer, stats) =>
			{
				var processedKeys = new HashSet<string>();
				var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(indexId))
					.Where(x => x != null)
					.ToList();
				try
				{
					var indexingPerfStats = RecordCurrentBatch("Current", batch.Docs.Count);
					batch.SetIndexingPerformance(indexingPerfStats);

					var docIdTerm = new Term(Constants.DocumentIdFieldName);
					var documentsWrapped = batch.Docs.Select((doc, i) =>
					{
						Interlocked.Increment(ref sourceCount);
						if (doc.__document_id == null)
							throw new ArgumentException(
								string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));

						string documentId = doc.__document_id.ToString();
						if (processedKeys.Add(documentId) == false)
							return doc;

						InvokeOnIndexEntryDeletedOnAllBatchers(batchers, docIdTerm.CreateTerm(documentId.ToLowerInvariant()));

						if (batch.SkipDeleteFromIndex[i] == false ||
							context.ShouldRemoveFromIndex(documentId)) // maybe it is recently deleted?
							indexWriter.DeleteDocuments(docIdTerm.CreateTerm(documentId.ToLowerInvariant()));

						return doc;
					})
						.Where(x => x is FilteredDocument == false)
						.ToList();

					var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();
					var allReferenceEtags = new ConcurrentQueue<IDictionary<string, Etag>>();

					BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, (partition) =>
					{
						var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(context.Database, indexDefinition, viewGenerator, logIndexing);
						var luceneDoc = new Document();
						var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES,
														Field.Index.NOT_ANALYZED_NO_NORMS);

						using (CurrentIndexingScope.Current = new CurrentIndexingScope(context.Database, PublicName))
						{
							string currentDocId = null;
							int outputPerDocId = 0;
							Action<Exception, object> onErrorFunc;
							bool skipDocument = false;
							foreach (var doc in RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, stats, out onErrorFunc))
							{
								float boost;
								IndexingResult indexingResult;
								try
								{
									indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);
								}
								catch (Exception e)
								{
									onErrorFunc(e, doc);
									continue;
								}

								// ReSharper disable once RedundantBoolCompare --> code clarity
								if (indexingResult.NewDocId == null || indexingResult.ShouldSkip != false)
								{
									continue;
								}
								if (currentDocId != indexingResult.NewDocId)
								{
									currentDocId = indexingResult.NewDocId;
									outputPerDocId = 0;
									skipDocument = false;
								}
								if (skipDocument)
									continue;
								outputPerDocId++;
								if (EnsureValidNumberOfOutputsForDocument(currentDocId, outputPerDocId) == false)
								{
									skipDocument = true;
									continue;
								}
								Interlocked.Increment(ref count);
								luceneDoc.GetFields().Clear();
								luceneDoc.Boost = boost;
								documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
								luceneDoc.Add(documentIdField);
								foreach (var field in indexingResult.Fields)
								{
									luceneDoc.Add(field);
								}
								batchers.ApplyAndIgnoreAllErrors(
									exception =>
									{
										logIndexing.WarnException(
										string.Format(
											"Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
											indexId, indexingResult.NewDocId),
											exception);
										context.AddError(indexId,
															 indexingResult.NewDocId,
															 exception.Message,
															 "OnIndexEntryCreated Trigger"
												);
									},
									trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc));
								LogIndexedDocument(indexingResult.NewDocId, luceneDoc);
								AddDocumentToIndex(indexWriter, luceneDoc, analyzer);

								Interlocked.Increment(ref stats.IndexingSuccesses);
							}
							allReferenceEtags.Enqueue(CurrentIndexingScope.Current.ReferencesEtags);
							allReferencedDocs.Enqueue(CurrentIndexingScope.Current.ReferencedDocuments);

							Interlocked.Add(ref loadDocumentCount, CurrentIndexingScope.Current.LoadDocumentCount);
							Interlocked.Add(ref loadDocumentDuration, CurrentIndexingScope.Current.LoadDocumentDuration.ElapsedMilliseconds);
						}
					});
					UpdateDocumentReferences(actions, allReferencedDocs, allReferenceEtags);
				}
				catch (Exception e)
				{
					batchers.ApplyAndIgnoreAllErrors(
						ex =>
						{
							logIndexing.WarnException("Failed to notify index update trigger batcher about an error", ex);
							context.AddError(indexId, null, ex.Message, "AnErrorOccured Trigger");
						},
						x => x.AnErrorOccured(e));
					throw;
				}
				finally
				{
					batchers.ApplyAndIgnoreAllErrors(
						e =>
						{
							logIndexing.WarnException("Failed to dispose on index update trigger", e);
							context.AddError(indexId, null, e.Message, "Dispose Trigger");
						},
						x => x.Dispose());
					BatchCompleted("Current");
				}
				return new IndexedItemsInfo(batch.HighestEtagBeforeFiltering)
				{
					ChangedDocs = sourceCount
				};
			});

			AddindexingPerformanceStat(new IndexingPerformanceStats
			{
				OutputCount = count,
				ItemsCount = sourceCount,
				InputCount = batch.Docs.Count,
				Duration = sw.Elapsed,
				Operation = "Index",
				Started = start,
				LoadDocumentCount = loadDocumentCount,
				LoadDocumentDurationMs = loadDocumentDuration 
			});
			logIndexing.Debug("Indexed {0} documents for {1}", count, indexId);
		}
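
This and the variants that follow appear to be successive revisions of the same RavenDB map-index method. The docIdTerm template is built once for Constants.DocumentIdFieldName; CreateTerm(documentId.ToLowerInvariant()) then keys both the delete-trigger notification and indexWriter.DeleteDocuments to the lowercased id, matching the lowercased value written into documentIdField when the replacement document is added.
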
Example #5
        public override void IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp)
        {
            var count = 0;
            var sourceCount = 0;
            var sw = Stopwatch.StartNew();
            var start = SystemTime.UtcNow;
            Write((indexWriter, analyzer, stats) =>
            {
                var processedKeys = new HashSet<string>();
                var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(name))
                    .Where(x => x != null)
                    .ToList();
                try
                {
                    RecordCurrentBatch("Current", batch.Docs.Count);
                    var docIdTerm = new Term(Constants.DocumentIdFieldName);
                    var documentsWrapped = batch.Docs.Select((doc, i) =>
                    {
                        Interlocked.Increment(ref sourceCount);
                        if (doc.__document_id == null)
                            throw new ArgumentException(
                                string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));

                        string documentId = doc.__document_id.ToString();
                        if (processedKeys.Add(documentId) == false)
                            return doc;
                        batchers.ApplyAndIgnoreAllErrors(
                            exception =>
                            {
                                logIndexing.WarnException(
                                    string.Format("Error when executed OnIndexEntryDeleted trigger for index '{0}', key: '{1}'",
                                                  name, documentId),
                                    exception);
                                context.AddError(name,
                                                 documentId,
                                                 exception.Message,
                                                 "OnIndexEntryDeleted Trigger"
                                    );
                            },
                            trigger => trigger.OnIndexEntryDeleted(documentId));
                        if (batch.SkipDeleteFromIndex[i] == false ||
                            context.ShouldRemoveFromIndex(documentId)) // maybe it is recently deleted?
                            indexWriter.DeleteDocuments(docIdTerm.CreateTerm(documentId.ToLowerInvariant()));

                        return doc;
                    })
                        .Where(x => x is FilteredDocument == false)
                        .ToList();

                    var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();

                    BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, (partition) =>
                    {
                        var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(context.Database, indexDefinition, viewGenerator);
                        var luceneDoc = new Document();
                        var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES,
                                                        Field.Index.NOT_ANALYZED_NO_NORMS);

                        using (CurrentIndexingScope.Current = new CurrentIndexingScope(LoadDocument, allReferencedDocs.Enqueue))
                        {
                            foreach (var doc in RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, stats))
                            {
                                float boost;
                                var indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);

                                if (indexingResult.NewDocId != null && indexingResult.ShouldSkip == false)
                                {
                                    Interlocked.Increment(ref count);
                                    luceneDoc.GetFields().Clear();
                                    luceneDoc.Boost = boost;
                                    documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
                                    luceneDoc.Add(documentIdField);
                                    foreach (var field in indexingResult.Fields)
                                    {
                                        luceneDoc.Add(field);
                                    }
                                    batchers.ApplyAndIgnoreAllErrors(
                                        exception =>
                                        {
                                            logIndexing.WarnException(
                                                string.Format("Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
                                                              name, indexingResult.NewDocId),
                                                exception);
                                            context.AddError(name,
                                                             indexingResult.NewDocId,
                                                             exception.Message,
                                                             "OnIndexEntryCreated Trigger"
                                                );
                                        },
                                        trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc));
                                    LogIndexedDocument(indexingResult.NewDocId, luceneDoc);
                                    AddDocumentToIndex(indexWriter, luceneDoc, analyzer);
                                }

                                Interlocked.Increment(ref stats.IndexingSuccesses);
                            }
                        }
                    });

                    var dic = context.ReferencingDocumentsByChildKeysWhichMightNeedReindexing_SimpleIndex;
                    IDictionary<string, HashSet<string>> result;
                    while (allReferencedDocs.TryDequeue(out result))
                    {
                        foreach (var referencedDocument in result)
                        {
                            actions.Indexing.UpdateDocumentReferences(name, referencedDocument.Key, referencedDocument.Value);
                            foreach (var childDocumentKey in referencedDocument.Value)
                            {
                                dic.GetOrAdd(childDocumentKey, k => new ConcurrentBag<string>()).Add(referencedDocument.Key);
                            }
                        }
                    }

                }
                catch (Exception e)
                {
                    batchers.ApplyAndIgnoreAllErrors(
                        ex =>
                        {
                            logIndexing.WarnException("Failed to notify index update trigger batcher about an error", ex);
                            context.AddError(name, null, ex.Message, "AnErrorOccured Trigger");
                        },
                        x => x.AnErrorOccured(e));
                    throw;
                }
                finally
                {
                    batchers.ApplyAndIgnoreAllErrors(
                        e =>
                        {
                            logIndexing.WarnException("Failed to dispose on index update trigger", e);
                            context.AddError(name, null, e.Message, "Dispose Trigger");
                        },
                        x => x.Dispose());
                    BatchCompleted("Current");
                }
                return new IndexedItemsInfo
                {
                    ChangedDocs = sourceCount,
                    HighestETag = batch.HighestEtagInBatch
                };
            });

            AddindexingPerformanceStat(new IndexingPerformanceStats
            {
                OutputCount = count,
                ItemsCount = sourceCount,
                InputCount = batch.Docs.Count,
                Duration = sw.Elapsed,
                Operation = "Index",
                Started = start
            });
            logIndexing.Debug("Indexed {0} documents for {1}", count, name);
        }
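
An earlier revision of the same method: delete-trigger notification is inlined instead of going through InvokeOnIndexEntryDeletedOnAllBatchers, referenced-document bookkeeping is drained from the queue by hand, and errors are reported against the index name rather than an id. The CreateTerm-driven delete is unchanged.
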
Example #6
		// TODO: This is pure business logic; it should be moved into a separate class.
		private void IndexBackgroundWorkerDoWork(object sender, DoWorkEventArgs e)
		{
			var args = (DoWorkArgs)e.Argument;

			args.Stage = DoWorkStage.Preparing;
			args.ProcessedMessages = 0;
			var indexedCount = 0;
			int maxMidInDb;
			int minMidInDb;
			using (var db = _provider.CreateDBContext())
			{
				args.TotalMessages = db.Messages().Count();
				minMidInDb = db.Messages().Min(m => m.ID);
				maxMidInDb = db.Messages().Max(m => m.ID);
			}

			var writer = SearchHelper.CreateIndexWriter();
			try
			{
				// Selection order: from the newest messages to the oldest
				var lastMid = maxMidInDb;

				var baseTerm = new Term("mid");
				var bw = (BackgroundWorker)sender;

				bw.ReportProgress(0, args);

				args.Stage = DoWorkStage.Indexing;

				while (lastMid >= minMidInDb)
				{
					bw.ReportProgress(0, args);
					
					if (bw.CancellationPending)
					{
						e.Cancel = true;
						break;
					}

					List<MessageSearchInfo> items;
					var localLastMid = lastMid;
					using (var db = _provider.CreateDBContext())
						items =
							db
								.Messages(m => m.ID <= localLastMid)
								.OrderByDescending(m => m.ID)
								.Take(_messagesBatchSize)
								.Select(
									m =>
										new MessageSearchInfo(
											m.ID,
											m.Date,
											m.Subject,
											m.Message,
											m.ForumID,
											m.UserID,
											m.UserNick))
								.ToList();
					var reader = IndexReader.Open(writer.Directory, false);
					try
					{
						foreach (var item in items)
						{
							var term = baseTerm.CreateTerm(item.MessageID.ToString());
							lastMid = item.MessageID;
							args.ProcessedMessages += 1;
							if (!(reader.DocFreq(term) > 0))
							{
								writer.AddDocument(item.CreateDocument());
								indexedCount += 1;
							}
						}
						lastMid -= 1; // step to just below the last message we picked up
					}
					finally
					{
						reader.Dispose();
					}
				}

				bw.ReportProgress(0, args);

				if (args.DoOptimize)
				{
					args.Stage = DoWorkStage.Optimizing;
					bw.ReportProgress(0, args);
					writer.UseCompoundFile = true;
					writer.Optimize();
				}
			}
			finally
			{
				writer.Dispose();
			}

			e.Result = indexedCount;
		}
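
Here reader.DocFreq(term) > 0 serves as a cheap existence check: it reads the term's document frequency straight from the term dictionary, so no query runs before deciding whether to AddDocument. As a sketch of the same check factored out (IsIndexed is a hypothetical helper, not part of the original code):

    // Hypothetical helper: true if some document already carries this mid term.
    static bool IsIndexed(IndexReader reader, Term midTemplate, int messageId)
    {
        return reader.DocFreq(midTemplate.CreateTerm(messageId.ToString())) > 0;
    }

Note that deleted-but-not-yet-merged documents still count toward DocFreq, so this check can report a message as indexed even after its document has been deleted.
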
Example #7
		public override IndexingPerformanceStats IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp, CancellationToken token)
		{
			token.ThrowIfCancellationRequested();

			var count = 0;
			var sourceCount = 0;
			var writeToIndexStats = new List<PerformanceStats>();

			IndexingPerformanceStats performance = null;
			var performanceStats = new List<BasePerformanceStats>();

			var storageCommitDuration = new Stopwatch();

			actions.BeforeStorageCommit += storageCommitDuration.Start;

			actions.AfterStorageCommit += () =>
			{
				storageCommitDuration.Stop();

				performanceStats.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds));
			};

			Write((indexWriter, analyzer, stats) =>
			{
				var processedKeys = new HashSet<string>();
				var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(indexId))
					.Where(x => x != null)
					.ToList();

				try
				{
					performance = RecordCurrentBatch("Current", "Index", batch.Docs.Count);

					var deleteExistingDocumentsDuration = new Stopwatch();
					var docIdTerm = new Term(Constants.DocumentIdFieldName);
					var documentsWrapped = batch.Docs.Select((doc, i) =>
					{
						token.ThrowIfCancellationRequested();

						Interlocked.Increment(ref sourceCount);
						if (doc.__document_id == null)
							throw new ArgumentException(
								string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));

						string documentId = doc.__document_id.ToString();
						if (processedKeys.Add(documentId) == false)
							return doc;

						InvokeOnIndexEntryDeletedOnAllBatchers(batchers, docIdTerm.CreateTerm(documentId.ToLowerInvariant()));

						if (batch.SkipDeleteFromIndex[i] == false ||
						    context.ShouldRemoveFromIndex(documentId)) // maybe it is recently deleted?
						{
							using (StopwatchScope.For(deleteExistingDocumentsDuration))
							{
								indexWriter.DeleteDocuments(docIdTerm.CreateTerm(documentId.ToLowerInvariant()));
							}
						}

						return doc;
					})
					.Where(x => x is FilteredDocument == false)
					.ToList();

					performanceStats.Add(new PerformanceStats
					{
						Name = IndexingOperation.Lucene_DeleteExistingDocument,
						DurationMs = deleteExistingDocumentsDuration.ElapsedMilliseconds
					});

					var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();
					var allReferenceEtags = new ConcurrentQueue<IDictionary<string, Etag>>();

					var parallelOperations = new ConcurrentQueue<ParallelBatchStats>();

					var parallelProcessingStart = SystemTime.UtcNow;

					BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, (partition) =>
					{
						token.ThrowIfCancellationRequested();
						var parallelStats = new ParallelBatchStats
						{
							StartDelay = (long) (SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds
						};

						var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(context.Database, indexDefinition, viewGenerator, logIndexing);
						var luceneDoc = new Document();
						var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES,
														Field.Index.NOT_ANALYZED_NO_NORMS);

						using (CurrentIndexingScope.Current = new CurrentIndexingScope(context.Database, PublicName))
						{
							string currentDocId = null;
							int outputPerDocId = 0;
							Action<Exception, object> onErrorFunc;
							bool skipDocument = false;

							var linqExecutionDuration = new Stopwatch();
							var addDocumentDuration = new Stopwatch();
							var convertToLuceneDocumentDuration = new Stopwatch();

							foreach (var doc in RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, stats, out onErrorFunc, linqExecutionDuration))
							{
								token.ThrowIfCancellationRequested();

								float boost;
								IndexingResult indexingResult;
								using (StopwatchScope.For(convertToLuceneDocumentDuration))
								{
									try
									{
										indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);
									}
									catch (Exception e)
									{
										onErrorFunc(e, doc);
										continue;
									}
								}

								// ReSharper disable once RedundantBoolCompare --> code clarity
								if (indexingResult.NewDocId == null || indexingResult.ShouldSkip != false)
								{
									continue;
								}
								if (currentDocId != indexingResult.NewDocId)
								{
									currentDocId = indexingResult.NewDocId;
									outputPerDocId = 0;
									skipDocument = false;
								}
								if (skipDocument)
									continue;
								outputPerDocId++;
								if (EnsureValidNumberOfOutputsForDocument(currentDocId, outputPerDocId) == false)
								{
									skipDocument = true;
									continue;
								}
								Interlocked.Increment(ref count);

								using (StopwatchScope.For(convertToLuceneDocumentDuration))
								{
									luceneDoc.GetFields().Clear();
									luceneDoc.Boost = boost;
									documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
									luceneDoc.Add(documentIdField);
									foreach (var field in indexingResult.Fields)
									{
										luceneDoc.Add(field);
									}
								}

								batchers.ApplyAndIgnoreAllErrors(
									exception =>
									{
										logIndexing.WarnException(
										string.Format(
											"Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
											PublicName, indexingResult.NewDocId),
											exception);
										context.AddError(
											indexId,
											PublicName,
											indexingResult.NewDocId,
											exception,
											"OnIndexEntryCreated Trigger");
									},
									trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc));
								LogIndexedDocument(indexingResult.NewDocId, luceneDoc);

								using (StopwatchScope.For(addDocumentDuration))
								{
									AddDocumentToIndex(indexWriter, luceneDoc, analyzer);
								}

								Interlocked.Increment(ref stats.IndexingSuccesses);
							}
							allReferenceEtags.Enqueue(CurrentIndexingScope.Current.ReferencesEtags);
							allReferencedDocs.Enqueue(CurrentIndexingScope.Current.ReferencedDocuments);

							parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.LoadDocument, CurrentIndexingScope.Current.LoadDocumentDuration.ElapsedMilliseconds));
							parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Linq_MapExecution, linqExecutionDuration.ElapsedMilliseconds));
							parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Lucene_ConvertToLuceneDocument, convertToLuceneDocumentDuration.ElapsedMilliseconds));
							parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Lucene_AddDocument, addDocumentDuration.ElapsedMilliseconds));
							parallelOperations.Enqueue(parallelStats);
						}
					});

					performanceStats.Add(new ParallelPerformanceStats
					{
						NumberOfThreads = parallelOperations.Count,
						DurationMs = (long) (SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds,
						BatchedOperations = parallelOperations.ToList()
					});

					var updateDocumentReferencesDuration = new Stopwatch();
					using (StopwatchScope.For(updateDocumentReferencesDuration))
					{
						UpdateDocumentReferences(actions, allReferencedDocs, allReferenceEtags);
					}
					performanceStats.Add(PerformanceStats.From(IndexingOperation.UpdateDocumentReferences, updateDocumentReferencesDuration.ElapsedMilliseconds));
				}
				catch (Exception e)
				{
					batchers.ApplyAndIgnoreAllErrors(
						ex =>
						{
							logIndexing.WarnException("Failed to notify index update trigger batcher about an error in " + PublicName, ex);
							context.AddError(indexId, PublicName, null, ex, "AnErrorOccured Trigger");
						},
						x => x.AnErrorOccured(e));
					throw;
				}
				finally
				{
					batchers.ApplyAndIgnoreAllErrors(
						e =>
						{
							logIndexing.WarnException("Failed to dispose on index update trigger in " + PublicName, e);
							context.AddError(indexId, PublicName, null, e, "Dispose Trigger");
						},
						x => x.Dispose());
				}
				return new IndexedItemsInfo(batch.HighestEtagBeforeFiltering)
				{
					ChangedDocs = sourceCount
				};
			}, writeToIndexStats);

			performanceStats.AddRange(writeToIndexStats);

			performance.OnCompleted = () => BatchCompleted("Current", "Index", sourceCount, count, performanceStats);

			logIndexing.Debug("Indexed {0} documents for {1}", count, PublicName);

			return performance;
		}
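
The same RavenDB method again, now instrumented: the CreateTerm-based deletes are timed under deleteExistingDocumentsDuration, each parallel partition records its own ParallelBatchStats, and the batch returns an IndexingPerformanceStats object instead of logging counters directly. The term-handling logic itself is unchanged from the earlier revisions.
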
Example #8
		public override void IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, WorkContext context, IStorageActionsAccessor actions, DateTime minimumTimestamp)
		{
			var count = 0;
			var sourceCount = 0;
			var sw = Stopwatch.StartNew();
			var start = SystemTime.UtcNow;
			Write(context, (indexWriter, analyzer, stats) =>
			{
				var processedKeys = new HashSet<string>();
				var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(name))
					.Where(x => x != null)
					.ToList();
				try
				{
					var docIdTerm = new Term(Constants.DocumentIdFieldName);
					var documentsWrapped = batch.Docs.Select((doc,i) =>
					{
						Interlocked.Increment(ref sourceCount);
						if (doc.__document_id == null)
							throw new ArgumentException(
								string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));

						string documentId = doc.__document_id.ToString();
						if (processedKeys.Add(documentId) == false)
							return doc;
						batchers.ApplyAndIgnoreAllErrors(
							exception =>
							{
								logIndexing.WarnException(
									string.Format("Error when executed OnIndexEntryDeleted trigger for index '{0}', key: '{1}'",
									              name, documentId),
									exception);
								context.AddError(name,
								                 documentId,
								                 exception.Message
									);
							},
							trigger => trigger.OnIndexEntryDeleted(documentId));
						if(batch.SkipDeleteFromIndex[i] == false)
							indexWriter.DeleteDocuments(docIdTerm.CreateTerm(documentId.ToLowerInvariant()));
						return doc;
					})
						.Where(x => x is FilteredDocument == false)
						.ToList();


					BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, (partition) =>
					{
						var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(indexDefinition);
						var luceneDoc = new Document();
						var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES,
						                                Field.Index.NOT_ANALYZED_NO_NORMS);

						foreach (var doc in RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, actions, stats))
						{
							float boost;
							var indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);

							if (indexingResult.NewDocId != null && indexingResult.ShouldSkip == false)
							{
								Interlocked.Increment(ref count);
								luceneDoc.GetFields().Clear();
								luceneDoc.Boost = boost;
								documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
								luceneDoc.Add(documentIdField);
								foreach (var field in indexingResult.Fields)
								{
									luceneDoc.Add(field);
								}
								batchers.ApplyAndIgnoreAllErrors(
									exception =>
									{
										logIndexing.WarnException(
											string.Format("Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
											              name, indexingResult.NewDocId),
											exception);
										context.AddError(name,
										                 indexingResult.NewDocId,
										                 exception.Message
											);
									},
									trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc));
								LogIndexedDocument(indexingResult.NewDocId, luceneDoc);
								AddDocumentToIndex(indexWriter, luceneDoc, analyzer);
							}

							Interlocked.Increment(ref stats.IndexingSuccesses);
						}
					});
				}
				catch(Exception e)
				{
					batchers.ApplyAndIgnoreAllErrors(
						ex =>
						{
							logIndexing.WarnException("Failed to notify index update trigger batcher about an error", ex);
							context.AddError(name, null, ex.Message);
						},
						x => x.AnErrorOccured(e));
					throw;
				}
				finally
				{
					batchers.ApplyAndIgnoreAllErrors(
						e =>
						{
							logIndexing.WarnException("Failed to dispose on index update trigger", e);
							context.AddError(name, null, e.Message);
						},
						x => x.Dispose());
				}
				return sourceCount;
			});
			AddindexingPerformanceStat(new IndexingPerformanceStats
			{
				OutputCount = count,
				InputCount = sourceCount,
				Duration = sw.Elapsed,
				Operation = "Index",
				Started = start
			});
			logIndexing.Debug("Indexed {0} documents for {1}", count, name);
		}
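
The oldest of the RavenDB variants: no cancellation token, no reference tracking, and deletes are gated purely on SkipDeleteFromIndex. It shows the pattern at its simplest: one Term(Constants.DocumentIdFieldName) template per batch, one CreateTerm per lowercased document id.
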
Example #9
        /// <summary>
        /// Updates the items in the ListView when the MP3 view is in Artist mode
        /// </summary>
        private void UpdateArtistListView(out ListViewItem[] items, ref int indexRecord, int times, ref Term termField)
        {
            int nums;
            items = new ListViewItem[times];
            for (int i = 0; i < times; i++)
            {
                try
                {
                    Term term = termField.CreateTerm(Static.MP3Artist[indexRecord]);
                    TermQuery query = new TermQuery(term);
                    this.topDocs = this.indexSearcher.Search(query, 1);
                    nums = this.topDocs.totalHits;
                }
                catch (Exception)
                {
                    nums = 0;
                    continue;
                }

                Static.MArtistInfo.Add(new MP3ArtistInfo(Static.MP3Artist[indexRecord], nums));
                string[] subItem = { Static.MArtistInfo[indexRecord].artist,
                              Deal.ToEnglishNumString(Static.MArtistInfo[indexRecord].songNums)};
                indexRecord++;
                // The artist icon's imageKey in the imageList is 5.
                items[i] = new ListViewItem(subItem, 5);
            }
        }
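
In this last example CreateTerm produces one exact-match term per artist name, and indexSearcher.Search(query, 1) is run only for topDocs.totalHits, i.e., to count the songs per artist; the single hit itself is never read. If the lookup throws, the catch skips that artist and leaves the corresponding items[i] slot null, which the caller has to tolerate.
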