public void TestRollbackIntegrityWithBufferFlush()
{
    Directory dir = new MockRAMDirectory();
    IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
    for (int i = 0; i < 5; i++)
    {
        Document doc = new Document();
        doc.Add(new Field("pk", i.ToString(), Field.Store.YES, Field.Index.ANALYZED_NO_NORMS));
        w.AddDocument(doc);
    }
    w.Close();

    // If buffer size is small enough to cause a flush, errors ensue...
    w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
    w.SetMaxBufferedDocs(2);

    Term pkTerm = new Term("pk", "");
    for (int i = 0; i < 3; i++)
    {
        Document doc = new Document();
        String value = i.ToString();
        doc.Add(new Field("pk", value, Field.Store.YES, Field.Index.ANALYZED_NO_NORMS));
        doc.Add(new Field("text", "foo", Field.Store.YES, Field.Index.ANALYZED_NO_NORMS));
        w.UpdateDocument(pkTerm.CreateTerm(value), doc);
    }
    w.Rollback();

    IndexReader r = IndexReader.Open(dir, true);
    Assert.AreEqual(5, r.NumDocs(), "index should contain same number of docs post rollback");
    r.Close();
    dir.Close();
}
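A minimal, self-contained sketch of the pattern the test above exercises: one template Term per field, with per-value terms derived via CreateTerm instead of repeated new Term(field, value) allocations. The field name, counts, and directory contents are illustrative only, not taken from any of the surrounding projects; it assumes the Lucene.Net.Index, Lucene.Net.Documents, Lucene.Net.Store, and Lucene.Net.Analysis namespaces.

public static void CreateTermSketch()
{
    Directory dir = new RAMDirectory();
    var writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
    for (int i = 0; i < 10; i++)
    {
        var doc = new Document();
        doc.Add(new Field("id", i.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
        writer.AddDocument(doc);
    }

    // CreateTerm reuses the template's field name and swaps only the text,
    // which is cheaper than constructing a brand-new Term on every iteration.
    var idTemplate = new Term("id", "");
    writer.DeleteDocuments(idTemplate.CreateTerm("3"));
    writer.DeleteDocuments(idTemplate.CreateTerm("7"));
    writer.Close();

    var reader = IndexReader.Open(dir, true);
    System.Console.WriteLine(reader.NumDocs()); // prints 8
    reader.Close();
    dir.Close();
}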
/// <summary>
/// Builds the search index from the messages received during synchronization.
/// </summary>
public static int ProcessResponseMessages(IEnumerable<MessageSearchInfo> messages)
{
    if (messages == null)
        throw new ArgumentNullException("messages");

    int addedCount;
    var indexPath = GetIndexDir();

    // Clean out previously indexed copies of these messages
    if (IndexReader.IndexExists(indexPath))
    {
        var reader = IndexReader.Open(indexPath, false);
        var baseTerm = new Term("mid");
        try
        {
            foreach (var msg in messages)
            {
                // Delete by message id ("mid" field)
                var term = baseTerm.CreateTerm(msg.MessageID.ToString());
                reader.DeleteDocuments(term);
            }
        }
        finally
        {
            reader.Close();
        }
    }

    // Add the fresh copies
    var writer = CreateIndexWriter();
    try
    {
        var count = 0;
        foreach (var msg in messages)
        {
            // Forums with ids 0 and 58 are trash bins
            if (msg.ForumID == 0 || msg.ForumID == 58)
                continue;
            writer.AddDocument(
                CreateDocument(
                    msg.MessageID.ToString(),
                    msg.ForumID.ToString(),
                    FormatDate(msg.MessageDate),
                    msg.Subject,
                    msg.UserID.ToString(),
                    msg.UserNick,
                    msg.MessageBody));
            count++;
        }
        addedCount = count;
    }
    finally
    {
        writer.Close();
    }
    return addedCount;
}
/// <summary>Increments the enumeration to the next element. True if one exists.</summary>
//@Override
public override bool Next()
{
    // if a current term exists, the actual enum is initialized:
    // try change to next term, if no such term exists, fall-through
    if (currentTerm != null)
    {
        System.Diagnostics.Debug.Assert(actualEnum != null);
        if (actualEnum.Next())
        {
            currentTerm = actualEnum.Term;
            if (TermCompare(currentTerm))
                return true;
        }
    }

    // if all above fails, we go forward to the next enum,
    // if one is available
    currentTerm = null;
    while (rangeBounds.Count >= 2)
    {
        // close the current enum and read next bounds
        if (actualEnum != null)
        {
            actualEnum.Close();
            actualEnum = null;
        }
        string lowerBound = rangeBounds.First.Value;
        rangeBounds.RemoveFirst();
        this.currentUpperBound = rangeBounds.First.Value;
        rangeBounds.RemoveFirst();

        // create a new enum
        actualEnum = reader.Terms(termTemplate.CreateTerm(lowerBound));
        currentTerm = actualEnum.Term;
        if (currentTerm != null && TermCompare(currentTerm))
            return true;
        // clear the current term for next iteration
        currentTerm = null;
    }

    // no more sub-range enums available
    System.Diagnostics.Debug.Assert(rangeBounds.Count == 0 && currentTerm == null);
    return false;
}
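The sub-range walk above leans on reader.Terms(term), which positions a TermEnum at the first term >= its argument. A small sketch of that seek-then-scan idiom in isolation (the method and variable names are mine, not from the source above):

public static void PrintTermsFrom(IndexReader reader, Term start)
{
    TermEnum termEnum = reader.Terms(start); // positioned at the first term >= start
    try
    {
        do
        {
            Term t = termEnum.Term;
            if (t == null || t.Field != start.Field)
                break; // ran past this field's term range
            System.Console.WriteLine(t.Text);
        } while (termEnum.Next());
    }
    finally
    {
        termEnum.Close();
    }
}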
public override void IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp)
{
    var count = 0;
    var sourceCount = 0;
    var sw = Stopwatch.StartNew();
    var start = SystemTime.UtcNow;
    int loadDocumentCount = 0;
    long loadDocumentDuration = 0;
    Write((indexWriter, analyzer, stats) =>
    {
        var processedKeys = new HashSet<string>();
        var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(indexId))
            .Where(x => x != null)
            .ToList();
        try
        {
            var indexingPerfStats = RecordCurrentBatch("Current", batch.Docs.Count);
            batch.SetIndexingPerformance(indexingPerfStats);

            var docIdTerm = new Term(Constants.DocumentIdFieldName);
            var documentsWrapped = batch.Docs.Select((doc, i) =>
            {
                Interlocked.Increment(ref sourceCount);
                if (doc.__document_id == null)
                    throw new ArgumentException(
                        string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));

                string documentId = doc.__document_id.ToString();
                if (processedKeys.Add(documentId) == false)
                    return doc;

                InvokeOnIndexEntryDeletedOnAllBatchers(batchers, docIdTerm.CreateTerm(documentId.ToLowerInvariant()));

                if (batch.SkipDeleteFromIndex[i] == false ||
                    context.ShouldRemoveFromIndex(documentId)) // maybe it is recently deleted?
                    indexWriter.DeleteDocuments(docIdTerm.CreateTerm(documentId.ToLowerInvariant()));

                return doc;
            })
            .Where(x => x is FilteredDocument == false)
            .ToList();

            var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();
            var allReferenceEtags = new ConcurrentQueue<IDictionary<string, Etag>>();

            BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, (partition) =>
            {
                var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(context.Database, indexDefinition, viewGenerator, logIndexing);
                var luceneDoc = new Document();
                var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES,
                                                Field.Index.NOT_ANALYZED_NO_NORMS);

                using (CurrentIndexingScope.Current = new CurrentIndexingScope(context.Database, PublicName))
                {
                    string currentDocId = null;
                    int outputPerDocId = 0;
                    Action<Exception, object> onErrorFunc;
                    bool skipDocument = false;
                    foreach (var doc in RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, stats, out onErrorFunc))
                    {
                        float boost;
                        IndexingResult indexingResult;
                        try
                        {
                            indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);
                        }
                        catch (Exception e)
                        {
                            onErrorFunc(e, doc);
                            continue;
                        }

                        // ReSharper disable once RedundantBoolCompare --> code clarity
                        if (indexingResult.NewDocId == null || indexingResult.ShouldSkip != false)
                        {
                            continue;
                        }
                        if (currentDocId != indexingResult.NewDocId)
                        {
                            currentDocId = indexingResult.NewDocId;
                            outputPerDocId = 0;
                            skipDocument = false;
                        }
                        if (skipDocument)
                            continue;
                        outputPerDocId++;
                        if (EnsureValidNumberOfOutputsForDocument(currentDocId, outputPerDocId) == false)
                        {
                            skipDocument = true;
                            continue;
                        }
                        Interlocked.Increment(ref count);
                        luceneDoc.GetFields().Clear();
                        luceneDoc.Boost = boost;
                        documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
                        luceneDoc.Add(documentIdField);
                        foreach (var field in indexingResult.Fields)
                        {
                            luceneDoc.Add(field);
                        }
                        batchers.ApplyAndIgnoreAllErrors(
                            exception =>
                            {
                                logIndexing.WarnException(
                                    string.Format(
                                        "Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
                                        indexId, indexingResult.NewDocId),
                                    exception);
                                context.AddError(indexId,
                                                 indexingResult.NewDocId,
                                                 exception.Message,
                                                 "OnIndexEntryCreated Trigger");
                            },
                            trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc));
                        LogIndexedDocument(indexingResult.NewDocId, luceneDoc);
                        AddDocumentToIndex(indexWriter, luceneDoc, analyzer);

                        Interlocked.Increment(ref stats.IndexingSuccesses);
                    }
                    allReferenceEtags.Enqueue(CurrentIndexingScope.Current.ReferencesEtags);
                    allReferencedDocs.Enqueue(CurrentIndexingScope.Current.ReferencedDocuments);
                    Interlocked.Add(ref loadDocumentCount, CurrentIndexingScope.Current.LoadDocumentCount);
                    Interlocked.Add(ref loadDocumentDuration, CurrentIndexingScope.Current.LoadDocumentDuration.ElapsedMilliseconds);
                }
            });
            UpdateDocumentReferences(actions, allReferencedDocs, allReferenceEtags);
        }
        catch (Exception e)
        {
            batchers.ApplyAndIgnoreAllErrors(
                ex =>
                {
                    logIndexing.WarnException("Failed to notify index update trigger batcher about an error", ex);
                    context.AddError(indexId, null, ex.Message, "AnErrorOccured Trigger");
                },
                x => x.AnErrorOccured(e));
            throw;
        }
        finally
        {
            batchers.ApplyAndIgnoreAllErrors(
                e =>
                {
                    logIndexing.WarnException("Failed to dispose on index update trigger", e);
                    context.AddError(indexId, null, e.Message, "Dispose Trigger");
                },
                x => x.Dispose());
            BatchCompleted("Current");
        }
        return new IndexedItemsInfo(batch.HighestEtagBeforeFiltering)
        {
            ChangedDocs = sourceCount
        };
    });

    AddindexingPerformanceStat(new IndexingPerformanceStats
    {
        OutputCount = count,
        ItemsCount = sourceCount,
        InputCount = batch.Docs.Count,
        Duration = sw.Elapsed,
        Operation = "Index",
        Started = start,
        LoadDocumentCount = loadDocumentCount,
        LoadDocumentDurationMs = loadDocumentDuration
    });

    logIndexing.Debug("Indexed {0} documents for {1}", count, indexId);
}
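One detail worth isolating from the method above: it allocates a single Document and a single id Field per worker, then clears and re-values them for every output document. A stripped-down sketch of that reuse pattern (the field name and method are illustrative, not RavenDB's API):

public static void IndexIdsWithReusedDocument(IndexWriter writer, IEnumerable<string> ids)
{
    var luceneDoc = new Document();
    var idField = new Field("__document_id", "dummy", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
    foreach (var id in ids)
    {
        luceneDoc.GetFields().Clear();            // drop the previous round's fields
        idField.SetValue(id.ToLowerInvariant());  // re-point the shared field at the new value
        luceneDoc.Add(idField);
        writer.AddDocument(luceneDoc);
    }
}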
public override void IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp)
{
    var count = 0;
    var sourceCount = 0;
    var sw = Stopwatch.StartNew();
    var start = SystemTime.UtcNow;
    Write((indexWriter, analyzer, stats) =>
    {
        var processedKeys = new HashSet<string>();
        var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(name))
            .Where(x => x != null)
            .ToList();
        try
        {
            RecordCurrentBatch("Current", batch.Docs.Count);
            var docIdTerm = new Term(Constants.DocumentIdFieldName);
            var documentsWrapped = batch.Docs.Select((doc, i) =>
            {
                Interlocked.Increment(ref sourceCount);
                if (doc.__document_id == null)
                    throw new ArgumentException(
                        string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));

                string documentId = doc.__document_id.ToString();
                if (processedKeys.Add(documentId) == false)
                    return doc;

                batchers.ApplyAndIgnoreAllErrors(
                    exception =>
                    {
                        logIndexing.WarnException(
                            string.Format("Error when executed OnIndexEntryDeleted trigger for index '{0}', key: '{1}'",
                                          name, documentId),
                            exception);
                        context.AddError(name,
                                         documentId,
                                         exception.Message,
                                         "OnIndexEntryDeleted Trigger");
                    },
                    trigger => trigger.OnIndexEntryDeleted(documentId));

                if (batch.SkipDeleteFromIndex[i] == false ||
                    context.ShouldRemoveFromIndex(documentId)) // maybe it is recently deleted?
                    indexWriter.DeleteDocuments(docIdTerm.CreateTerm(documentId.ToLowerInvariant()));

                return doc;
            })
            .Where(x => x is FilteredDocument == false)
            .ToList();

            var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();

            BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, (partition) =>
            {
                var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(context.Database, indexDefinition, viewGenerator);
                var luceneDoc = new Document();
                var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES,
                                                Field.Index.NOT_ANALYZED_NO_NORMS);

                using (CurrentIndexingScope.Current = new CurrentIndexingScope(LoadDocument, allReferencedDocs.Enqueue))
                {
                    foreach (var doc in RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, stats))
                    {
                        float boost;
                        var indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);
                        if (indexingResult.NewDocId != null && indexingResult.ShouldSkip == false)
                        {
                            Interlocked.Increment(ref count);
                            luceneDoc.GetFields().Clear();
                            luceneDoc.Boost = boost;
                            documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
                            luceneDoc.Add(documentIdField);
                            foreach (var field in indexingResult.Fields)
                            {
                                luceneDoc.Add(field);
                            }
                            batchers.ApplyAndIgnoreAllErrors(
                                exception =>
                                {
                                    logIndexing.WarnException(
                                        string.Format("Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
                                                      name, indexingResult.NewDocId),
                                        exception);
                                    context.AddError(name,
                                                     indexingResult.NewDocId,
                                                     exception.Message,
                                                     "OnIndexEntryCreated Trigger");
                                },
                                trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc));
                            LogIndexedDocument(indexingResult.NewDocId, luceneDoc);
                            AddDocumentToIndex(indexWriter, luceneDoc, analyzer);
                        }

                        Interlocked.Increment(ref stats.IndexingSuccesses);
                    }
                }
            });

            var dic = context.ReferencingDocumentsByChildKeysWhichMightNeedReindexing_SimpleIndex;
            IDictionary<string, HashSet<string>> result;
            while (allReferencedDocs.TryDequeue(out result))
            {
                foreach (var referencedDocument in result)
                {
                    actions.Indexing.UpdateDocumentReferences(name, referencedDocument.Key, referencedDocument.Value);
                    foreach (var childDocumentKey in referencedDocument.Value)
                    {
                        dic.GetOrAdd(childDocumentKey, k => new ConcurrentBag<string>()).Add(referencedDocument.Key);
                    }
                }
            }
        }
        catch (Exception e)
        {
            batchers.ApplyAndIgnoreAllErrors(
                ex =>
                {
                    logIndexing.WarnException("Failed to notify index update trigger batcher about an error", ex);
                    context.AddError(name, null, ex.Message, "AnErrorOccured Trigger");
                },
                x => x.AnErrorOccured(e));
            throw;
        }
        finally
        {
            batchers.ApplyAndIgnoreAllErrors(
                e =>
                {
                    logIndexing.WarnException("Failed to dispose on index update trigger", e);
                    context.AddError(name, null, e.Message, "Dispose Trigger");
                },
                x => x.Dispose());
            BatchCompleted("Current");
        }
        return new IndexedItemsInfo
        {
            ChangedDocs = sourceCount,
            HighestETag = batch.HighestEtagInBatch
        };
    });

    AddindexingPerformanceStat(new IndexingPerformanceStats
    {
        OutputCount = count,
        ItemsCount = sourceCount,
        InputCount = batch.Docs.Count,
        Duration = sw.Elapsed,
        Operation = "Index",
        Started = start
    });

    logIndexing.Debug("Indexed {0} documents for {1}", count, name);
}
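In both variants above, deleting by the lowercased id term and then calling AddDocumentToIndex is a manual upsert; the two steps stay separate so the delete/create triggers can run in between. Where no triggers are involved, Lucene's single-call form does the same work, sketched here with an assumed unique-key template term:

public static void Upsert(IndexWriter writer, Term idTemplate, string id, Document doc)
{
    // UpdateDocument atomically deletes every document matching the term,
    // then adds the new one.
    writer.UpdateDocument(idTemplate.CreateTerm(id.ToLowerInvariant()), doc);
}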
// TODO: This is pure business logic; it should be moved into a separate class.
private void IndexBackgroundWorkerDoWork(object sender, DoWorkEventArgs e)
{
    var args = (DoWorkArgs)e.Argument;
    args.Stage = DoWorkStage.Preparing;
    args.ProcessedMessages = 0;

    var indexedCount = 0;
    int maxMidInDb;
    int minMidInDb;
    using (var db = _provider.CreateDBContext())
    {
        args.TotalMessages = db.Messages().Count();
        minMidInDb = db.Messages().Min(m => m.ID);
        maxMidInDb = db.Messages().Max(m => m.ID);
    }

    var writer = SearchHelper.CreateIndexWriter();
    try
    {
        // Fetch direction: from the newest messages to the oldest
        var lastMid = maxMidInDb;
        var baseTerm = new Term("mid");
        var bw = (BackgroundWorker)sender;
        bw.ReportProgress(0, args);
        args.Stage = DoWorkStage.Indexing;
        while (lastMid >= minMidInDb)
        {
            bw.ReportProgress(0, args);
            if (bw.CancellationPending)
            {
                e.Cancel = true;
                break;
            }

            List<MessageSearchInfo> items;
            var localLastMid = lastMid;
            using (var db = _provider.CreateDBContext())
                items = db
                    .Messages(m => m.ID <= localLastMid)
                    .OrderByDescending(m => m.ID)
                    .Take(_messagesBatchSize)
                    .Select(
                        m => new MessageSearchInfo(
                            m.ID,
                            m.Date,
                            m.Subject,
                            m.Message,
                            m.ForumID,
                            m.UserID,
                            m.UserNick))
                    .ToList();

            var reader = IndexReader.Open(writer.Directory, false);
            try
            {
                foreach (var item in items)
                {
                    var term = baseTerm.CreateTerm(item.MessageID.ToString());
                    lastMid = item.MessageID;
                    args.ProcessedMessages += 1;
                    if (!(reader.DocFreq(term) > 0))
                    {
                        writer.AddDocument(item.CreateDocument());
                        indexedCount += 1;
                    }
                }
                lastMid -= 1; // step one below the last selected id
            }
            finally
            {
                reader.Dispose();
            }
        }

        bw.ReportProgress(0, args);
        if (args.DoOptimize)
        {
            args.Stage = DoWorkStage.Optimizing;
            bw.ReportProgress(0, args);
            writer.UseCompoundFile = true;
            writer.Optimize();
        }
    }
    finally
    {
        writer.Dispose();
    }
    e.Result = indexedCount;
}
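The dedup check in the loop above (reader.DocFreq(term) > 0) generalizes to a small add-if-absent helper; a sketch under the same "mid" primary-key convention (the helper name is mine):

private static readonly Term MidTemplate = new Term("mid");

public static bool AddIfAbsent(IndexWriter writer, IndexReader reader, int messageId, Document doc)
{
    // DocFreq > 0 means at least one indexed document already carries this term.
    if (reader.DocFreq(MidTemplate.CreateTerm(messageId.ToString())) > 0)
        return false;
    writer.AddDocument(doc);
    return true;
}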
public override IndexingPerformanceStats IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp, CancellationToken token)
{
    token.ThrowIfCancellationRequested();

    var count = 0;
    var sourceCount = 0;
    var writeToIndexStats = new List<PerformanceStats>();

    IndexingPerformanceStats performance = null;
    var performanceStats = new List<BasePerformanceStats>();

    var storageCommitDuration = new Stopwatch();

    actions.BeforeStorageCommit += storageCommitDuration.Start;

    actions.AfterStorageCommit += () =>
    {
        storageCommitDuration.Stop();
        performanceStats.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds));
    };

    Write((indexWriter, analyzer, stats) =>
    {
        var processedKeys = new HashSet<string>();
        var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(indexId))
            .Where(x => x != null)
            .ToList();

        try
        {
            performance = RecordCurrentBatch("Current", "Index", batch.Docs.Count);

            var deleteExistingDocumentsDuration = new Stopwatch();

            var docIdTerm = new Term(Constants.DocumentIdFieldName);
            var documentsWrapped = batch.Docs.Select((doc, i) =>
            {
                token.ThrowIfCancellationRequested();

                Interlocked.Increment(ref sourceCount);
                if (doc.__document_id == null)
                    throw new ArgumentException(
                        string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));

                string documentId = doc.__document_id.ToString();
                if (processedKeys.Add(documentId) == false)
                    return doc;

                InvokeOnIndexEntryDeletedOnAllBatchers(batchers, docIdTerm.CreateTerm(documentId.ToLowerInvariant()));

                if (batch.SkipDeleteFromIndex[i] == false ||
                    context.ShouldRemoveFromIndex(documentId)) // maybe it is recently deleted?
                {
                    using (StopwatchScope.For(deleteExistingDocumentsDuration))
                    {
                        indexWriter.DeleteDocuments(docIdTerm.CreateTerm(documentId.ToLowerInvariant()));
                    }
                }

                return doc;
            })
            .Where(x => x is FilteredDocument == false)
            .ToList();

            performanceStats.Add(new PerformanceStats
            {
                Name = IndexingOperation.Lucene_DeleteExistingDocument,
                DurationMs = deleteExistingDocumentsDuration.ElapsedMilliseconds
            });

            var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();
            var allReferenceEtags = new ConcurrentQueue<IDictionary<string, Etag>>();

            var parallelOperations = new ConcurrentQueue<ParallelBatchStats>();

            var parallelProcessingStart = SystemTime.UtcNow;

            BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, (partition) =>
            {
                token.ThrowIfCancellationRequested();
                var parallelStats = new ParallelBatchStats
                {
                    StartDelay = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds
                };

                var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(context.Database, indexDefinition, viewGenerator, logIndexing);
                var luceneDoc = new Document();
                var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES,
                                                Field.Index.NOT_ANALYZED_NO_NORMS);

                using (CurrentIndexingScope.Current = new CurrentIndexingScope(context.Database, PublicName))
                {
                    string currentDocId = null;
                    int outputPerDocId = 0;
                    Action<Exception, object> onErrorFunc;
                    bool skipDocument = false;

                    var linqExecutionDuration = new Stopwatch();
                    var addDocumentDutation = new Stopwatch();
                    var convertToLuceneDocumentDuration = new Stopwatch();

                    foreach (var doc in RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, stats, out onErrorFunc, linqExecutionDuration))
                    {
                        token.ThrowIfCancellationRequested();

                        float boost;
                        IndexingResult indexingResult;
                        using (StopwatchScope.For(convertToLuceneDocumentDuration))
                        {
                            try
                            {
                                indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);
                            }
                            catch (Exception e)
                            {
                                onErrorFunc(e, doc);
                                continue;
                            }
                        }

                        // ReSharper disable once RedundantBoolCompare --> code clarity
                        if (indexingResult.NewDocId == null || indexingResult.ShouldSkip != false)
                        {
                            continue;
                        }
                        if (currentDocId != indexingResult.NewDocId)
                        {
                            currentDocId = indexingResult.NewDocId;
                            outputPerDocId = 0;
                            skipDocument = false;
                        }
                        if (skipDocument)
                            continue;
                        outputPerDocId++;
                        if (EnsureValidNumberOfOutputsForDocument(currentDocId, outputPerDocId) == false)
                        {
                            skipDocument = true;
                            continue;
                        }
                        Interlocked.Increment(ref count);

                        using (StopwatchScope.For(convertToLuceneDocumentDuration))
                        {
                            luceneDoc.GetFields().Clear();
                            luceneDoc.Boost = boost;
                            documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
                            luceneDoc.Add(documentIdField);
                            foreach (var field in indexingResult.Fields)
                            {
                                luceneDoc.Add(field);
                            }
                        }
                        batchers.ApplyAndIgnoreAllErrors(
                            exception =>
                            {
                                logIndexing.WarnException(
                                    string.Format(
                                        "Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
                                        PublicName, indexingResult.NewDocId),
                                    exception);
                                context.AddError(
                                    indexId,
                                    PublicName,
                                    indexingResult.NewDocId,
                                    exception,
                                    "OnIndexEntryCreated Trigger");
                            },
                            trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc));
                        LogIndexedDocument(indexingResult.NewDocId, luceneDoc);

                        using (StopwatchScope.For(addDocumentDutation))
                        {
                            AddDocumentToIndex(indexWriter, luceneDoc, analyzer);
                        }

                        Interlocked.Increment(ref stats.IndexingSuccesses);
                    }

                    allReferenceEtags.Enqueue(CurrentIndexingScope.Current.ReferencesEtags);
                    allReferencedDocs.Enqueue(CurrentIndexingScope.Current.ReferencedDocuments);

                    parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.LoadDocument, CurrentIndexingScope.Current.LoadDocumentDuration.ElapsedMilliseconds));
                    parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Linq_MapExecution, linqExecutionDuration.ElapsedMilliseconds));
                    parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Lucene_ConvertToLuceneDocument, convertToLuceneDocumentDuration.ElapsedMilliseconds));
                    parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Lucene_AddDocument, addDocumentDutation.ElapsedMilliseconds));

                    parallelOperations.Enqueue(parallelStats);
                }
            });

            performanceStats.Add(new ParallelPerformanceStats
            {
                NumberOfThreads = parallelOperations.Count,
                DurationMs = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds,
                BatchedOperations = parallelOperations.ToList()
            });

            var updateDocumentReferencesDuration = new Stopwatch();
            using (StopwatchScope.For(updateDocumentReferencesDuration))
            {
                UpdateDocumentReferences(actions, allReferencedDocs, allReferenceEtags);
            }
            performanceStats.Add(PerformanceStats.From(IndexingOperation.UpdateDocumentReferences, updateDocumentReferencesDuration.ElapsedMilliseconds));
        }
        catch (Exception e)
        {
            batchers.ApplyAndIgnoreAllErrors(
                ex =>
                {
                    logIndexing.WarnException("Failed to notify index update trigger batcher about an error in " + PublicName, ex);
                    context.AddError(indexId, PublicName, null, ex, "AnErrorOccured Trigger");
                },
                x => x.AnErrorOccured(e));
            throw;
        }
        finally
        {
            batchers.ApplyAndIgnoreAllErrors(
                e =>
                {
                    logIndexing.WarnException("Failed to dispose on index update trigger in " + PublicName, e);
                    context.AddError(indexId, PublicName, null, e, "Dispose Trigger");
                },
                x => x.Dispose());
        }

        return new IndexedItemsInfo(batch.HighestEtagBeforeFiltering)
        {
            ChangedDocs = sourceCount
        };
    }, writeToIndexStats);

    performanceStats.AddRange(writeToIndexStats);

    performance.OnCompleted = () => BatchCompleted("Current", "Index", sourceCount, count, performanceStats);

    logIndexing.Debug("Indexed {0} documents for {1}", count, PublicName);

    return performance;
}
public override void IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, WorkContext context, IStorageActionsAccessor actions, DateTime minimumTimestamp)
{
    var count = 0;
    var sourceCount = 0;
    var sw = Stopwatch.StartNew();
    var start = SystemTime.UtcNow;
    Write(context, (indexWriter, analyzer, stats) =>
    {
        var processedKeys = new HashSet<string>();
        var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(name))
            .Where(x => x != null)
            .ToList();
        try
        {
            var docIdTerm = new Term(Constants.DocumentIdFieldName);
            var documentsWrapped = batch.Docs.Select((doc, i) =>
            {
                Interlocked.Increment(ref sourceCount);
                if (doc.__document_id == null)
                    throw new ArgumentException(
                        string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));

                string documentId = doc.__document_id.ToString();
                if (processedKeys.Add(documentId) == false)
                    return doc;

                batchers.ApplyAndIgnoreAllErrors(
                    exception =>
                    {
                        logIndexing.WarnException(
                            string.Format("Error when executed OnIndexEntryDeleted trigger for index '{0}', key: '{1}'",
                                          name, documentId),
                            exception);
                        context.AddError(name, documentId, exception.Message);
                    },
                    trigger => trigger.OnIndexEntryDeleted(documentId));

                if (batch.SkipDeleteFromIndex[i] == false)
                    indexWriter.DeleteDocuments(docIdTerm.CreateTerm(documentId.ToLowerInvariant()));

                return doc;
            })
            .Where(x => x is FilteredDocument == false)
            .ToList();

            BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, (partition) =>
            {
                var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(indexDefinition);
                var luceneDoc = new Document();
                var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES,
                                                Field.Index.NOT_ANALYZED_NO_NORMS);

                foreach (var doc in RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, actions, stats))
                {
                    float boost;
                    var indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);
                    if (indexingResult.NewDocId != null && indexingResult.ShouldSkip == false)
                    {
                        Interlocked.Increment(ref count);
                        luceneDoc.GetFields().Clear();
                        luceneDoc.Boost = boost;
                        documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
                        luceneDoc.Add(documentIdField);
                        foreach (var field in indexingResult.Fields)
                        {
                            luceneDoc.Add(field);
                        }
                        batchers.ApplyAndIgnoreAllErrors(
                            exception =>
                            {
                                logIndexing.WarnException(
                                    string.Format("Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
                                                  name, indexingResult.NewDocId),
                                    exception);
                                context.AddError(name, indexingResult.NewDocId, exception.Message);
                            },
                            trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc));
                        LogIndexedDocument(indexingResult.NewDocId, luceneDoc);
                        AddDocumentToIndex(indexWriter, luceneDoc, analyzer);
                    }

                    Interlocked.Increment(ref stats.IndexingSuccesses);
                }
            });
        }
        catch (Exception e)
        {
            batchers.ApplyAndIgnoreAllErrors(
                ex =>
                {
                    logIndexing.WarnException("Failed to notify index update trigger batcher about an error", ex);
                    context.AddError(name, null, ex.Message);
                },
                x => x.AnErrorOccured(e));
            throw;
        }
        finally
        {
            batchers.ApplyAndIgnoreAllErrors(
                e =>
                {
                    logIndexing.WarnException("Failed to dispose on index update trigger", e);
                    context.AddError(name, null, e.Message);
                },
                x => x.Dispose());
        }
        return sourceCount;
    });

    AddindexingPerformanceStat(new IndexingPerformanceStats
    {
        OutputCount = count,
        InputCount = sourceCount,
        Duration = sw.Elapsed,
        Operation = "Index",
        Started = start
    });

    logIndexing.Debug("Indexed {0} documents for {1}", count, name);
}
/// <summary>
/// Updates the items of the ListView when MP3s are shown in Artist mode.
/// </summary>
private void UpadateArtistListView(out ListViewItem[] items, ref int indexRecord, int times, ref Term termField)
{
    int nums;
    items = new ListViewItem[times];
    for (int i = 0; i < times; i++)
    {
        try
        {
            Term term = termField.CreateTerm(Static.MP3Artist[indexRecord]);
            TermQuery query = new TermQuery(term);
            this.topDocs = this.indexSearcher.Search(query, 1);
            nums = this.topDocs.totalHits;
        }
        catch (Exception)
        {
            nums = 0;
            continue; // on failure, leave items[i] empty and move on
        }
        Static.MArtistInfo.Add(new MP3ArtistInfo(Static.MP3Artist[indexRecord], nums));
        string[] subItem =
        {
            Static.MArtistInfo[indexRecord].artist,
            Deal.ToEnglishNumString(Static.MArtistInfo[indexRecord].songNums)
        };
        indexRecord++;
        // The artist icon's image key in the imagelist is 5.
        items[i] = new ListViewItem(subItem, 5);
    }
}
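The counting idiom above, distilled: a TermQuery built from a template term and searched with n = 1 purely to read the hit count. A sketch of that helper (the method name is mine; the snippet above uses the older lowercase totalHits field, while later Lucene.Net builds spell it TotalHits):

public static int CountDocs(IndexSearcher searcher, Term template, string value)
{
    TopDocs top = searcher.Search(new TermQuery(template.CreateTerm(value)), 1);
    return top.TotalHits; // total matching docs, even though only one scored hit was requested
}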