It is guaranteed that any merges started prior to calling this method will have completed once this method completes.
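The examples below all follow the same basic pattern: add enough documents that background merges are kicked off, then call WaitForMerges before reading or closing the index. A minimal sketch of that pattern, assuming the Lucene.Net 3.x-style IndexWriter constructor used in several of the examples below; the directory, analyzer, and field values are illustrative only:

Directory dir = new RAMDirectory();
IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
writer.SetMaxBufferedDocs(2); // flush frequently so that merges are actually triggered

Document doc = new Document();
doc.Add(new Field("content", "aaa", Field.Store.YES, Field.Index.ANALYZED));
for (int i = 0; i < 20; i++)
{
    writer.AddDocument(doc);
}

writer.Commit();
writer.WaitForMerges(); // blocks until every merge started before this call has finished
writer.Close();
dir.Close();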
public virtual void TestMaxNumSegments2([ValueSource(typeof(ConcurrentMergeSchedulerFactories), "Values")] Func<IConcurrentMergeScheduler> newScheduler)
{
    Directory dir = NewDirectory();

    Document doc = new Document();
    doc.Add(NewStringField("content", "aaa", Field.Store.NO));

    LogDocMergePolicy ldmp = new LogDocMergePolicy();
    ldmp.MinMergeDocs = 1;
    ldmp.MergeFactor = 4;

    var config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
        .SetMaxBufferedDocs(2)
        .SetMergePolicy(ldmp)
        .SetMergeScheduler(newScheduler());
    IndexWriter writer = new IndexWriter(dir, config);

    for (int iter = 0; iter < 10; iter++)
    {
        for (int i = 0; i < 19; i++)
        {
            writer.AddDocument(doc);
        }

        // commit buffered docs, wait for any merges they trigger, then commit the merged segments
        writer.Commit();
        writer.WaitForMerges();
        writer.Commit();

        SegmentInfos sis = new SegmentInfos();
        sis.Read(dir);
        int segCount = sis.Count;

        // force-merge down to at most 7 segments; commit and wait for outstanding merges before re-reading SegmentInfos
        writer.ForceMerge(7);
        writer.Commit();
        writer.WaitForMerges();

        sis = new SegmentInfos();
        sis.Read(dir);
        int optSegCount = sis.Count;

        if (segCount < 7)
        {
            Assert.AreEqual(segCount, optSegCount);
        }
        else
        {
            Assert.AreEqual(7, optSegCount, "seg: " + segCount);
        }
    }
    writer.Dispose();
    dir.Dispose();
}
private void CheckInvariants(IndexWriter writer)
{
    writer.WaitForMerges();
    int maxBufferedDocs = writer.Config.MaxBufferedDocs;
    int mergeFactor = ((LogMergePolicy)writer.Config.MergePolicy).MergeFactor;
    int maxMergeDocs = ((LogMergePolicy)writer.Config.MergePolicy).MaxMergeDocs;

    int ramSegmentCount = writer.NumBufferedDocuments;
    Assert.IsTrue(ramSegmentCount < maxBufferedDocs);

    int lowerBound = -1;
    int upperBound = maxBufferedDocs;
    int numSegments = 0;

    int segmentCount = writer.SegmentCount;
    for (int i = segmentCount - 1; i >= 0; i--)
    {
        int docCount = writer.GetDocCount(i);
        Assert.IsTrue(docCount > lowerBound,
            "docCount=" + docCount + " lowerBound=" + lowerBound + " upperBound=" + upperBound +
            " i=" + i + " segmentCount=" + segmentCount + " index=" + writer.SegString() +
            " config=" + writer.Config);

        if (docCount <= upperBound)
        {
            numSegments++;
        }
        else
        {
            if (upperBound * mergeFactor <= maxMergeDocs)
            {
                Assert.IsTrue(numSegments < mergeFactor,
                    "maxMergeDocs=" + maxMergeDocs + "; numSegments=" + numSegments +
                    "; upperBound=" + upperBound + "; mergeFactor=" + mergeFactor +
                    "; segs=" + writer.SegString() + " config=" + writer.Config);
            }

            do
            {
                lowerBound = upperBound;
                upperBound *= mergeFactor;
            } while (docCount > upperBound);
            numSegments = 1;
        }
    }
    if (upperBound * mergeFactor <= maxMergeDocs)
    {
        Assert.IsTrue(numSegments < mergeFactor);
    }
}
public virtual void TestMaxBufferedDocsChange()
{
    Directory dir = NewDirectory();

    IndexWriter writer = new IndexWriter(dir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
            .SetMaxBufferedDocs(101)
            .SetMergePolicy(new LogDocMergePolicy())
            .SetMergeScheduler(new SerialMergeScheduler()));

    // leftmost* segment has 1 doc
    // rightmost* segment has 100 docs
    for (int i = 1; i <= 100; i++)
    {
        for (int j = 0; j < i; j++)
        {
            AddDoc(writer);
            CheckInvariants(writer);
        }
        writer.Dispose();

        writer = new IndexWriter(dir,
            NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
                .SetOpenMode(OpenMode_e.APPEND)
                .SetMaxBufferedDocs(101)
                .SetMergePolicy(new LogDocMergePolicy())
                .SetMergeScheduler(new SerialMergeScheduler()));
    }

    writer.Dispose();
    LogDocMergePolicy ldmp = new LogDocMergePolicy();
    ldmp.MergeFactor = 10;
    writer = new IndexWriter(dir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
            .SetOpenMode(OpenMode_e.APPEND)
            .SetMaxBufferedDocs(10)
            .SetMergePolicy(ldmp)
            .SetMergeScheduler(new SerialMergeScheduler()));

    // merge policy only fixes segments on levels where merges
    // have been triggered, so check invariants after all adds
    for (int i = 0; i < 100; i++)
    {
        AddDoc(writer);
    }
    CheckInvariants(writer);

    for (int i = 100; i < 1000; i++)
    {
        AddDoc(writer);
    }
    writer.Commit();
    writer.WaitForMerges();
    writer.Commit();
    CheckInvariants(writer);

    writer.Dispose();
    dir.Dispose();
}
private void CheckInvariants(IndexWriter writer)
{
    writer.WaitForMerges();
    int maxBufferedDocs = writer.GetMaxBufferedDocs();
    int mergeFactor = writer.GetMergeFactor();
    int maxMergeDocs = writer.GetMaxMergeDocs();

    int ramSegmentCount = writer.GetNumBufferedDocuments();
    Assert.IsTrue(ramSegmentCount < maxBufferedDocs);

    int lowerBound = -1;
    int upperBound = maxBufferedDocs;
    int numSegments = 0;

    int segmentCount = writer.GetSegmentCount();
    for (int i = segmentCount - 1; i >= 0; i--)
    {
        int docCount = writer.GetDocCount(i);
        Assert.IsTrue(docCount > lowerBound);

        if (docCount <= upperBound)
        {
            numSegments++;
        }
        else
        {
            if (upperBound * mergeFactor <= maxMergeDocs)
            {
                Assert.IsTrue(numSegments < mergeFactor);
            }

            do
            {
                lowerBound = upperBound;
                upperBound *= mergeFactor;
            } while (docCount > upperBound);
            numSegments = 1;
        }
    }
    if (upperBound * mergeFactor <= maxMergeDocs)
    {
        Assert.IsTrue(numSegments < mergeFactor);
    }
}
public virtual void TestMergeDocCount0([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")] IConcurrentMergeScheduler scheduler) { Directory dir = NewDirectory(); LogDocMergePolicy ldmp = new LogDocMergePolicy(); ldmp.MergeFactor = 100; IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(10).SetMergePolicy(ldmp)); for (int i = 0; i < 250; i++) { AddDoc(writer); CheckInvariants(writer); } writer.Dispose(); // delete some docs without merging writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.NO_COMPOUND_FILES)); writer.DeleteDocuments(new Term("content", "aaa")); writer.Dispose(); ldmp = new LogDocMergePolicy(); ldmp.MergeFactor = 5; var config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())) .SetOpenMode(OpenMode_e.APPEND) .SetMaxBufferedDocs(10) .SetMergePolicy(ldmp) .SetMergeScheduler(scheduler); writer = new IndexWriter(dir, config); // merge factor is changed, so check invariants after all adds for (int i = 0; i < 10; i++) { AddDoc(writer); } writer.Commit(); writer.WaitForMerges(); writer.Commit(); CheckInvariants(writer); Assert.AreEqual(10, writer.MaxDoc); writer.Dispose(); dir.Dispose(); }
public void TestSegmentWarmer()
{
    Directory dir = new MockRAMDirectory();
    IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED, null);
    w.SetMaxBufferedDocs(2);
    w.GetReader(null).Close();
    w.MergedSegmentWarmer = new AnonymousIndexReaderWarmer();

    Document doc = new Document();
    doc.Add(new Field("foo", "bar", Field.Store.YES, Field.Index.NOT_ANALYZED));
    for (int i = 0; i < 20; i++)
    {
        w.AddDocument(doc, null);
    }
    w.WaitForMerges();
    w.Close();
    dir.Close();
}
private void CheckInvariants(IndexWriter writer)
{
    writer.WaitForMerges();
    int maxBufferedDocs = writer.GetMaxBufferedDocs();
    int mergeFactor = writer.MergeFactor;
    int maxMergeDocs = writer.MaxMergeDocs;

    int ramSegmentCount = writer.GetNumBufferedDocuments();
    Assert.IsTrue(ramSegmentCount < maxBufferedDocs);

    int lowerBound = -1;
    int upperBound = maxBufferedDocs;
    int numSegments = 0;

    int segmentCount = writer.GetSegmentCount();
    for (int i = segmentCount - 1; i >= 0; i--)
    {
        int docCount = writer.GetDocCount(i);
        Assert.IsTrue(docCount > lowerBound);

        if (docCount <= upperBound)
        {
            numSegments++;
        }
        else
        {
            if (upperBound * mergeFactor <= maxMergeDocs)
            {
                Assert.IsTrue(numSegments < mergeFactor,
                    "maxMergeDocs=" + maxMergeDocs + "; numSegments=" + numSegments +
                    "; upperBound=" + upperBound + "; mergeFactor=" + mergeFactor);
            }

            do
            {
                lowerBound = upperBound;
                upperBound *= mergeFactor;
            } while (docCount > upperBound);
            numSegments = 1;
        }
    }
    if (upperBound * mergeFactor <= maxMergeDocs)
    {
        Assert.IsTrue(numSegments < mergeFactor);
    }
}
public void TestSegmentWarmer()
{
    Directory dir = new MockRAMDirectory();
    IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
    w.SetMaxBufferedDocs(2);
    w.GetReader().Close();
    w.MergedSegmentWarmer = new AnonymousIndexReaderWarmer();

    Document doc = new Document();
    doc.Add(new Field("foo", "bar", Field.Store.YES, Field.Index.NOT_ANALYZED));
    for (int i = 0; i < 20; i++)
    {
        w.AddDocument(doc);
    }
    w.WaitForMerges();
    w.Close();
    dir.Close();
}
public void Index_Read_And_Write_Ensure_No_Errors_In_Async()
{
    using (var d = new RAMDirectory())
    using (var writer = new IndexWriter(d, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29), IndexWriter.MaxFieldLength.LIMITED))
    using (var customIndexer = IndexInitializer.GetUmbracoIndexer(writer))
    using (var customSearcher = IndexInitializer.GetUmbracoSearcher(writer))
    {
        var waitHandle = new ManualResetEvent(false);

        EventHandler operationComplete = (sender, e) =>
        {
            //signal that we are done
            waitHandle.Set();
        };

        //add the handler for optimized since we know it will be optimized last based on the commit count
        customIndexer.IndexOperationComplete += operationComplete;

        //remove the normal indexing error handler
        customIndexer.IndexingError -= IndexInitializer.IndexingError;

        //run in async mode
        customIndexer.RunAsync = true;

        //get a node from the data repo
        var node = _contentService.GetPublishedContentByXPath("//*[string-length(@id)>0 and number(@id)>0]")
            .Root
            .Elements()
            .First();

        var idQueue = new ConcurrentQueue<int>(Enumerable.Range(1, 10));
        var searchThreadCount = 500;
        var indexThreadCount = 10;
        var searchCount = 10700;
        var indexCount = 100;
        var searchCountPerThread = Convert.ToInt32(searchCount / searchThreadCount);
        var indexCountPerThread = Convert.ToInt32(indexCount / indexThreadCount);

        //spawn a bunch of threads to perform some reading
        var tasks = new List<Task>();

        Action<UmbracoExamineSearcher> doSearch = (s) =>
        {
            try
            {
                for (var counter = 0; counter < searchCountPerThread; counter++)
                {
                    //get next id and put it to the back of the list
                    int docId;
                    if (idQueue.TryDequeue(out docId))
                    {
                        idQueue.Enqueue(docId);
                        var r = s.Search(s.CreateSearchCriteria().Id(docId).Compile());
                        Debug.WriteLine("searching thread: {0}, id: {1}, found: {2}", Thread.CurrentThread.ManagedThreadId, docId, r.Count());
                        Thread.Sleep(50);
                    }
                }
            }
            catch (Exception ex)
            {
                Debug.WriteLine("ERROR!! {0}", ex);
                throw;
            }
        };

        Action<UmbracoContentIndexer> doIndex = (ind) =>
        {
            try
            {
                //reindex the same node a bunch of times
                for (var i = 0; i < indexCountPerThread; i++)
                {
                    //get next id and put it to the back of the list
                    int docId;
                    if (idQueue.TryDequeue(out docId))
                    {
                        idQueue.Enqueue(docId);
                        var cloned = new XElement(node);
                        cloned.Attribute("id").Value = docId.ToString(CultureInfo.InvariantCulture);
                        Debug.WriteLine("Indexing {0}", docId);
                        ind.ReIndexNode(cloned, IndexTypes.Content);
                        Thread.Sleep(100);
                    }
                }
            }
            catch (Exception ex)
            {
                Debug.WriteLine("ERROR!! {0}", ex);
                throw;
            }
        };

        //indexing threads
        for (var i = 0; i < indexThreadCount; i++)
        {
            var indexer = customIndexer;
            tasks.Add(Task.Factory.StartNew(() => doIndex(indexer), TaskCreationOptions.LongRunning));
        }

        //searching threads
        for (var i = 0; i < searchThreadCount; i++)
        {
            var searcher = customSearcher;
            tasks.Add(Task.Factory.StartNew(() => doSearch(searcher), TaskCreationOptions.LongRunning));
        }

        try
        {
            Task.WaitAll(tasks.ToArray());
        }
        catch (AggregateException e)
        {
            var sb = new StringBuilder();
            sb.Append(e.Message + ": ");
            foreach (var v in e.InnerExceptions)
            {
                sb.Append(v.Message + "; ");
            }
            Assert.Fail(sb.ToString());
        }

        //reset the async mode and remove event handler
        customIndexer.IndexingError += IndexInitializer.IndexingError;
        customIndexer.RunAsync = false;

        //wait until we are done
        waitHandle.WaitOne();

        writer.WaitForMerges();

        var results = customSearcher.Search(customSearcher.CreateSearchCriteria().NodeName("Home").Compile());
        Assert.AreEqual(10, results.Count());
    }
}
public virtual void TestOptimizeMaxNumSegments2()
{
    MockRAMDirectory dir = new MockRAMDirectory();

    Document doc = new Document();
    doc.Add(new Field("content", "aaa", Field.Store.YES, Field.Index.ANALYZED));

    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    LogDocMergePolicy ldmp = new LogDocMergePolicy(writer);
    ldmp.SetMinMergeDocs(1);
    writer.SetMergePolicy(ldmp);
    writer.SetMergeFactor(4);
    writer.SetMaxBufferedDocs(2);

    for (int iter = 0; iter < 10; iter++)
    {
        for (int i = 0; i < 19; i++)
            writer.AddDocument(doc);

        writer.Commit();
        writer.WaitForMerges();
        writer.Commit();

        SegmentInfos sis = new SegmentInfos();
        sis.Read(dir);
        int segCount = sis.Count;

        writer.Optimize(7);
        writer.Commit();

        sis = new SegmentInfos();
        ((ConcurrentMergeScheduler)writer.GetMergeScheduler()).Sync();
        sis.Read(dir);
        int optSegCount = sis.Count;

        if (segCount < 7)
            Assert.AreEqual(segCount, optSegCount);
        else
            Assert.AreEqual(7, optSegCount);
    }
}
public virtual void TestMergeDocCount0([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")]IConcurrentMergeScheduler scheduler) { Directory dir = NewDirectory(); LogDocMergePolicy ldmp = new LogDocMergePolicy(); ldmp.MergeFactor = 100; IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(10).SetMergePolicy(ldmp)); for (int i = 0; i < 250; i++) { AddDoc(writer); CheckInvariants(writer); } writer.Dispose(); // delete some docs without merging writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.NO_COMPOUND_FILES)); writer.DeleteDocuments(new Term("content", "aaa")); writer.Dispose(); ldmp = new LogDocMergePolicy(); ldmp.MergeFactor = 5; var config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())) .SetOpenMode(OpenMode_e.APPEND) .SetMaxBufferedDocs(10) .SetMergePolicy(ldmp) .SetMergeScheduler(scheduler); writer = new IndexWriter(dir, config); // merge factor is changed, so check invariants after all adds for (int i = 0; i < 10; i++) { AddDoc(writer); } writer.Commit(); writer.WaitForMerges(); writer.Commit(); CheckInvariants(writer); Assert.AreEqual(10, writer.MaxDoc); writer.Dispose(); dir.Dispose(); }
public void Can_Overwrite_Index_During_Indexing_Operation()
{
    using (var d = new RAMDirectory())
    using (var writer = new IndexWriter(d, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29), IndexWriter.MaxFieldLength.LIMITED))
    using (var customIndexer = IndexInitializer.GetUmbracoIndexer(writer))
    using (var customSearcher = IndexInitializer.GetUmbracoSearcher(writer))
    {
        var waitHandle = new ManualResetEvent(false);

        EventHandler operationComplete = (sender, e) =>
        {
            //signal that we are done
            waitHandle.Set();
        };

        //add the handler for optimized since we know it will be optimized last based on the commit count
        customIndexer.IndexOperationComplete += operationComplete;

        //remove the normal indexing error handler
        customIndexer.IndexingError -= IndexInitializer.IndexingError;

        //run in async mode
        customIndexer.RunAsync = true;

        //get a node from the data repo
        var node = _contentService.GetPublishedContentByXPath("//*[string-length(@id)>0 and number(@id)>0]")
            .Root
            .Elements()
            .First();

        //get the id for the node we're re-indexing.
        var id = (int)node.Attribute("id");

        //spawn a bunch of threads to perform some reading
        var tasks = new List<Task>();

        //reindex the same node a bunch of times - then while this is running we'll overwrite below
        for (var i = 0; i < 1000; i++)
        {
            var indexer = customIndexer;
            tasks.Add(Task.Factory.StartNew(() =>
            {
                //get next id and put it to the back of the list
                int docId = i;
                var cloned = new XElement(node);
                cloned.Attribute("id").Value = docId.ToString(CultureInfo.InvariantCulture);
                Debug.WriteLine("Indexing {0}", docId);
                indexer.ReIndexNode(cloned, IndexTypes.Content);
            }, TaskCreationOptions.LongRunning));
        }

        Thread.Sleep(100);

        //overwrite!
        customIndexer.EnsureIndex(true);

        try
        {
            Task.WaitAll(tasks.ToArray());
        }
        catch (AggregateException e)
        {
            var sb = new StringBuilder();
            sb.Append(e.Message + ": ");
            foreach (var v in e.InnerExceptions)
            {
                sb.Append(v.Message + "; ");
            }
            Assert.Fail(sb.ToString());
        }

        //reset the async mode and remove event handler
        customIndexer.IndexingError += IndexInitializer.IndexingError;
        customIndexer.RunAsync = false;

        //wait until we are done
        waitHandle.WaitOne();

        writer.WaitForMerges();

        //ensure no data since it's a new index
        var results = customSearcher.Search(customSearcher.CreateSearchCriteria().NodeName("Home").Compile());

        //should be less than the total inserted because we overwrote it in the middle of processing
        Debug.WriteLine("TOTAL RESULTS: " + results.TotalItemCount);
        Assert.Less(results.Count(), 1000);
    }
}
public virtual void TestMaxNumSegments2()
{
    Directory dir = NewDirectory();

    Document doc = new Document();
    doc.Add(NewStringField("content", "aaa", Field.Store.NO));

    LogDocMergePolicy ldmp = new LogDocMergePolicy();
    ldmp.MinMergeDocs = 1;
    ldmp.MergeFactor = 4;
    IndexWriter writer = new IndexWriter(dir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
            .SetMaxBufferedDocs(2)
            .SetMergePolicy(ldmp)
            .SetMergeScheduler(new ConcurrentMergeScheduler()));

    for (int iter = 0; iter < 10; iter++)
    {
        for (int i = 0; i < 19; i++)
        {
            writer.AddDocument(doc);
        }

        writer.Commit();
        writer.WaitForMerges();
        writer.Commit();

        SegmentInfos sis = new SegmentInfos();
        sis.Read(dir);
        int segCount = sis.Size();

        writer.ForceMerge(7);
        writer.Commit();
        writer.WaitForMerges();

        sis = new SegmentInfos();
        sis.Read(dir);
        int optSegCount = sis.Size();

        if (segCount < 7)
        {
            Assert.AreEqual(segCount, optSegCount);
        }
        else
        {
            Assert.AreEqual(7, optSegCount, "seg: " + segCount);
        }
    }
    writer.Dispose();
    dir.Dispose();
}
public virtual void TestSimpleMergedSegmentWramer()
{
    Directory dir = NewDirectory();
    AtomicBoolean didWarm = new AtomicBoolean();
    InfoStream infoStream = new InfoStreamAnonymousInnerClassHelper(this, didWarm);
    IndexWriter w = new IndexWriter(dir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
            .SetMaxBufferedDocs(2)
            .SetReaderPooling(true)
            .SetInfoStream(infoStream)
            .SetMergedSegmentWarmer(new SimpleMergedSegmentWarmer(infoStream))
            .SetMergePolicy(NewLogMergePolicy(10)));

    Document doc = new Document();
    doc.Add(NewStringField("foo", "bar", Field.Store.NO));
    for (int i = 0; i < 20; i++)
    {
        w.AddDocument(doc);
    }
    w.WaitForMerges();
    w.Dispose();
    dir.Dispose();
    Assert.IsTrue(didWarm.Get());
}
public void Index_Ensure_No_Duplicates_In_Async()
{
    using (var d = new RAMDirectory())
    using (var writer = new IndexWriter(d, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29), IndexWriter.MaxFieldLength.LIMITED))
    using (var customIndexer = IndexInitializer.GetUmbracoIndexer(writer))
    using (var customSearcher = IndexInitializer.GetUmbracoSearcher(writer))
    {
        var waitHandle = new ManualResetEvent(false);

        EventHandler operationComplete = (sender, e) =>
        {
            //signal that we are done
            waitHandle.Set();
        };

        //add the handler for optimized since we know it will be optimized last based on the commit count
        customIndexer.IndexOperationComplete += operationComplete;

        //remove the normal indexing error handler
        customIndexer.IndexingError -= IndexInitializer.IndexingError;

        //run in async mode
        customIndexer.RunAsync = true;

        //get a node from the data repo
        var idQueue = new ConcurrentQueue<int>(Enumerable.Range(1, 3));
        var node = _contentService.GetPublishedContentByXPath("//*[string-length(@id)>0 and number(@id)>0]")
            .Root
            .Elements()
            .First();

        //reindex the same nodes a bunch of times
        for (var i = 0; i < idQueue.Count * 20; i++)
        {
            //get next id and put it to the back of the list
            int docId;
            if (idQueue.TryDequeue(out docId))
            {
                idQueue.Enqueue(docId);
                var cloned = new XElement(node);
                cloned.Attribute("id").Value = docId.ToString(CultureInfo.InvariantCulture);
                Debug.WriteLine("Indexing {0}", docId);
                customIndexer.ReIndexNode(cloned, IndexTypes.Content);
                Thread.Sleep(100);
            }
        }

        //reset the async mode and remove event handler
        customIndexer.IndexingError += IndexInitializer.IndexingError;
        customIndexer.RunAsync = false;

        //wait until we are done
        waitHandle.WaitOne();

        writer.WaitForMerges();

        //ensure no duplicates
        var results = customSearcher.Search(customSearcher.CreateSearchCriteria().NodeName("Home").Compile());
        Assert.AreEqual(3, results.Count());
    }
}