public virtual void RunTest(Random random, Directory directory) {
    IndexWriter writer = new IndexWriter(directory, ((IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, ANALYZER).SetOpenMode(OpenMode_e.CREATE).SetMaxBufferedDocs(2)).SetMergePolicy(NewLogMergePolicy()));

    for (int iter = 0; iter < NUM_ITER; iter++) {
        int iterFinal = iter;

        ((LogMergePolicy)writer.Config.MergePolicy).MergeFactor = 1000;

        FieldType customType = new FieldType(StringField.TYPE_STORED);
        customType.OmitNorms = true;

        for (int i = 0; i < 200; i++) {
            Document d = new Document();
            d.Add(NewField("id", Convert.ToString(i), customType));
            d.Add(NewField("contents", English.IntToEnglish(i), customType));
            writer.AddDocument(d);
        }

        ((LogMergePolicy)writer.Config.MergePolicy).MergeFactor = 4;

        ThreadClass[] threads = new ThreadClass[NUM_THREADS];

        for (int i = 0; i < NUM_THREADS; i++) {
            int iFinal = i;
            IndexWriter writerFinal = writer;
            threads[i] = new ThreadAnonymousInnerClassHelper(this, iterFinal, customType, iFinal, writerFinal);
        }

        for (int i = 0; i < NUM_THREADS; i++) {
            threads[i].Start();
        }

        for (int i = 0; i < NUM_THREADS; i++) {
            threads[i].Join();
        }

        Assert.IsTrue(!Failed);

        int expectedDocCount = (int)((1 + iter) * (200 + 8 * NUM_ITER2 * (NUM_THREADS / 2.0) * (1 + NUM_THREADS)));

        Assert.AreEqual(expectedDocCount, writer.NumDocs(), "index=" + writer.SegString() + " numDocs=" + writer.NumDocs() + " maxDoc=" + writer.MaxDoc + " config=" + writer.Config);
        Assert.AreEqual(expectedDocCount, writer.MaxDoc, "index=" + writer.SegString() + " numDocs=" + writer.NumDocs() + " maxDoc=" + writer.MaxDoc + " config=" + writer.Config);

        writer.Dispose();
        writer = new IndexWriter(directory, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, ANALYZER).SetOpenMode(OpenMode_e.APPEND).SetMaxBufferedDocs(2));

        DirectoryReader reader = DirectoryReader.Open(directory);
        Assert.AreEqual(1, reader.Leaves.Count, "reader=" + reader);
        Assert.AreEqual(expectedDocCount, reader.NumDocs);
        reader.Dispose();
    }
    writer.Dispose();
}
public override void Merge(IndexWriter writer, MergeTrigger trigger, bool newMergesFound) {
    lock (this) {
        //Debug.Assert(!Thread.holdsLock(writer));

        this.m_writer = writer;

        InitMergeThreadPriority();

        m_dir = writer.Directory;

        // First, quickly run through the newly proposed merges
        // and add any orthogonal merges (ie a merge not
        // involving segments already pending to be merged) to
        // the queue. If we are way behind on merging, many of
        // these newly proposed merges will likely already be
        // registered.
        if (IsVerbose) {
            Message("now merge");
            Message(" index: " + writer.SegString());
        }

        // Iterate, pulling from the IndexWriter's queue of
        // pending merges, until it's empty:
        while (true) {
            long startStallTime = 0;
            while (writer.HasPendingMerges() && MergeThreadCount >= maxMergeCount) {
                // this means merging has fallen too far behind: we
                // have already created maxMergeCount threads, and
                // now there's at least one more merge pending.
                // Note that only maxThreadCount of
                // those created merge threads will actually be
                // running; the rest will be paused (see
                // updateMergeThreads). We stall this producer
                // thread to prevent creation of new segments,
                // until merging has caught up:
                startStallTime = Environment.TickCount;
                if (IsVerbose) {
                    Message(" too many merges; stalling...");
                }
                //try
                //{
                Monitor.Wait(this);
                //}
                //catch (ThreadInterruptedException ie) // LUCENENET NOTE: Senseless to catch and rethrow the same exception type
                //{
                //    throw new ThreadInterruptedException(ie.ToString(), ie);
                //}
            }

            if (IsVerbose) {
                if (startStallTime != 0) {
                    Message(" stalled for " + (Environment.TickCount - startStallTime) + " msec");
                }
            }

            MergePolicy.OneMerge merge = writer.NextMerge();
            if (merge == null) {
                if (IsVerbose) {
                    Message(" no more merges pending; now return");
                }
                return;
            }

            bool success = false;
            try {
                if (IsVerbose) {
                    Message(" consider merge " + writer.SegString(merge.Segments));
                }

                // OK to spawn a new merge thread to handle this
                // merge:
                MergeThread merger = GetMergeThread(writer, merge);
                m_mergeThreads.Add(merger);
                if (IsVerbose) {
                    Message(" launch new thread [" + merger.Name + "]");
                }

                merger.Start();

                // Must call this after starting the thread else
                // the new thread is removed from mergeThreads
                // (since it's not alive yet):
                UpdateMergeThreads();

                success = true;
            } finally {
                if (!success) {
                    writer.MergeFinish(merge);
                }
            }
        }
    }
}
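// --------------------------------------------------------------------------
// Illustrative usage sketch (not part of the snippets above): how a merge
// scheduler like the ConcurrentMergeScheduler shown here is typically wired
// into an IndexWriter. Assumes the Lucene.NET 4.8 API; the analyzer, index
// path, and merge/thread counts are arbitrary illustrative choices.
// Requires: Lucene.Net.Analysis.Standard, Lucene.Net.Index, Lucene.Net.Store,
// Lucene.Net.Util, System.IO.
// --------------------------------------------------------------------------
private static IndexWriter OpenWriterWithConcurrentMerges(string indexPath) {
    var scheduler = new ConcurrentMergeScheduler();
    // Allow up to 4 queued merges with at most 2 running concurrently
    // (illustrative values; tune for the actual hardware).
    scheduler.SetMaxMergesAndThreads(4, 2);

    var config = new IndexWriterConfig(LuceneVersion.LUCENE_48, new StandardAnalyzer(LuceneVersion.LUCENE_48));
    config.SetMergeScheduler(scheduler);

    // The writer hands pending merges to the scheduler, which runs them on
    // background threads as in the Merge(...) implementations in this file.
    return new IndexWriter(FSDirectory.Open(new DirectoryInfo(indexPath)), config);
}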
public override void Merge(IndexWriter writer) {
    this.writer = writer;

    InitMergeThreadPriority();

    dir = writer.GetDirectory();

    // First, quickly run through the newly proposed merges
    // and add any orthogonal merges (ie a merge not
    // involving segments already pending to be merged) to
    // the queue. If we are way behind on merging, many of
    // these newly proposed merges will likely already be
    // registered.
    Message("now merge");
    Message(" index: " + writer.SegString());

    // Iterate, pulling from the IndexWriter's queue of
    // pending merges, until it's empty:
    while (true) {
        // TODO: we could be careful about which merges to do in
        // the BG (eg maybe the "biggest" ones) vs FG, which
        // merges to do first (the easiest ones?), etc.
        MergePolicy.OneMerge merge = writer.GetNextMerge();
        if (merge == null) {
            Message(" no more merges pending; now return");
            return;
        }

        // We do this w/ the primary thread to keep
        // deterministic assignment of segment names
        writer.MergeInit(merge);

        Message(" consider merge " + merge.SegString(dir));

        if (merge.isExternal) {
            Message(" merge involves segments from an external directory; now run in foreground");
        } else {
            lock (this) {
                if (MergeThreadCount() < maxThreadCount) {
                    // OK to spawn a new merge thread to handle this
                    // merge:
                    MergeThread merger = new MergeThread(this, writer, merge);
                    mergeThreads.Add(merger);
                    Message(" launch new thread [" + merger.Name + "]");
                    merger.SetThreadPriority(mergeThreadPriority);
                    merger.IsBackground = true;
                    merger.Start();
                    continue;
                } else {
                    Message(" too many merge threads running; run merge in foreground");
                }
            }
        }

        // Too many merge threads already running, so we do
        // this in the foreground of the calling thread
        writer.Merge(merge);
    }
}
private void Run(CancellationToken cancellationToken) {
    // First time through the while loop we do the merge
    // that we were started with:
    MergePolicy.OneMerge merge = _startingMerge;

    try {
        if (_isLoggingEnabled) {
            _logger.Message(COMPONENT_NAME, " merge thread: start");
        }

        while (!cancellationToken.IsCancellationRequested) {
            RunningMerge = merge;
            _writer.Merge(merge);

            // Subsequent times through the loop we do any new
            // merge that writer says is necessary:
            merge = _writer.NextMerge();

            // Notify here in case any threads were stalled;
            // they will notice that the pending merge has
            // been pulled and possibly resume:
            _resetEvent.Set();

            if (merge != null) {
                if (_isLoggingEnabled) {
                    _logger.Message(COMPONENT_NAME, " merge thread: do another merge " + _writer.SegString(merge.Segments));
                }
            } else {
                break;
            }
        }

        if (_isLoggingEnabled) {
            _logger.Message(COMPONENT_NAME, " merge thread: done");
        }
    } catch (Exception exc) {
        // Ignore the exception if it was due to abort:
        if (!(exc is MergePolicy.MergeAbortedException)) {
            //System.out.println(Thread.currentThread().getName() + ": CMS: exc");
            //exc.printStackTrace(System.out)
            _exceptionHandler(exc);
        }
    } finally {
        _isDone = true;

        if (MergeThreadCompleted != null) {
            MergeThreadCompleted(this, EventArgs.Empty);
        }
    }
}
public override void Merge(IndexWriter writer, MergeTrigger trigger, bool newMergesFound) {
    using (_lock.Write()) {
        _writer = writer;
        _directory = writer.Directory;

        if (Verbose) {
            Message("now merge");
            Message(" index: " + writer.SegString());
        }

        // First, quickly run through the newly proposed merges
        // and add any orthogonal merges (ie a merge not
        // involving segments already pending to be merged) to
        // the queue. If we are way behind on merging, many of
        // these newly proposed merges will likely already be
        // registered.

        // Iterate, pulling from the IndexWriter's queue of
        // pending merges, until it's empty:
        while (true) {
            long startStallTime = 0;
            while (writer.HasPendingMerges() && MergeThreadCount >= MaxMergeCount) {
                // this means merging has fallen too far behind: we
                // have already created maxMergeCount threads, and
                // now there's at least one more merge pending.
                // Note that only maxThreadCount of
                // those created merge threads will actually be
                // running; the rest will be paused (see
                // updateMergeThreads). We stall this producer
                // thread to prevent creation of new segments,
                // until merging has caught up:
                startStallTime = Environment.TickCount;
                if (Verbose) {
                    Message(" too many merges; stalling...");
                }
                _manualResetEvent.Reset();
                _manualResetEvent.Wait();
            }

            if (Verbose) {
                if (startStallTime != 0) {
                    Message(" stalled for " + (Environment.TickCount - startStallTime) + " msec");
                }
            }

            MergePolicy.OneMerge merge = writer.NextMerge();
            if (merge == null) {
                if (Verbose) {
                    Message(" no more merges pending; now return");
                }
                return;
            }

            bool success = false;
            try {
                if (Verbose) {
                    Message(" consider merge " + writer.SegString(merge.Segments));
                }

                // OK to spawn a new merge thread to handle this
                // merge:
                var merger = CreateTask(writer, merge);

                merger.MergeThreadCompleted += OnMergeThreadCompleted;

                _mergeThreads.Add(merger);

                if (Verbose) {
                    Message(" launch new thread [" + merger.Name + "]");
                }

                merger.Start(_taskScheduler);

                // Must call this after starting the thread else
                // the new thread is removed from mergeThreads
                // (since it's not alive yet):
                UpdateMergeThreads();

                success = true;
            } finally {
                if (!success) {
                    writer.MergeFinish(merge);
                }
            }
        }
    }
}
public override void Merge(IndexWriter writer) {
    // TODO: enable this once we are on JRE 1.5
    // assert !Thread.holdsLock(writer);

    this.writer = writer;

    InitMergeThreadPriority();

    dir = writer.GetDirectory();

    // First, quickly run through the newly proposed merges
    // and add any orthogonal merges (ie a merge not
    // involving segments already pending to be merged) to
    // the queue. If we are way behind on merging, many of
    // these newly proposed merges will likely already be
    // registered.
    if (Verbose()) {
        Message("now merge");
        Message(" index: " + writer.SegString());
    }

    // Iterate, pulling from the IndexWriter's queue of
    // pending merges, until it's empty:
    while (true) {
        // TODO: we could be careful about which merges to do in
        // the BG (eg maybe the "biggest" ones) vs FG, which
        // merges to do first (the easiest ones?), etc.
        MergePolicy.OneMerge merge = writer.GetNextMerge();
        if (merge == null) {
            if (Verbose())
                Message(" no more merges pending; now return");
            return;
        }

        // We do this w/ the primary thread to keep
        // deterministic assignment of segment names
        writer.MergeInit(merge);

        bool success = false;
        try {
            lock (this) {
                MergeThread merger;
                while (MergeThreadCount(true) >= maxThreadCount) {
                    if (Verbose())
                        Message(" too many merge threads running; stalling...");
                    try {
                        System.Threading.Monitor.Wait(this);
                    } catch (System.Threading.ThreadInterruptedException ie) {
                        // In 3.0 we will change this to throw
                        // InterruptedException instead
                        Support.ThreadClass.Current().Interrupt();
                        throw new System.SystemException(ie.Message, ie);
                    }
                }

                if (Verbose())
                    Message(" consider merge " + merge.SegString(dir));

                // OK to spawn a new merge thread to handle this
                // merge:
                merger = GetMergeThread(writer, merge);
                mergeThreads.Add(merger);
                if (Verbose())
                    Message(" launch new thread [" + merger.Name + "]");
                merger.Start();
                success = true;
            }
        } finally {
            if (!success) {
                writer.MergeFinish(merge);
            }
        }
    }
}
private void CheckInvariants(IndexWriter writer) {
    writer.WaitForMerges();
    int maxBufferedDocs = writer.Config.MaxBufferedDocs;
    int mergeFactor = ((LogMergePolicy)writer.Config.MergePolicy).MergeFactor;
    int maxMergeDocs = ((LogMergePolicy)writer.Config.MergePolicy).MaxMergeDocs;

    int ramSegmentCount = writer.NumBufferedDocuments;
    Assert.IsTrue(ramSegmentCount < maxBufferedDocs);

    int lowerBound = -1;
    int upperBound = maxBufferedDocs;
    int numSegments = 0;

    int segmentCount = writer.SegmentCount;
    for (int i = segmentCount - 1; i >= 0; i--) {
        int docCount = writer.GetDocCount(i);
        Assert.IsTrue(docCount > lowerBound, "docCount=" + docCount + " lowerBound=" + lowerBound + " upperBound=" + upperBound + " i=" + i + " segmentCount=" + segmentCount + " index=" + writer.SegString() + " config=" + writer.Config);

        if (docCount <= upperBound) {
            numSegments++;
        } else {
            if (upperBound * mergeFactor <= maxMergeDocs) {
                Assert.IsTrue(numSegments < mergeFactor, "maxMergeDocs=" + maxMergeDocs + "; numSegments=" + numSegments + "; upperBound=" + upperBound + "; mergeFactor=" + mergeFactor + "; segs=" + writer.SegString() + " config=" + writer.Config);
            }

            do {
                lowerBound = upperBound;
                upperBound *= mergeFactor;
            } while (docCount > upperBound);
            numSegments = 1;
        }
    }

    if (upperBound * mergeFactor <= maxMergeDocs) {
        Assert.IsTrue(numSegments < mergeFactor);
    }
}
public virtual void RunTest(string testName) {
    m_failed.Value = (false);
    m_addCount.Value = 0;
    m_delCount.Value = 0;
    m_packCount.Value = 0;

    long t0 = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond; // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results

    Random random = new J2N.Randomizer(Random.NextInt64());
    using LineFileDocs docs = new LineFileDocs(random, DefaultCodecSupportsDocValues);
    DirectoryInfo tempDir = CreateTempDir(testName);
    m_dir = GetDirectory(NewMockFSDirectory(tempDir)); // some subclasses rely on this being MDW
    if (m_dir is BaseDirectoryWrapper baseDirectoryWrapper) {
        baseDirectoryWrapper.CheckIndexOnDispose = false; // don't double-checkIndex, we do it ourselves.
    }
    MockAnalyzer analyzer = new MockAnalyzer(LuceneTestCase.Random);
    analyzer.MaxTokenLength = TestUtil.NextInt32(LuceneTestCase.Random, 1, IndexWriter.MAX_TERM_LENGTH);
    IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetInfoStream(new FailOnNonBulkMergesInfoStream());

    if (LuceneTestCase.TestNightly) {
        // newIWConfig makes smallish max seg size, which
        // results in tons and tons of segments for this test
        // when run nightly:
        MergePolicy mp = conf.MergePolicy;
        if (mp is TieredMergePolicy tieredMergePolicy) {
            //tieredMergePolicy.MaxMergedSegmentMB = 5000.0;
            tieredMergePolicy.MaxMergedSegmentMB = 2500.0; // LUCENENET specific - reduced each number by 50% to keep testing time under 1 hour
        } else if (mp is LogByteSizeMergePolicy logByteSizeMergePolicy) {
            //logByteSizeMergePolicy.MaxMergeMB = 1000.0;
            logByteSizeMergePolicy.MaxMergeMB = 500.0; // LUCENENET specific - reduced each number by 50% to keep testing time under 1 hour
        } else if (mp is LogMergePolicy logMergePolicy) {
            //logMergePolicy.MaxMergeDocs = 100000;
            logMergePolicy.MaxMergeDocs = 50000; // LUCENENET specific - reduced each number by 50% to keep testing time under 1 hour
        }
    }

    conf.SetMergedSegmentWarmer(new IndexReaderWarmerAnonymousClass(this));

    if (Verbose) {
        conf.SetInfoStream(new PrintStreamInfoStreamAnonymousClass(Console.Out));
    }

    m_writer = new IndexWriter(m_dir, conf);
    TestUtil.ReduceOpenFiles(m_writer);

    TaskScheduler es = LuceneTestCase.Random.NextBoolean() ? null : TaskScheduler.Default;

    DoAfterWriter(es);

    int NUM_INDEX_THREADS = TestUtil.NextInt32(LuceneTestCase.Random, 2, 4);

    //int RUN_TIME_SEC = LuceneTestCase.TestNightly ? 300 : RandomMultiplier;
    // LUCENENET specific - lowered from 300 to 150 to reduce total time on Nightly
    // build to less than 1 hour.
    int RUN_TIME_SEC = LuceneTestCase.TestNightly ? 150 : RandomMultiplier;

    ISet<string> delIDs = new ConcurrentHashSet<string>();
    ISet<string> delPackIDs = new ConcurrentHashSet<string>();
    ConcurrentQueue<SubDocs> allSubDocs = new ConcurrentQueue<SubDocs>();

    long stopTime = (J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) + (RUN_TIME_SEC * 1000); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results

    ThreadJob[] indexThreads = LaunchIndexingThreads(docs, NUM_INDEX_THREADS, stopTime, delIDs, delPackIDs, allSubDocs);

    if (Verbose) {
        Console.WriteLine("TEST: DONE start " + NUM_INDEX_THREADS + " indexing threads [" + ((J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) - t0) + " ms]"); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
    }

    // Let index build up a bit
    Thread.Sleep(100);

    DoSearching(es, stopTime);

    if (Verbose) {
        Console.WriteLine("TEST: all searching done [" + ((J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) - t0) + " ms]"); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
    }

    for (int thread = 0; thread < indexThreads.Length; thread++) {
        indexThreads[thread].Join();
    }

    if (Verbose) {
        Console.WriteLine("TEST: done join indexing threads [" + ((J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) - t0) + " ms]; addCount=" + m_addCount + " delCount=" + m_delCount); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
    }

    IndexSearcher s = GetFinalSearcher();
    if (Verbose) {
        Console.WriteLine("TEST: finalSearcher=" + s);
    }

    assertFalse(m_failed);

    bool doFail = false;

    // Verify: make sure delIDs are in fact deleted:
    foreach (string id in delIDs) {
        TopDocs hits = s.Search(new TermQuery(new Term("docid", id)), 1);
        if (hits.TotalHits != 0) {
            Console.WriteLine("doc id=" + id + " is supposed to be deleted, but got " + hits.TotalHits + " hits; first docID=" + hits.ScoreDocs[0].Doc);
            doFail = true;
        }
    }

    // Verify: make sure delPackIDs are in fact deleted:
    foreach (string id in delPackIDs) {
        TopDocs hits = s.Search(new TermQuery(new Term("packID", id)), 1);
        if (hits.TotalHits != 0) {
            Console.WriteLine("packID=" + id + " is supposed to be deleted, but got " + hits.TotalHits + " matches");
            doFail = true;
        }
    }

    // Verify: make sure each group of sub-docs are still in docID order:
    foreach (SubDocs subDocs in allSubDocs) {
        TopDocs hits = s.Search(new TermQuery(new Term("packID", subDocs.PackID)), 20);
        if (!subDocs.Deleted) {
            // We sort by relevance but the scores should be identical so sort falls back to by docID:
            if (hits.TotalHits != subDocs.SubIDs.Count) {
                Console.WriteLine("packID=" + subDocs.PackID + ": expected " + subDocs.SubIDs.Count + " hits but got " + hits.TotalHits);
                doFail = true;
            } else {
                int lastDocID = -1;
                int startDocID = -1;
                foreach (ScoreDoc scoreDoc in hits.ScoreDocs) {
                    int docID = scoreDoc.Doc;
                    if (lastDocID != -1) {
                        assertEquals(1 + lastDocID, docID);
                    } else {
                        startDocID = docID;
                    }
                    lastDocID = docID;
                    Document doc = s.Doc(docID);
                    assertEquals(subDocs.PackID, doc.Get("packID"));
                }

                lastDocID = startDocID - 1;
                foreach (string subID in subDocs.SubIDs) {
                    hits = s.Search(new TermQuery(new Term("docid", subID)), 1);
                    assertEquals(1, hits.TotalHits);
                    int docID = hits.ScoreDocs[0].Doc;
                    if (lastDocID != -1) {
                        assertEquals(1 + lastDocID, docID);
                    }
                    lastDocID = docID;
                }
            }
        } else {
            // Pack was deleted -- make sure its docs are
            // deleted. We can't verify packID is deleted
            // because we can re-use packID for update:
            foreach (string subID in subDocs.SubIDs) {
                assertEquals(0, s.Search(new TermQuery(new Term("docid", subID)), 1).TotalHits);
            }
        }
    }

    // Verify: make sure all not-deleted docs are in fact
    // not deleted:
    int endID = Convert.ToInt32(docs.NextDoc().Get("docid"), CultureInfo.InvariantCulture);
    docs.Dispose();

    for (int id = 0; id < endID; id++) {
        string stringID = id.ToString(CultureInfo.InvariantCulture);
        if (!delIDs.Contains(stringID)) {
            TopDocs hits = s.Search(new TermQuery(new Term("docid", stringID)), 1);
            if (hits.TotalHits != 1) {
                Console.WriteLine("doc id=" + stringID + " is not supposed to be deleted, but got hitCount=" + hits.TotalHits + "; delIDs=" + Collections.ToString(delIDs));
                doFail = true;
            }
        }
    }

    assertFalse(doFail);

    assertEquals("index=" + m_writer.SegString() + " addCount=" + m_addCount + " delCount=" + m_delCount, m_addCount - m_delCount, s.IndexReader.NumDocs);

    ReleaseSearcher(s);

    m_writer.Commit();

    assertEquals("index=" + m_writer.SegString() + " addCount=" + m_addCount + " delCount=" + m_delCount, m_addCount - m_delCount, m_writer.NumDocs);

    DoClose();

    m_writer.Dispose(false);

    // Cannot shutdown until after writer is closed because
    // writer has merged segment warmer that uses IS to run
    // searches, and that IS may be using this es!
    /*if (es != null)
     * {
     *     es.shutdown();
     *     es.awaitTermination(1, TimeUnit.SECONDS);
     * }*/

    TestUtil.CheckIndex(m_dir);
    m_dir.Dispose();
    //System.IO.Directory.Delete(tempDir.FullName, true);
    TestUtil.Rm(tempDir);

    if (Verbose) {
        Console.WriteLine("TEST: done [" + ((J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) - t0) + " ms]"); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
    }
}
public override void Merge(IndexWriter writer) {
    // TODO: enable this once we are on JRE 1.5
    // assert !Thread.holdsLock(writer);

    this.writer = writer;

    InitMergeThreadPriority();

    dir = writer.GetDirectory();

    // First, quickly run through the newly proposed merges
    // and add any orthogonal merges (ie a merge not
    // involving segments already pending to be merged) to
    // the queue. If we are way behind on merging, many of
    // these newly proposed merges will likely already be
    // registered.
    if (Verbose()) {
        Message("now merge");
        Message(" index: " + writer.SegString());
    }

    // Iterate, pulling from the IndexWriter's queue of
    // pending merges, until it's empty:
    while (true) {
        // TODO: we could be careful about which merges to do in
        // the BG (eg maybe the "biggest" ones) vs FG, which
        // merges to do first (the easiest ones?), etc.
        MergePolicy.OneMerge merge = writer.GetNextMerge();
        if (merge == null) {
            if (Verbose()) {
                Message(" no more merges pending; now return");
            }
            return;
        }

        // We do this w/ the primary thread to keep
        // deterministic assignment of segment names
        writer.MergeInit(merge);

        bool success = false;
        try {
            lock (this) {
                MergeThread merger;
                while (MergeThreadCount() >= maxThreadCount) {
                    if (Verbose()) {
                        Message(" too many merge threads running; stalling...");
                    }
                    try {
                        System.Threading.Monitor.Wait(this);
                    } catch (System.Threading.ThreadInterruptedException ie) {
                        // In 3.0 we will change this to throw
                        // InterruptedException instead
                        SupportClass.ThreadClass.Current().Interrupt();
                        throw new System.SystemException(ie.Message, ie);
                    }
                }

                if (Verbose()) {
                    Message(" consider merge " + merge.SegString(dir));
                }

                System.Diagnostics.Debug.Assert(MergeThreadCount() < maxThreadCount);

                // OK to spawn a new merge thread to handle this
                // merge:
                merger = GetMergeThread(writer, merge);
                mergeThreads.Add(merger);
                if (Verbose()) {
                    Message(" launch new thread [" + merger.Name + "]");
                }
                merger.Start();
                success = true;
            }
        } finally {
            if (!success) {
                writer.MergeFinish(merge);
            }
        }
    }
}
// Collections.synchronizedMap(new WeakHashMap<SegmentCoreReaders, bool?>());
public virtual void RunTest(string testName) {
    Failed.Set(false);
    AddCount.Set(0);
    DelCount.Set(0);
    PackCount.Set(0);

    DateTime t0 = DateTime.UtcNow;

    Random random = new Random(Random().Next());
    LineFileDocs docs = new LineFileDocs(random, DefaultCodecSupportsDocValues());
    DirectoryInfo tempDir = CreateTempDir(testName);
    Dir = GetDirectory(NewMockFSDirectory(tempDir)); // some subclasses rely on this being MDW
    if (Dir is BaseDirectoryWrapper) {
        ((BaseDirectoryWrapper)Dir).CheckIndexOnClose = false; // don't double-checkIndex, we do it ourselves.
    }
    MockAnalyzer analyzer = new MockAnalyzer(Random());
    analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);
    IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetInfoStream(new FailOnNonBulkMergesInfoStream());

    if (LuceneTestCase.TEST_NIGHTLY) {
        // newIWConfig makes smallish max seg size, which
        // results in tons and tons of segments for this test
        // when run nightly:
        MergePolicy mp = conf.MergePolicy;
        if (mp is TieredMergePolicy) {
            ((TieredMergePolicy)mp).MaxMergedSegmentMB = 5000.0;
        } else if (mp is LogByteSizeMergePolicy) {
            ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1000.0;
        } else if (mp is LogMergePolicy) {
            ((LogMergePolicy)mp).MaxMergeDocs = 100000;
        }
    }

    conf.SetMergedSegmentWarmer(new IndexReaderWarmerAnonymousInnerClassHelper(this));

    if (VERBOSE) {
        conf.InfoStream = new PrintStreamInfoStreamAnonymousInnerClassHelper(this, Console.Out);
    }

    Writer = new IndexWriter(Dir, conf);
    TestUtil.ReduceOpenFiles(Writer);

    TaskScheduler es = Random().NextBoolean() ? null : TaskScheduler.Default;

    DoAfterWriter(es);

    int NUM_INDEX_THREADS = TestUtil.NextInt(Random(), 2, 4);

    int RUN_TIME_SEC = LuceneTestCase.TEST_NIGHTLY ? 300 : RANDOM_MULTIPLIER;

    ISet<string> delIDs = new ConcurrentHashSet<string>(new HashSet<string>());
    ISet<string> delPackIDs = new ConcurrentHashSet<string>(new HashSet<string>());
    IList<SubDocs> allSubDocs = new SynchronizedCollection<SubDocs>();

    DateTime stopTime = DateTime.UtcNow.AddSeconds(RUN_TIME_SEC);

    ThreadClass[] indexThreads = LaunchIndexingThreads(docs, NUM_INDEX_THREADS, stopTime, delIDs, delPackIDs, allSubDocs);

    if (VERBOSE) {
        Console.WriteLine("TEST: DONE start " + NUM_INDEX_THREADS + " indexing threads [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]");
    }

    // Let index build up a bit
    Thread.Sleep(100);

    DoSearching(es, stopTime);

    if (VERBOSE) {
        Console.WriteLine("TEST: all searching done [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]");
    }

    for (int thread = 0; thread < indexThreads.Length; thread++) {
        indexThreads[thread].Join();
    }

    if (VERBOSE) {
        Console.WriteLine("TEST: done join indexing threads [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]; addCount=" + AddCount + " delCount=" + DelCount);
    }

    IndexSearcher s = FinalSearcher;
    if (VERBOSE) {
        Console.WriteLine("TEST: finalSearcher=" + s);
    }

    Assert.IsFalse(Failed.Get());

    bool doFail = false;

    // Verify: make sure delIDs are in fact deleted:
    foreach (string id in delIDs) {
        TopDocs hits = s.Search(new TermQuery(new Term("docid", id)), 1);
        if (hits.TotalHits != 0) {
            Console.WriteLine("doc id=" + id + " is supposed to be deleted, but got " + hits.TotalHits + " hits; first docID=" + hits.ScoreDocs[0].Doc);
            doFail = true;
        }
    }

    // Verify: make sure delPackIDs are in fact deleted:
    foreach (string id in delPackIDs) {
        TopDocs hits = s.Search(new TermQuery(new Term("packID", id)), 1);
        if (hits.TotalHits != 0) {
            Console.WriteLine("packID=" + id + " is supposed to be deleted, but got " + hits.TotalHits + " matches");
            doFail = true;
        }
    }

    // Verify: make sure each group of sub-docs are still in docID order:
    foreach (SubDocs subDocs in allSubDocs.ToList()) {
        TopDocs hits = s.Search(new TermQuery(new Term("packID", subDocs.PackID)), 20);
        if (!subDocs.Deleted) {
            // We sort by relevance but the scores should be identical so sort falls back to by docID:
            if (hits.TotalHits != subDocs.SubIDs.Count) {
                Console.WriteLine("packID=" + subDocs.PackID + ": expected " + subDocs.SubIDs.Count + " hits but got " + hits.TotalHits);
                doFail = true;
            } else {
                int lastDocID = -1;
                int startDocID = -1;
                foreach (ScoreDoc scoreDoc in hits.ScoreDocs) {
                    int docID = scoreDoc.Doc;
                    if (lastDocID != -1) {
                        Assert.AreEqual(1 + lastDocID, docID);
                    } else {
                        startDocID = docID;
                    }
                    lastDocID = docID;
                    Document doc = s.Doc(docID);
                    Assert.AreEqual(subDocs.PackID, doc.Get("packID"));
                }

                lastDocID = startDocID - 1;
                foreach (string subID in subDocs.SubIDs) {
                    hits = s.Search(new TermQuery(new Term("docid", subID)), 1);
                    Assert.AreEqual(1, hits.TotalHits);
                    int docID = hits.ScoreDocs[0].Doc;
                    if (lastDocID != -1) {
                        Assert.AreEqual(1 + lastDocID, docID);
                    }
                    lastDocID = docID;
                }
            }
        } else {
            // Pack was deleted -- make sure its docs are
            // deleted. We can't verify packID is deleted
            // because we can re-use packID for update:
            foreach (string subID in subDocs.SubIDs) {
                Assert.AreEqual(0, s.Search(new TermQuery(new Term("docid", subID)), 1).TotalHits);
            }
        }
    }

    // Verify: make sure all not-deleted docs are in fact
    // not deleted:
    int endID = Convert.ToInt32(docs.NextDoc().Get("docid"));
    docs.Dispose();

    for (int id = 0; id < endID; id++) {
        string stringID = "" + id;
        if (!delIDs.Contains(stringID)) {
            TopDocs hits = s.Search(new TermQuery(new Term("docid", stringID)), 1);
            if (hits.TotalHits != 1) {
                Console.WriteLine("doc id=" + stringID + " is not supposed to be deleted, but got hitCount=" + hits.TotalHits + "; delIDs=" + string.Join(",", delIDs.ToArray()));
                doFail = true;
            }
        }
    }

    Assert.IsFalse(doFail);

    Assert.AreEqual(AddCount.Get() - DelCount.Get(), s.IndexReader.NumDocs, "index=" + Writer.SegString() + " addCount=" + AddCount + " delCount=" + DelCount);

    ReleaseSearcher(s);

    Writer.Commit();

    Assert.AreEqual(AddCount.Get() - DelCount.Get(), Writer.NumDocs(), "index=" + Writer.SegString() + " addCount=" + AddCount + " delCount=" + DelCount);

    DoClose();
    Writer.Dispose(false);

    // Cannot shutdown until after writer is closed because
    // writer has merged segment warmer that uses IS to run
    // searches, and that IS may be using this es!
    /*if (es != null)
    {
        es.shutdown();
        es.awaitTermination(1, TimeUnit.SECONDS);
    }*/

    TestUtil.CheckIndex(Dir);
    Dir.Dispose();
    System.IO.Directory.Delete(tempDir.FullName, true);

    if (VERBOSE) {
        Console.WriteLine("TEST: done [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]");
    }
}
public override void Merge(IndexWriter writer, MergeTrigger trigger, bool newMergesFound) {
    using (_lock.Write()) {
        _writer = writer;
        _directory = writer.Directory;

        if (Verbose()) {
            Message("now merge");
            Message(" index: " + writer.SegString());
        }

        // First, quickly run through the newly proposed merges
        // and add any orthogonal merges (ie a merge not
        // involving segments already pending to be merged) to
        // the queue. If we are way behind on merging, many of
        // these newly proposed merges will likely already be
        // registered.

        // Iterate, pulling from the IndexWriter's queue of
        // pending merges, until it's empty:
        while (true) {
            long startStallTime = 0;
            while (writer.HasPendingMerges() && MergeThreadCount() >= MaxMergeCount) {
                // this means merging has fallen too far behind: we
                // have already created maxMergeCount threads, and
                // now there's at least one more merge pending.
                // Note that only maxThreadCount of
                // those created merge threads will actually be
                // running; the rest will be paused (see
                // updateMergeThreads). We stall this producer
                // thread to prevent creation of new segments,
                // until merging has caught up:
                startStallTime = Environment.TickCount;
                if (Verbose()) {
                    Message(" too many merges; stalling...");
                }
                _manualResetEvent.Reset();
                _manualResetEvent.Wait();
            }

            if (Verbose()) {
                if (startStallTime != 0) {
                    Message(" stalled for " + (Environment.TickCount - startStallTime) + " msec");
                }
            }

            MergePolicy.OneMerge merge = writer.NextMerge;
            if (merge == null) {
                if (Verbose()) {
                    Message(" no more merges pending; now return");
                }
                return;
            }

            bool success = false;
            try {
                if (Verbose()) {
                    Message(" consider merge " + writer.SegString(merge.Segments));
                }

                // OK to spawn a new merge thread to handle this
                // merge:
                var merger = CreateTask(writer, merge);

                merger.MergeThreadCompleted += OnMergeThreadCompleted;

                _mergeThreads.Add(merger);

                if (Verbose()) {
                    Message(" launch new thread [" + merger.Name + "]");
                }

                merger.Start(_taskScheduler);

                // Must call this after starting the thread else
                // the new thread is removed from mergeThreads
                // (since it's not alive yet):
                UpdateMergeThreads();

                success = true;
            } finally {
                if (!success) {
                    writer.MergeFinish(merge);
                }
            }
        }
    }
}
public override void Merge(IndexWriter writer) {
    this.writer = writer;

    InitMergeThreadPriority();

    dir = writer.GetDirectory();

    // First, quickly run through the newly proposed merges
    // and add any orthogonal merges (ie a merge not
    // involving segments already pending to be merged) to
    // the queue. If we are way behind on merging, many of
    // these newly proposed merges will likely already be
    // registered.
    Message("now merge");
    Message(" index: " + writer.SegString());

    // Iterate, pulling from the IndexWriter's queue of
    // pending merges, until it's empty:
    while (true) {
        // TODO: we could be careful about which merges to do in
        // the BG (eg maybe the "biggest" ones) vs FG, which
        // merges to do first (the easiest ones?), etc.
        MergePolicy.OneMerge merge = writer.GetNextMerge();
        if (merge == null) {
            Message(" no more merges pending; now return");
            return;
        }

        // We do this w/ the primary thread to keep
        // deterministic assignment of segment names
        writer.MergeInit(merge);

        Message(" consider merge " + merge.SegString(dir));

        if (merge.isExternal) {
            Message(" merge involves segments from an external directory; now run in foreground");
        } else {
            lock (this) {
                if (MergeThreadCount() < maxThreadCount) {
                    // OK to spawn a new merge thread to handle this
                    // merge:
                    MergeThread merger = new MergeThread(this, writer, merge);
                    mergeThreads.Add(merger);
                    Message(" launch new thread [" + merger.Name + "]");
                    merger.SetThreadPriority(mergeThreadPriority);
                    merger.IsBackground = true;
                    merger.Start();
                    continue;
                } else
                    Message(" too many merge threads running; run merge in foreground");
            }
        }

        // Too many merge threads already running, so we do
        // this in the foreground of the calling thread
        writer.Merge(merge);
    }
}
public override void Run() {
    // First time through the while loop we do the merge
    // that we were started with:
    MergePolicy.OneMerge merge = this.startMerge;

    try {
        if (outerInstance.IsVerbose) {
            outerInstance.Message(" merge thread: start");
        }

        while (true) {
            RunningMerge = merge;
            outerInstance.DoMerge(merge);

            // Subsequent times through the loop we do any new
            // merge that writer says is necessary:
            merge = tWriter.NextMerge();

            // Notify here in case any threads were stalled;
            // they will notice that the pending merge has
            // been pulled and possibly resume:
            lock (outerInstance) {
                Monitor.PulseAll(outerInstance);
            }

            if (merge != null) {
                outerInstance.UpdateMergeThreads();
                if (outerInstance.IsVerbose) {
                    outerInstance.Message(" merge thread: do another merge " + tWriter.SegString(merge.Segments));
                }
            } else {
                break;
            }
        }

        if (outerInstance.IsVerbose) {
            outerInstance.Message(" merge thread: done");
        }
    } catch (Exception exc) {
        // Ignore the exception if it was due to abort:
        if (!(exc is MergePolicy.MergeAbortedException)) {
            //System.out.println(Thread.currentThread().getName() + ": CMS: exc");
            //exc.printStackTrace(System.out);
            if (!outerInstance.suppressExceptions) {
                // suppressExceptions is normally only set during
                // testing.
                outerInstance.HandleMergeException(exc);
            }
        }
    } finally {
        done = true;
        lock (outerInstance) {
            outerInstance.UpdateMergeThreads();
            Monitor.PulseAll(outerInstance);
        }
    }
}
public override void Merge(IndexWriter writer) {
    this.writer = writer;

    InitMergeThreadPriority();

    dir = writer.GetDirectory();

    // First, quickly run through the newly proposed merges
    // and add any orthogonal merges (ie a merge not
    // involving segments already pending to be merged) to
    // the queue. If we are way behind on merging, many of
    // these newly proposed merges will likely already be
    // registered.
    Message("now merge");
    Message(" index: " + writer.SegString());

    // Iterate, pulling from the IndexWriter's queue of
    // pending merges, until it's empty:
    while (true) {
        // TODO: we could be careful about which merges to do in
        // the BG (eg maybe the "biggest" ones) vs FG, which
        // merges to do first (the easiest ones?), etc.
        MergePolicy.OneMerge merge = writer.GetNextMerge();
        if (merge == null) {
            Message(" no more merges pending; now return");
            return;
        }

        // We do this w/ the primary thread to keep
        // deterministic assignment of segment names
        writer.MergeInit(merge);

        lock (this) {
            while (MergeThreadCount() >= maxThreadCount) {
                Message(" too many merge threads running; stalling...");
                try {
                    System.Threading.Monitor.Wait(this);
                } catch (System.Threading.ThreadInterruptedException) {
                    SupportClass.ThreadClass.Current().Interrupt();
                }
            }

            Message(" consider merge " + merge.SegString(dir));

            System.Diagnostics.Debug.Assert(MergeThreadCount() < maxThreadCount);

            // OK to spawn a new merge thread to handle this
            // merge:
            MergeThread merger = GetMergeThread(writer, merge);
            mergeThreads.Add(merger);
            Message(" launch new thread [" + merger.Name + "]");
            merger.Start();
        }
    }
}
// Collections.synchronizedMap(new WeakHashMap<SegmentCoreReaders, bool?>());
public virtual void RunTest(string testName) {
    Failed.Set(false);
    AddCount.Set(0);
    DelCount.Set(0);
    PackCount.Set(0);

    DateTime t0 = DateTime.UtcNow;

    Random random = new Random(Random().Next());
    LineFileDocs docs = new LineFileDocs(random, DefaultCodecSupportsDocValues());
    DirectoryInfo tempDir = CreateTempDir(testName);
    Dir = GetDirectory(NewMockFSDirectory(tempDir)); // some subclasses rely on this being MDW
    if (Dir is BaseDirectoryWrapper) {
        ((BaseDirectoryWrapper)Dir).CheckIndexOnClose = false; // don't double-checkIndex, we do it ourselves.
    }
    MockAnalyzer analyzer = new MockAnalyzer(Random());
    analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);
    IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetInfoStream(new FailOnNonBulkMergesInfoStream());

    if (LuceneTestCase.TEST_NIGHTLY) {
        // newIWConfig makes smallish max seg size, which
        // results in tons and tons of segments for this test
        // when run nightly:
        MergePolicy mp = conf.MergePolicy;
        if (mp is TieredMergePolicy) {
            ((TieredMergePolicy)mp).MaxMergedSegmentMB = 5000.0;
        } else if (mp is LogByteSizeMergePolicy) {
            ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1000.0;
        } else if (mp is LogMergePolicy) {
            ((LogMergePolicy)mp).MaxMergeDocs = 100000;
        }
    }

    conf.SetMergedSegmentWarmer(new IndexReaderWarmerAnonymousInnerClassHelper(this));

    if (VERBOSE) {
        conf.InfoStream = new PrintStreamInfoStreamAnonymousInnerClassHelper(this, Console.Out);
    }

    Writer = new IndexWriter(Dir, conf);
    TestUtil.ReduceOpenFiles(Writer);

    //TaskScheduler es = Random().NextBoolean() ? null : Executors.newCachedThreadPool(new NamedThreadFactory(testName));
    TaskScheduler es = null;

    DoAfterWriter(es);

    int NUM_INDEX_THREADS = TestUtil.NextInt(Random(), 2, 4);

    int RUN_TIME_SEC = LuceneTestCase.TEST_NIGHTLY ? 300 : RANDOM_MULTIPLIER;

    ISet<string> delIDs = new ConcurrentHashSet<string>(new HashSet<string>());
    ISet<string> delPackIDs = new ConcurrentHashSet<string>(new HashSet<string>());
    IList<SubDocs> allSubDocs = new SynchronizedCollection<SubDocs>();

    DateTime stopTime = DateTime.UtcNow.AddSeconds(RUN_TIME_SEC);

    ThreadClass[] indexThreads = LaunchIndexingThreads(docs, NUM_INDEX_THREADS, stopTime, delIDs, delPackIDs, allSubDocs);

    if (VERBOSE) {
        Console.WriteLine("TEST: DONE start " + NUM_INDEX_THREADS + " indexing threads [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]");
    }

    // Let index build up a bit
    Thread.Sleep(100);

    DoSearching(es, stopTime);

    if (VERBOSE) {
        Console.WriteLine("TEST: all searching done [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]");
    }

    for (int thread = 0; thread < indexThreads.Length; thread++) {
        indexThreads[thread].Join();
    }

    if (VERBOSE) {
        Console.WriteLine("TEST: done join indexing threads [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]; addCount=" + AddCount + " delCount=" + DelCount);
    }

    IndexSearcher s = FinalSearcher;
    if (VERBOSE) {
        Console.WriteLine("TEST: finalSearcher=" + s);
    }

    Assert.IsFalse(Failed.Get());

    bool doFail = false;

    // Verify: make sure delIDs are in fact deleted:
    foreach (string id in delIDs) {
        TopDocs hits = s.Search(new TermQuery(new Term("docid", id)), 1);
        if (hits.TotalHits != 0) {
            Console.WriteLine("doc id=" + id + " is supposed to be deleted, but got " + hits.TotalHits + " hits; first docID=" + hits.ScoreDocs[0].Doc);
            doFail = true;
        }
    }

    // Verify: make sure delPackIDs are in fact deleted:
    foreach (string id in delPackIDs) {
        TopDocs hits = s.Search(new TermQuery(new Term("packID", id)), 1);
        if (hits.TotalHits != 0) {
            Console.WriteLine("packID=" + id + " is supposed to be deleted, but got " + hits.TotalHits + " matches");
            doFail = true;
        }
    }

    // Verify: make sure each group of sub-docs are still in docID order:
    foreach (SubDocs subDocs in allSubDocs.ToList()) {
        TopDocs hits = s.Search(new TermQuery(new Term("packID", subDocs.PackID)), 20);
        if (!subDocs.Deleted) {
            // We sort by relevance but the scores should be identical so sort falls back to by docID:
            if (hits.TotalHits != subDocs.SubIDs.Count) {
                Console.WriteLine("packID=" + subDocs.PackID + ": expected " + subDocs.SubIDs.Count + " hits but got " + hits.TotalHits);
                doFail = true;
            } else {
                int lastDocID = -1;
                int startDocID = -1;
                foreach (ScoreDoc scoreDoc in hits.ScoreDocs) {
                    int docID = scoreDoc.Doc;
                    if (lastDocID != -1) {
                        Assert.AreEqual(1 + lastDocID, docID);
                    } else {
                        startDocID = docID;
                    }
                    lastDocID = docID;
                    Document doc = s.Doc(docID);
                    Assert.AreEqual(subDocs.PackID, doc.Get("packID"));
                }

                lastDocID = startDocID - 1;
                foreach (string subID in subDocs.SubIDs) {
                    hits = s.Search(new TermQuery(new Term("docid", subID)), 1);
                    Assert.AreEqual(1, hits.TotalHits);
                    int docID = hits.ScoreDocs[0].Doc;
                    if (lastDocID != -1) {
                        Assert.AreEqual(1 + lastDocID, docID);
                    }
                    lastDocID = docID;
                }
            }
        } else {
            // Pack was deleted -- make sure its docs are
            // deleted. We can't verify packID is deleted
            // because we can re-use packID for update:
            foreach (string subID in subDocs.SubIDs) {
                Assert.AreEqual(0, s.Search(new TermQuery(new Term("docid", subID)), 1).TotalHits);
            }
        }
    }

    // Verify: make sure all not-deleted docs are in fact
    // not deleted:
    int endID = Convert.ToInt32(docs.NextDoc().Get("docid"));
    docs.Dispose();

    for (int id = 0; id < endID; id++) {
        string stringID = "" + id;
        if (!delIDs.Contains(stringID)) {
            TopDocs hits = s.Search(new TermQuery(new Term("docid", stringID)), 1);
            if (hits.TotalHits != 1) {
                Console.WriteLine("doc id=" + stringID + " is not supposed to be deleted, but got hitCount=" + hits.TotalHits + "; delIDs=" + delIDs);
                doFail = true;
            }
        }
    }

    Assert.IsFalse(doFail);

    Assert.AreEqual(AddCount.Get() - DelCount.Get(), s.IndexReader.NumDocs, "index=" + Writer.SegString() + " addCount=" + AddCount + " delCount=" + DelCount);

    ReleaseSearcher(s);

    Writer.Commit();

    Assert.AreEqual(AddCount.Get() - DelCount.Get(), Writer.NumDocs(), "index=" + Writer.SegString() + " addCount=" + AddCount + " delCount=" + DelCount);

    DoClose();
    Writer.Dispose(false);

    // Cannot shutdown until after writer is closed because
    // writer has merged segment warmer that uses IS to run
    // searches, and that IS may be using this es!
    /*if (es != null)
     * {
     *     es.shutdown();
     *     es.awaitTermination(1, TimeUnit.SECONDS);
     * }*/

    TestUtil.CheckIndex(Dir);
    Dir.Dispose();
    System.IO.Directory.Delete(tempDir.FullName, true);

    if (VERBOSE) {
        Console.WriteLine("TEST: done [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]");
    }
}