// used by IndexWriterConfig internal LiveIndexWriterConfig(Analyzer analyzer, Version matchVersion) { this.analyzer = analyzer; this.MatchVersion = matchVersion; RamBufferSizeMB = IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB; maxBufferedDocs = IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS; maxBufferedDeleteTerms = IndexWriterConfig.DEFAULT_MAX_BUFFERED_DELETE_TERMS; readerTermsIndexDivisor = IndexWriterConfig.DEFAULT_READER_TERMS_INDEX_DIVISOR; mergedSegmentWarmer = null; termIndexInterval = IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL; // TODO: this should be private to the codec, not settable here delPolicy = new KeepOnlyLastCommitDeletionPolicy(); Commit = null; useCompoundFile = IndexWriterConfig.DEFAULT_USE_COMPOUND_FILE_SYSTEM; openMode = OpenMode_e.CREATE_OR_APPEND; similarity = IndexSearcher.DefaultSimilarity; mergeScheduler = new ConcurrentMergeScheduler(); writeLockTimeout = IndexWriterConfig.WRITE_LOCK_TIMEOUT; indexingChain = DocumentsWriterPerThread.defaultIndexingChain; codec = Codec.Default; if (codec == null) { throw new System.NullReferenceException(); } infoStream = Util.InfoStream.Default; mergePolicy = new TieredMergePolicy(); flushPolicy = new FlushByRamOrCountsPolicy(); readerPooling = IndexWriterConfig.DEFAULT_READER_POOLING; indexerThreadPool = new ThreadAffinityDocumentsWriterThreadPool(IndexWriterConfig.DEFAULT_MAX_THREAD_STATES); PerThreadHardLimitMB = IndexWriterConfig.DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB; }
/// <summary> /// Creates a new config that handles the live <seealso cref="IndexWriter"/> /// settings. /// </summary> internal LiveIndexWriterConfig(IndexWriterConfig config) { maxBufferedDeleteTerms = config.MaxBufferedDeleteTerms; maxBufferedDocs = config.MaxBufferedDocs; mergedSegmentWarmer = config.MergedSegmentWarmer; RamBufferSizeMB = config.RAMBufferSizeMB; readerTermsIndexDivisor = config.ReaderTermsIndexDivisor; termIndexInterval = config.TermIndexInterval; MatchVersion = config.MatchVersion; analyzer = config.Analyzer; delPolicy = config.DelPolicy; Commit = config.IndexCommit; openMode = config.OpenMode; similarity = config.Similarity; mergeScheduler = config.MergeScheduler; writeLockTimeout = config.WriteLockTimeout; indexingChain = config.IndexingChain; codec = config.Codec; infoStream = config.InfoStream; mergePolicy = config.MergePolicy; indexerThreadPool = config.IndexerThreadPool; readerPooling = config.ReaderPooling; flushPolicy = config.FlushPolicy; PerThreadHardLimitMB = config.RAMPerThreadHardLimitMB; useCompoundFile = config.UseCompoundFile; checkIntegrityAtMerge = config.CheckIntegrityAtMerge; }
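The live settings captured by these constructors are what IndexWriter.Config exposes once a writer is open. A minimal read-back sketch, assuming an already-open IndexWriter named writer (an illustrative name, not taken from the snippets), mirroring what several of the disk-full tests below do:

// Read a live setting back from the open writer; the disk-full tests below use
// this to suppress exceptions thrown on ConcurrentMergeScheduler's background threads.
MergeScheduler ms = writer.Config.MergeScheduler;
if (ms is ConcurrentMergeScheduler)
{
    ((ConcurrentMergeScheduler)ms).SetSuppressExceptions();
}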
public virtual void TestClone() { IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); IndexWriterConfig clone = (IndexWriterConfig)conf.Clone(); // Make sure parameters that can't be reused are cloned IndexDeletionPolicy delPolicy = conf.DelPolicy; IndexDeletionPolicy delPolicyClone = clone.DelPolicy; Assert.IsTrue(delPolicy.GetType() == delPolicyClone.GetType() && (delPolicy != delPolicyClone || delPolicy.Clone() == delPolicyClone.Clone())); FlushPolicy flushPolicy = conf.FlushPolicy; FlushPolicy flushPolicyClone = clone.FlushPolicy; Assert.IsTrue(flushPolicy.GetType() == flushPolicyClone.GetType() && (flushPolicy != flushPolicyClone || flushPolicy.Clone() == flushPolicyClone.Clone())); DocumentsWriterPerThreadPool pool = conf.IndexerThreadPool; DocumentsWriterPerThreadPool poolClone = clone.IndexerThreadPool; Assert.IsTrue(pool.GetType() == poolClone.GetType() && (pool != poolClone || pool.Clone() == poolClone.Clone())); MergePolicy mergePolicy = conf.MergePolicy; MergePolicy mergePolicyClone = clone.MergePolicy; Assert.IsTrue(mergePolicy.GetType() == mergePolicyClone.GetType() && (mergePolicy != mergePolicyClone || mergePolicy.Clone() == mergePolicyClone.Clone())); MergeScheduler mergeSched = conf.MergeScheduler; MergeScheduler mergeSchedClone = clone.MergeScheduler; Assert.IsTrue(mergeSched.GetType() == mergeSchedClone.GetType() && (mergeSched != mergeSchedClone || mergeSched.Clone() == mergeSchedClone.Clone())); conf.SetMergeScheduler(new SerialMergeScheduler()); Assert.AreEqual(typeof(ConcurrentMergeScheduler), clone.MergeScheduler.GetType()); }
public virtual void TestNoMergeScheduler_Mem() { MergeScheduler ms = NoMergeScheduler.INSTANCE; ms.Dispose(); ms.Merge(null, RandomPicks.RandomFrom(Random, (MergeTrigger[])Enum.GetValues(typeof(MergeTrigger))), Random.NextBoolean()); }
public virtual void TestNoMergeScheduler_Mem() { MergeScheduler ms = NoMergeScheduler.INSTANCE; ms.Dispose(); ms.Merge(null, RandomInts.RandomFrom(Random(), Enum.GetValues(typeof(MergeTrigger)).Cast <MergeTrigger>().ToArray()), Random().NextBoolean()); }
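Both test variants above exercise NoMergeScheduler, whose Merge call is a no-op. A minimal usage sketch for disabling merge scheduling on a real writer, assuming dir, analyzer, and matchVersion are already in scope (illustrative names, not taken from the snippets):

// Hand the shared no-op scheduler to the config; its Merge call does nothing,
// so background merges are simply not run by this writer.
IndexWriterConfig conf = new IndexWriterConfig(matchVersion, analyzer)
    .SetMergeScheduler(NoMergeScheduler.INSTANCE);
IndexWriter writer = new IndexWriter(dir, conf);
// ... index documents as usual ...
writer.Dispose();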
/// <summary> /// Expert: sets the merge scheduler used by this writer. The default is /// <seealso cref="ConcurrentMergeScheduler"/>. /// <p> /// <b>NOTE:</b> the merge scheduler cannot be null. /// /// <p>Only takes effect when IndexWriter is first created. /// </summary> public IndexWriterConfig SetMergeScheduler(MergeScheduler mergeScheduler) { if (mergeScheduler == null) { throw new System.ArgumentException("mergeScheduler must not be null"); } this.mergeScheduler = mergeScheduler; return(this); }
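A minimal usage sketch for this setter, assuming dir, analyzer, and matchVersion are in scope (illustrative names); SerialMergeScheduler is just one possible non-default scheduler:

// The scheduler must be non-null and only takes effect when the IndexWriter is
// first created; changing it on the config afterwards does not affect that writer.
IndexWriterConfig conf = new IndexWriterConfig(matchVersion, analyzer)
    .SetMergeScheduler(new SerialMergeScheduler());
IndexWriter writer = new IndexWriter(dir, conf);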
/* * Run one indexer and 2 searchers against single index as * stress test. */ public virtual void RunStressTest(Directory directory, MergeScheduler mergeScheduler) { IndexWriter modifier = new IndexWriter(directory, ANALYZER, true, IndexWriter.MaxFieldLength.UNLIMITED, null); modifier.SetMaxBufferedDocs(10); TimedThread[] threads = new TimedThread[4]; int numThread = 0; if (mergeScheduler != null) { modifier.SetMergeScheduler(mergeScheduler, null); } // One modifier that writes 10 docs then removes 5, over // and over: IndexerThread indexerThread = new IndexerThread(this, modifier, threads); threads[numThread++] = indexerThread; indexerThread.Start(); IndexerThread indexerThread2 = new IndexerThread(this, modifier, threads); threads[numThread++] = indexerThread2; indexerThread2.Start(); // Two searchers that constantly just re-instantiate the // searcher: SearcherThread searcherThread1 = new SearcherThread(directory, threads); threads[numThread++] = searcherThread1; searcherThread1.Start(); SearcherThread searcherThread2 = new SearcherThread(directory, threads); threads[numThread++] = searcherThread2; searcherThread2.Start(); for (int i = 0; i < numThread; i++) { threads[i].Join(); } modifier.Close(); for (int i = 0; i < numThread; i++) { Assert.IsTrue(!((TimedThread)threads[i]).failed); } //System.out.println(" Writer: " + indexerThread.count + " iterations"); //System.out.println("Searcher 1: " + searcherThread1.count + " searchers created"); //System.out.println("Searcher 2: " + searcherThread2.count + " searchers created"); }
/* * Run one indexer and 2 searchers against single index as * stress test. */ public virtual void RunStressTest(Directory directory, MergeScheduler mergeScheduler) { IndexWriter modifier = new IndexWriter(directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode_e.CREATE).SetMaxBufferedDocs(10).SetMergeScheduler(mergeScheduler)); modifier.Commit(); TimedThread[] threads = new TimedThread[4]; int numThread = 0; // One modifier that writes 10 docs then removes 5, over // and over: IndexerThread indexerThread = new IndexerThread(this, modifier, threads); threads[numThread++] = indexerThread; indexerThread.Start(); IndexerThread indexerThread2 = new IndexerThread(this, modifier, threads); threads[numThread++] = indexerThread2; indexerThread2.Start(); // Two searchers that constantly just re-instantiate the // searcher: SearcherThread searcherThread1 = new SearcherThread(directory, threads); threads[numThread++] = searcherThread1; searcherThread1.Start(); SearcherThread searcherThread2 = new SearcherThread(directory, threads); threads[numThread++] = searcherThread2; searcherThread2.Start(); for (int i = 0; i < numThread; i++) { threads[i].Join(); } modifier.Dispose(); for (int i = 0; i < numThread; i++) { Assert.IsTrue(!threads[i].Failed); } //System.out.println(" Writer: " + indexerThread.count + " iterations"); //System.out.println("Searcher 1: " + searcherThread1.count + " searchers created"); //System.out.println("Searcher 2: " + searcherThread2.count + " searchers created"); }
/* Run one indexer and 2 searchers against single index as stress test. */ public virtual void RunStressTest(Directory directory, MergeScheduler mergeScheduler) { IndexWriter modifier = new IndexWriter(directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode_e.CREATE).SetMaxBufferedDocs(10).SetMergeScheduler(mergeScheduler)); modifier.Commit(); TimedThread[] threads = new TimedThread[4]; int numThread = 0; // One modifier that writes 10 docs then removes 5, over // and over: IndexerThread indexerThread = new IndexerThread(modifier, threads); threads[numThread++] = indexerThread; indexerThread.Start(); IndexerThread indexerThread2 = new IndexerThread(modifier, threads); threads[numThread++] = indexerThread2; indexerThread2.Start(); // Two searchers that constantly just re-instantiate the // searcher: SearcherThread searcherThread1 = new SearcherThread(directory, threads); threads[numThread++] = searcherThread1; searcherThread1.Start(); SearcherThread searcherThread2 = new SearcherThread(directory, threads); threads[numThread++] = searcherThread2; searcherThread2.Start(); for (int i = 0; i < numThread; i++) { threads[i].Join(); } modifier.Dispose(); for (int i = 0; i < numThread; i++) { Assert.IsTrue(!threads[i].Failed); } //System.out.println(" Writer: " + indexerThread.count + " iterations"); //System.out.println("Searcher 1: " + searcherThread1.count + " searchers created"); //System.out.println("Searcher 2: " + searcherThread2.count + " searchers created"); }
/* Run one indexer and 2 searchers against single index as stress test. */ public virtual void RunStressTest(Directory directory, bool autoCommit, MergeScheduler mergeScheduler) { IndexWriter modifier = new IndexWriter(directory, autoCommit, ANALYZER, true); modifier.SetMaxBufferedDocs(10); TimedThread[] threads = new TimedThread[4]; if (mergeScheduler != null) modifier.SetMergeScheduler(mergeScheduler); // One modifier that writes 10 docs then removes 5, over // and over: IndexerThread indexerThread = new IndexerThread(modifier, threads); threads[0] = indexerThread; indexerThread.Start(); IndexerThread indexerThread2 = new IndexerThread(modifier, threads); threads[1] = indexerThread2; indexerThread2.Start(); // Two searchers that constantly just re-instantiate the // searcher: SearcherThread searcherThread1 = new SearcherThread(directory, threads); threads[2] = searcherThread1; searcherThread1.Start(); SearcherThread searcherThread2 = new SearcherThread(directory, threads); threads[3] = searcherThread2; searcherThread2.Start(); indexerThread.Join(); indexerThread2.Join(); searcherThread1.Join(); searcherThread2.Join(); modifier.Close(); Assert.IsTrue(!indexerThread.failed, "hit unexpected exception in indexer"); Assert.IsTrue(!indexerThread2.failed, "hit unexpected exception in indexer2"); Assert.IsTrue(!searcherThread1.failed, "hit unexpected exception in search1"); Assert.IsTrue(!searcherThread2.failed, "hit unexpected exception in search2"); //System.out.println(" Writer: " + indexerThread.count + " iterations"); //System.out.println("Searcher 1: " + searcherThread1.count + " searchers created"); //System.out.println("Searcher 2: " + searcherThread2.count + " searchers created"); }
internal readonly Codec Codec; // for writing new segments /// <summary> /// Constructs a new IndexWriter per the settings given in <code>conf</code>. /// If you want to make "live" changes to this writer instance, use /// <seealso cref="#getConfig()"/>. /// /// <p> /// <b>NOTE:</b> after this writer is created, the given configuration instance /// cannot be passed to another writer. If you intend to do so, you should /// <seealso cref="IndexWriterConfig#clone() clone"/> it beforehand. /// </summary> /// <param name="d"> /// the index directory. The index is either created or appended /// according to <code>conf.getOpenMode()</code>. </param> /// <param name="conf"> /// the configuration settings according to which IndexWriter should /// be initialized. </param> /// <exception cref="IOException"> /// if the directory cannot be read/written to, or if it does not /// exist and <code>conf.getOpenMode()</code> is /// <code>OpenMode.APPEND</code> or if there is any other low-level /// IO error </exception> public IndexWriter(Directory d, IndexWriterConfig conf) { /*if (!InstanceFieldsInitialized) { InitializeInstanceFields(); InstanceFieldsInitialized = true; }*/ readerPool = new ReaderPool(this); conf.SetIndexWriter(this); // prevent reuse by other instances Config_Renamed = new LiveIndexWriterConfig(conf); directory = d; analyzer = Config_Renamed.Analyzer; infoStream = Config_Renamed.InfoStream; mergePolicy = Config_Renamed.MergePolicy; mergePolicy.IndexWriter = this; mergeScheduler = Config_Renamed.MergeScheduler; Codec = Config_Renamed.Codec; BufferedUpdatesStream = new BufferedUpdatesStream(infoStream); PoolReaders = Config_Renamed.ReaderPooling; WriteLock = directory.MakeLock(WRITE_LOCK_NAME); if (!WriteLock.Obtain(Config_Renamed.WriteLockTimeout)) // obtain write lock { throw new LockObtainFailedException("Index locked for write: " + WriteLock); } bool success = false; try { OpenMode_e? mode = Config_Renamed.OpenMode; bool create; if (mode == OpenMode_e.CREATE) { create = true; } else if (mode == OpenMode_e.APPEND) { create = false; } else { // CREATE_OR_APPEND - create only if an index does not exist create = !DirectoryReader.IndexExists(directory); } // If index is too old, reading the segments will throw // IndexFormatTooOldException. segmentInfos = new SegmentInfos(); bool initialIndexExists = true; if (create) { // Try to read first. this is to allow create // against an index that's currently open for // searching. In this case we write the next // segments_N file with no segments: try { segmentInfos.Read(directory); segmentInfos.Clear(); } catch (IOException) { // Likely this means it's a fresh directory initialIndexExists = false; } // Record that we have a change (zero out all // segments) pending: Changed(); } else { segmentInfos.Read(directory); IndexCommit commit = Config_Renamed.IndexCommit; if (commit != null) { // Swap out all segments, but, keep metadata in // SegmentInfos, like version & generation, to // preserve write-once. this is important if // readers are open against the future commit // points. 
if (commit.Directory != directory) { throw new System.ArgumentException("IndexCommit's directory doesn't match my directory"); } SegmentInfos oldInfos = new SegmentInfos(); oldInfos.Read(directory, commit.SegmentsFileName); segmentInfos.Replace(oldInfos); Changed(); if (infoStream.IsEnabled("IW")) { infoStream.Message("IW", "init: loaded commit \"" + commit.SegmentsFileName + "\""); } } } RollbackSegments = segmentInfos.CreateBackupSegmentInfos(); // start with previous field numbers, but new FieldInfos GlobalFieldNumberMap = FieldNumberMap; Config_Renamed.FlushPolicy.Init(Config_Renamed); DocWriter = new DocumentsWriter(this, Config_Renamed, directory); eventQueue = DocWriter.EventQueue(); // Default deleter (for backwards compatibility) is // KeepOnlyLastCommitDeleter: lock (this) { Deleter = new IndexFileDeleter(directory, Config_Renamed.DelPolicy, segmentInfos, infoStream, this, initialIndexExists); } if (Deleter.StartingCommitDeleted) { // Deletion policy deleted the "head" commit point. // We have to mark ourself as changed so that if we // are closed w/o any further changes we write a new // segments_N file. Changed(); } if (infoStream.IsEnabled("IW")) { infoStream.Message("IW", "init: create=" + create); MessageState(); } success = true; } finally { if (!success) { if (infoStream.IsEnabled("IW")) { infoStream.Message("IW", "init: hit exception on init; releasing write lock"); } WriteLock.Release(); IOUtils.CloseWhileHandlingException(WriteLock); WriteLock = null; } } }
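A minimal sketch of the clone-before-reuse rule from the constructor doc above (the constructor claims the config via SetIndexWriter, so each writer needs its own instance); dir1, dir2, analyzer, and matchVersion are assumed to be in scope and are illustrative:

IndexWriterConfig conf = new IndexWriterConfig(matchVersion, analyzer);
// Clone before handing the settings to each writer; once a writer has claimed a
// config instance it cannot be passed to another writer.
IndexWriter w1 = new IndexWriter(dir1, (IndexWriterConfig)conf.Clone());
IndexWriter w2 = new IndexWriter(dir2, (IndexWriterConfig)conf.Clone());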
/// <summary> Expert: set the merge scheduler used by this writer.</summary> public virtual void SetMergeScheduler(MergeScheduler mergeScheduler) { lock (this) { EnsureOpen(); if (mergeScheduler == null) throw new System.NullReferenceException("MergeScheduler must be non-null"); if (this.mergeScheduler != mergeScheduler) { FinishMerges(true); this.mergeScheduler.Close(); } this.mergeScheduler = mergeScheduler; if (infoStream != null) { Message("setMergeScheduler " + mergeScheduler); } } }
public virtual void TestAddDocumentOnDiskFull() { for (int pass = 0; pass < 2; pass++) { if (VERBOSE) { Console.WriteLine("TEST: pass=" + pass); } bool doAbort = pass == 1; long diskFree = TestUtil.NextInt(Random(), 100, 300); while (true) { if (VERBOSE) { Console.WriteLine("TEST: cycle: diskFree=" + diskFree); } MockDirectoryWrapper dir = new MockDirectoryWrapper(Random(), new RAMDirectory()); dir.MaxSizeInBytes = diskFree; IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); MergeScheduler ms = writer.Config.MergeScheduler; if (ms is ConcurrentMergeScheduler) { // this test intentionally produces exceptions // in the threads that CMS launches; we don't // want to pollute test output with these. ((ConcurrentMergeScheduler)ms).SetSuppressExceptions(); } bool hitError = false; try { for (int i = 0; i < 200; i++) { AddDoc(writer); } if (VERBOSE) { Console.WriteLine("TEST: done adding docs; now commit"); } writer.Commit(); } catch (IOException e) { if (VERBOSE) { Console.WriteLine("TEST: exception on addDoc"); Console.WriteLine(e.StackTrace); } hitError = true; } if (hitError) { if (doAbort) { if (VERBOSE) { Console.WriteLine("TEST: now rollback"); } writer.Rollback(); } else { try { if (VERBOSE) { Console.WriteLine("TEST: now close"); } writer.Dispose(); } catch (IOException e) { if (VERBOSE) { Console.WriteLine("TEST: exception on close; retry w/ no disk space limit"); Console.WriteLine(e.StackTrace); } dir.MaxSizeInBytes = 0; writer.Dispose(); } } //TestUtil.SyncConcurrentMerges(ms); if (TestUtil.AnyFilesExceptWriteLock(dir)) { TestIndexWriter.AssertNoUnreferencedFiles(dir, "after disk full during addDocument"); // Make sure reader can open the index: DirectoryReader.Open(dir).Dispose(); } dir.Dispose(); // Now try again w/ more space: diskFree += TEST_NIGHTLY ? TestUtil.NextInt(Random(), 400, 600) : TestUtil.NextInt(Random(), 3000, 5000); } else { //TestUtil.SyncConcurrentMerges(writer); dir.MaxSizeInBytes = 0; writer.Dispose(); dir.Dispose(); break; } } } }
public virtual void Test() { MockDirectoryWrapper dir = NewMockFSDirectory(CreateTempDir("TestIndexWriterOutOfFileDescriptors")); dir.PreventDoubleWrite = false; double rate = Random().NextDouble() * 0.01; //System.out.println("rate=" + rate); dir.RandomIOExceptionRateOnOpen = rate; int iters = AtLeast(20); LineFileDocs docs = new LineFileDocs(Random(), DefaultCodecSupportsDocValues()); IndexReader r = null; DirectoryReader r2 = null; bool any = false; MockDirectoryWrapper dirCopy = null; int lastNumDocs = 0; for (int iter = 0; iter < iters; iter++) { IndexWriter w = null; if (VERBOSE) { Console.WriteLine("TEST: iter=" + iter); } try { MockAnalyzer analyzer = new MockAnalyzer(Random()); analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH); IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); if (VERBOSE) { // Do this ourselves instead of relying on LTC so // we see incrementing messageID: iwc.InfoStream = new PrintStreamInfoStream(Console.Out); } MergeScheduler ms = iwc.MergeScheduler; if (ms is ConcurrentMergeScheduler) { ((ConcurrentMergeScheduler)ms).SetSuppressExceptions(); } w = new IndexWriter(dir, iwc); if (r != null && Random().Next(5) == 3) { if (Random().NextBoolean()) { if (VERBOSE) { Console.WriteLine("TEST: addIndexes IR[]"); } w.AddIndexes(new IndexReader[] { r }); } else { if (VERBOSE) { Console.WriteLine("TEST: addIndexes Directory[]"); } w.AddIndexes(new Directory[] { dirCopy }); } } else { if (VERBOSE) { Console.WriteLine("TEST: addDocument"); } w.AddDocument(docs.NextDoc()); } dir.RandomIOExceptionRateOnOpen = 0.0; w.Dispose(); w = null; // NOTE: this is O(N^2)! Only enable for temporary debugging: //dir.setRandomIOExceptionRateOnOpen(0.0); //TestUtil.CheckIndex(dir); //dir.setRandomIOExceptionRateOnOpen(rate); // Verify numDocs only increases, to catch IndexWriter // accidentally deleting the index: dir.RandomIOExceptionRateOnOpen = 0.0; Assert.IsTrue(DirectoryReader.IndexExists(dir)); if (r2 == null) { r2 = DirectoryReader.Open(dir); } else { DirectoryReader r3 = DirectoryReader.OpenIfChanged(r2); if (r3 != null) { r2.Dispose(); r2 = r3; } } Assert.IsTrue(r2.NumDocs >= lastNumDocs, "before=" + lastNumDocs + " after=" + r2.NumDocs); lastNumDocs = r2.NumDocs; //System.out.println("numDocs=" + lastNumDocs); dir.RandomIOExceptionRateOnOpen = rate; any = true; if (VERBOSE) { Console.WriteLine("TEST: iter=" + iter + ": success"); } } catch (IOException ioe) { if (VERBOSE) { Console.WriteLine("TEST: iter=" + iter + ": exception"); Console.WriteLine(ioe.ToString()); Console.Write(ioe.StackTrace); } if (w != null) { // NOTE: leave random IO exceptions enabled here, // to verify that rollback does not try to write // anything: w.Rollback(); } } if (any && r == null && Random().NextBoolean()) { // Make a copy of a non-empty index so we can use // it to addIndexes later: dir.RandomIOExceptionRateOnOpen = 0.0; r = DirectoryReader.Open(dir); dirCopy = NewMockFSDirectory(CreateTempDir("TestIndexWriterOutOfFileDescriptors.copy")); HashSet <string> files = new HashSet <string>(); foreach (string file in dir.ListAll()) { dir.Copy(dirCopy, file, file, IOContext.DEFAULT); files.Add(file); } dirCopy.Sync(files); // Have IW kiss the dir so we remove any leftover // files ... 
we can easily have leftover files at // the time we take a copy because we are holding // open a reader: (new IndexWriter(dirCopy, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())))).Dispose(); dirCopy.RandomIOExceptionRate = rate; dir.RandomIOExceptionRateOnOpen = rate; } } if (r2 != null) { r2.Dispose(); } if (r != null) { r.Dispose(); dirCopy.Dispose(); } dir.Dispose(); }
public virtual void TestAddIndexOnDiskFull() { // MemoryCodec, since it uses FST, is not necessarily // "additive", ie if you add up N small FSTs, then merge // them, the merged result can easily be larger than the // sum because the merged FST may use array encoding for // some arcs (which uses more space): string idFormat = TestUtil.GetPostingsFormat("id"); string contentFormat = TestUtil.GetPostingsFormat("content"); AssumeFalse("this test cannot run with Memory codec", idFormat.Equals("Memory") || contentFormat.Equals("Memory")); int START_COUNT = 57; int NUM_DIR = TEST_NIGHTLY ? 50 : 5; int END_COUNT = START_COUNT + NUM_DIR * (TEST_NIGHTLY ? 25 : 5); // Build up a bunch of dirs that have indexes which we // will then merge together by calling addIndexes(*): Directory[] dirs = new Directory[NUM_DIR]; long inputDiskUsage = 0; for (int i = 0; i < NUM_DIR; i++) { dirs[i] = NewDirectory(); IndexWriter writer = new IndexWriter(dirs[i], NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); for (int j = 0; j < 25; j++) { AddDocWithIndex(writer, 25 * i + j); } writer.Dispose(); string[] files = dirs[i].ListAll(); for (int j = 0; j < files.Length; j++) { inputDiskUsage += dirs[i].FileLength(files[j]); } } // Now, build a starting index that has START_COUNT docs. We // will then try to addIndexes into a copy of this: MockDirectoryWrapper startDir = NewMockDirectory(); IndexWriter indWriter = new IndexWriter(startDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); for (int j = 0; j < START_COUNT; j++) { AddDocWithIndex(indWriter, j); } indWriter.Dispose(); // Make sure starting index seems to be working properly: Term searchTerm = new Term("content", "aaa"); IndexReader reader = DirectoryReader.Open(startDir); Assert.AreEqual(57, reader.DocFreq(searchTerm), "first docFreq"); IndexSearcher searcher = NewSearcher(reader); ScoreDoc[] hits = searcher.Search(new TermQuery(searchTerm), null, 1000).ScoreDocs; Assert.AreEqual(57, hits.Length, "first number of hits"); reader.Dispose(); // Iterate with larger and larger amounts of free // disk space. With little free disk space, // addIndexes will certainly run out of space & // fail. Verify that when this happens, index is // not corrupt and index in fact has added no // documents. Then, we increase disk space by 2000 // bytes each iteration. At some point there is // enough free disk space and addIndexes should // succeed and index should show all documents were // added. 
// String[] files = startDir.ListAll(); long diskUsage = startDir.SizeInBytes(); long startDiskUsage = 0; string[] files_ = startDir.ListAll(); for (int i = 0; i < files_.Length; i++) { startDiskUsage += startDir.FileLength(files_[i]); } for (int iter = 0; iter < 3; iter++) { if (VERBOSE) { Console.WriteLine("TEST: iter=" + iter); } // Start with 100 bytes more than we are currently using: long diskFree = diskUsage + TestUtil.NextInt(Random(), 50, 200); int method = iter; bool success = false; bool done = false; string methodName; if (0 == method) { methodName = "addIndexes(Directory[]) + forceMerge(1)"; } else if (1 == method) { methodName = "addIndexes(IndexReader[])"; } else { methodName = "addIndexes(Directory[])"; } while (!done) { if (VERBOSE) { Console.WriteLine("TEST: cycle..."); } // Make a new dir that will enforce disk usage: MockDirectoryWrapper dir = new MockDirectoryWrapper(Random(), new RAMDirectory(startDir, NewIOContext(Random()))); indWriter = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode_e.APPEND).SetMergePolicy(NewLogMergePolicy(false))); IOException err = null; MergeScheduler ms = indWriter.Config.MergeScheduler; for (int x = 0; x < 2; x++) { if (ms is ConcurrentMergeScheduler) // this test intentionally produces exceptions // in the threads that CMS launches; we don't // want to pollute test output with these. { if (0 == x) { ((ConcurrentMergeScheduler)ms).SetSuppressExceptions(); } else { ((ConcurrentMergeScheduler)ms).ClearSuppressExceptions(); } } // Two loops: first time, limit disk space & // throw random IOExceptions; second time, no // disk space limit: double rate = 0.05; double diskRatio = ((double)diskFree) / diskUsage; long thisDiskFree; string testName = null; if (0 == x) { dir.RandomIOExceptionRateOnOpen = Random().NextDouble() * 0.01; thisDiskFree = diskFree; if (diskRatio >= 2.0) { rate /= 2; } if (diskRatio >= 4.0) { rate /= 2; } if (diskRatio >= 6.0) { rate = 0.0; } if (VERBOSE) { testName = "disk full test " + methodName + " with disk full at " + diskFree + " bytes"; } } else { dir.RandomIOExceptionRateOnOpen = 0.0; thisDiskFree = 0; rate = 0.0; if (VERBOSE) { testName = "disk full test " + methodName + " with unlimited disk space"; } } if (VERBOSE) { Console.WriteLine("\ncycle: " + testName); } dir.TrackDiskUsage = true; dir.MaxSizeInBytes = thisDiskFree; dir.RandomIOExceptionRate = rate; try { if (0 == method) { if (VERBOSE) { Console.WriteLine("TEST: now addIndexes count=" + dirs.Length); } indWriter.AddIndexes(dirs); if (VERBOSE) { Console.WriteLine("TEST: now forceMerge"); } indWriter.ForceMerge(1); } else if (1 == method) { IndexReader[] readers = new IndexReader[dirs.Length]; for (int i = 0; i < dirs.Length; i++) { readers[i] = DirectoryReader.Open(dirs[i]); } try { indWriter.AddIndexes(readers); } finally { for (int i = 0; i < dirs.Length; i++) { readers[i].Dispose(); } } } else { indWriter.AddIndexes(dirs); } success = true; if (VERBOSE) { Console.WriteLine(" success!"); } if (0 == x) { done = true; } } catch (IOException e) { success = false; err = e; if (VERBOSE) { Console.WriteLine(" hit IOException: " + e); Console.WriteLine(e.StackTrace); } if (1 == x) { Console.WriteLine(e.StackTrace); Assert.Fail(methodName + " hit IOException after disk space was freed up"); } } // Make sure all threads from // ConcurrentMergeScheduler are done TestUtil.SyncConcurrentMerges(indWriter); if (VERBOSE) { Console.WriteLine(" now test readers"); } // Finally, verify index is not corrupt, 
and, if // we succeeded, we see all docs added, and if we // failed, we see either all docs or no docs added // (transactional semantics): dir.RandomIOExceptionRateOnOpen = 0.0; try { reader = DirectoryReader.Open(dir); } catch (IOException e) { Console.WriteLine(e.StackTrace); Assert.Fail(testName + ": exception when creating IndexReader: " + e); } int result = reader.DocFreq(searchTerm); if (success) { if (result != START_COUNT) { Assert.Fail(testName + ": method did not throw exception but docFreq('aaa') is " + result + " instead of expected " + START_COUNT); } } else { // On hitting exception we still may have added // all docs: if (result != START_COUNT && result != END_COUNT) { Console.WriteLine(err.StackTrace); Assert.Fail(testName + ": method did throw exception but docFreq('aaa') is " + result + " instead of expected " + START_COUNT + " or " + END_COUNT); } } searcher = NewSearcher(reader); try { hits = searcher.Search(new TermQuery(searchTerm), null, END_COUNT).ScoreDocs; } catch (IOException e) { Console.WriteLine(e.StackTrace); Assert.Fail(testName + ": exception when searching: " + e); } int result2 = hits.Length; if (success) { if (result2 != result) { Assert.Fail(testName + ": method did not throw exception but hits.Length for search on term 'aaa' is " + result2 + " instead of expected " + result); } } else { // On hitting exception we still may have added // all docs: if (result2 != result) { Console.WriteLine(err.StackTrace); Assert.Fail(testName + ": method did throw exception but hits.Length for search on term 'aaa' is " + result2 + " instead of expected " + result); } } reader.Dispose(); if (VERBOSE) { Console.WriteLine(" count is " + result); } if (done || result == END_COUNT) { break; } } if (VERBOSE) { Console.WriteLine(" start disk = " + startDiskUsage + "; input disk = " + inputDiskUsage + "; max used = " + dir.MaxUsedSizeInBytes); } if (done) { // Javadocs state that temp free Directory space // required is at most 2X total input size of // indices so let's make sure: Assert.IsTrue((dir.MaxUsedSizeInBytes - startDiskUsage) < 2 * (startDiskUsage + inputDiskUsage), "max free Directory space required exceeded 1X the total input index sizes during " + methodName + ": max temp usage = " + (dir.MaxUsedSizeInBytes - startDiskUsage) + " bytes vs limit=" + (2 * (startDiskUsage + inputDiskUsage)) + "; starting disk usage = " + startDiskUsage + " bytes; " + "input index disk usage = " + inputDiskUsage + " bytes"); } // Make sure we don't hit disk full during close below: dir.MaxSizeInBytes = 0; dir.RandomIOExceptionRate = 0.0; dir.RandomIOExceptionRateOnOpen = 0.0; indWriter.Dispose(); // Wait for all BG threads to finish else // dir.Dispose() will throw IOException because // there are still open files TestUtil.SyncConcurrentMerges(ms); dir.Dispose(); // Try again with more free space: diskFree += TEST_NIGHTLY ? TestUtil.NextInt(Random(), 4000, 8000) : TestUtil.NextInt(Random(), 40000, 80000); } } startDir.Dispose(); foreach (Directory dir in dirs) { dir.Dispose(); } }
/// <summary> /// Expert: sets the merge scheduler used by this writer. The default is /// <seealso cref="ConcurrentMergeScheduler"/>. /// <p> /// <b>NOTE:</b> the merge scheduler cannot be null. /// /// <p>Only takes effect when IndexWriter is first created. /// </summary> public IndexWriterConfig SetMergeScheduler(MergeScheduler mergeScheduler) { if (mergeScheduler == null) { throw new System.ArgumentException("mergeScheduler must not be null"); } this.mergeScheduler = mergeScheduler; return this; }
/* Run one indexer and 2 searchers against single index as stress test. */ public virtual void RunStressTest(Directory directory, bool autoCommit, MergeScheduler mergeScheduler) { IndexWriter modifier = new IndexWriter(directory, autoCommit, ANALYZER, true); modifier.SetMaxBufferedDocs(10); TimedThread[] threads = new TimedThread[4]; int numThread = 0; if (mergeScheduler != null) modifier.SetMergeScheduler(mergeScheduler); // One modifier that writes 10 docs then removes 5, over // and over: IndexerThread indexerThread = new IndexerThread(this, modifier, threads); threads[numThread++] = indexerThread; indexerThread.Start(); IndexerThread indexerThread2 = new IndexerThread(this, modifier, threads); threads[numThread++] = indexerThread2; indexerThread2.Start(); // Two searchers that constantly just re-instantiate the // searcher: SearcherThread searcherThread1 = new SearcherThread(directory, threads); threads[numThread++] = searcherThread1; searcherThread1.Start(); SearcherThread searcherThread2 = new SearcherThread(directory, threads); threads[numThread++] = searcherThread2; searcherThread2.Start(); for (int i = 0; i < numThread; i++) threads[i].Join(); modifier.Close(); for (int i = 0; i < numThread; i++) Assert.IsTrue(!((TimedThread) threads[i]).failed); //System.out.println(" Writer: " + indexerThread.count + " iterations"); //System.out.println("Searcher 1: " + searcherThread1.count + " searchers created"); //System.out.println("Searcher 2: " + searcherThread2.count + " searchers created"); }
public virtual void runTest(Directory directory, MergeScheduler merger) { IndexWriter writer = new IndexWriter(directory, ANALYZER, true, IndexWriter.MaxFieldLength.UNLIMITED, null); writer.SetMaxBufferedDocs(2); if (merger != null) { writer.SetMergeScheduler(merger, null); } for (int iter = 0; iter < NUM_ITER; iter++) { int iterFinal = iter; writer.MergeFactor = 1000; for (int i = 0; i < 200; i++) { Document d = new Document(); d.Add(new Field("id", System.Convert.ToString(i), Field.Store.YES, Field.Index.NOT_ANALYZED)); d.Add(new Field("contents", English.IntToEnglish(i), Field.Store.NO, Field.Index.ANALYZED)); writer.AddDocument(d, null); } writer.MergeFactor = 4; //writer.setInfoStream(System.out); ThreadClass[] threads = new ThreadClass[NUM_THREADS]; for (int i = 0; i < NUM_THREADS; i++) { int iFinal = i; IndexWriter writerFinal = writer; threads[i] = new AnonymousClassThread(writerFinal, iFinal, iterFinal, this); } for (int i = 0; i < NUM_THREADS; i++) { threads[i].Start(); } for (int i = 0; i < NUM_THREADS; i++) { threads[i].Join(); } Assert.IsTrue(!failed); int expectedDocCount = (int)((1 + iter) * (200 + 8 * NUM_ITER2 * (NUM_THREADS / 2.0) * (1 + NUM_THREADS))); // System.out.println("TEST: now index=" + writer.segString()); Assert.AreEqual(expectedDocCount, writer.MaxDoc()); writer.Close(); writer = new IndexWriter(directory, ANALYZER, false, IndexWriter.MaxFieldLength.UNLIMITED, null); writer.SetMaxBufferedDocs(2); IndexReader reader = IndexReader.Open(directory, true, null); Assert.IsTrue(reader.IsOptimized()); Assert.AreEqual(expectedDocCount, reader.NumDocs()); reader.Close(); } writer.Close(); }
public virtual void runTest(Directory directory, bool autoCommit, MergeScheduler merger) { IndexWriter writer = new IndexWriter(directory, autoCommit, ANALYZER, true); writer.SetMaxBufferedDocs(2); if (merger != null) writer.SetMergeScheduler(merger); for (int iter = 0; iter < NUM_ITER; iter++) { int iterFinal = iter; writer.SetMergeFactor(1000); for (int i = 0; i < 200; i++) { Document d = new Document(); d.Add(new Field("id", System.Convert.ToString(i), Field.Store.YES, Field.Index.NOT_ANALYZED)); d.Add(new Field("contents", English.IntToEnglish(i), Field.Store.NO, Field.Index.ANALYZED)); writer.AddDocument(d); } writer.SetMergeFactor(4); //writer.setInfoStream(System.out); int docCount = writer.DocCount(); SupportClass.ThreadClass[] threads = new SupportClass.ThreadClass[NUM_THREADS]; for (int i = 0; i < NUM_THREADS; i++) { int iFinal = i; IndexWriter writerFinal = writer; threads[i] = new AnonymousClassThread(writerFinal, iFinal, iterFinal, this); } for (int i = 0; i < NUM_THREADS; i++) threads[i].Start(); for (int i = 0; i < NUM_THREADS; i++) threads[i].Join(); Assert.IsTrue(!failed); int expectedDocCount = (int) ((1 + iter) * (200 + 8 * NUM_ITER2 * (NUM_THREADS / 2.0) * (1 + NUM_THREADS))); // System.out.println("TEST: now index=" + writer.segString()); Assert.AreEqual(expectedDocCount, writer.DocCount()); if (!autoCommit) { writer.Close(); writer = new IndexWriter(directory, autoCommit, ANALYZER, false); writer.SetMaxBufferedDocs(2); } IndexReader reader = IndexReader.Open(directory); Assert.IsTrue(reader.IsOptimized()); Assert.AreEqual(expectedDocCount, reader.NumDocs()); reader.Close(); } writer.Close(); }