public virtual void TestDateCompression() { Directory dir = new RAMDirectory(); IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); IndexWriter iwriter = new IndexWriter(dir, iwc); const long @base = 13; // prime long day = 1000L * 60 * 60 * 24; Document doc = new Document(); NumericDocValuesField dvf = new NumericDocValuesField("dv", 0); doc.Add(dvf); for (int i = 0; i < 300; ++i) { dvf.LongValue = @base + Random().Next(1000) * day; iwriter.AddDocument(doc); } iwriter.ForceMerge(1); long size1 = DirSize(dir); for (int i = 0; i < 50; ++i) { dvf.LongValue = @base + Random().Next(1000) * day; iwriter.AddDocument(doc); } iwriter.ForceMerge(1); long size2 = DirSize(dir); // make sure the new longs cost less than if they had only been packed Assert.IsTrue(size2 < size1 + (PackedInts.BitsRequired(day) * 50) / 8); }
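The compression tests in this section (TestDateCompression above, plus TestUniqueValuesCompression and TestSingleBigValueCompression later) measure index growth through a DirSize helper that is not shown here. The following is a minimal sketch under the assumption that it simply sums the length of every file in the Directory; ListAll() and FileLength() are used the same way in TestForceMergeTempSpaceUsage further down.

private static long DirSize(Directory dir)
{
    // Assumed helper: total on-disk size of the index, i.e. the sum of all file lengths.
    long size = 0;
    foreach (string file in dir.ListAll())
    {
        size += dir.FileLength(file);
    }
    return size;
}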
public virtual void TestForceMergeNotNeeded() { Directory dir = NewDirectory(); AtomicBoolean mayMerge = new AtomicBoolean(true); MergeScheduler mergeScheduler = new SerialMergeSchedulerAnonymousInnerClassHelper(this, mayMerge); IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergeScheduler(mergeScheduler).SetMergePolicy(MergePolicy())); writer.Config.MergePolicy.NoCFSRatio = Random().NextBoolean() ? 0 : 1; int numSegments = TestUtil.NextInt(Random(), 2, 20); for (int i = 0; i < numSegments; ++i) { int numDocs = TestUtil.NextInt(Random(), 1, 5); for (int j = 0; j < numDocs; ++j) { writer.AddDocument(new Document()); } writer.Reader.Dispose(); } for (int i = 5; i >= 0; --i) { int segmentCount = writer.SegmentCount; int maxNumSegments = i == 0 ? 1 : TestUtil.NextInt(Random(), 1, 10); mayMerge.Set(segmentCount > maxNumSegments); writer.ForceMerge(maxNumSegments); } writer.Dispose(); dir.Dispose(); }
public virtual void TestTermEnum() { IndexWriter writer = new IndexWriter(Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); // add 100 documents with the term: aaa // add 100 documents with the terms: aaa bbb // therefore, term 'aaa' has a document frequency of 200 and term 'bbb' of 100 for (int i = 0; i < 100; i++) { AddDoc(writer, "aaa"); AddDoc(writer, "aaa bbb"); } writer.Dispose(); // verify document frequency of terms in a multi-segment index VerifyDocFreq(); // merge segments writer = new IndexWriter(Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode_e.APPEND)); writer.ForceMerge(1); writer.Dispose(); // verify document frequency of terms in a single-segment index VerifyDocFreq(); }
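TestTermEnum relies on AddDoc and VerifyDocFreq helpers that are not part of this section. A hedged sketch of plausible implementations follows; the field name "content" and the use of IndexReader.DocFreq are assumptions rather than the original helpers.

private void AddDoc(IndexWriter writer, string value)
{
    // Assumed helper: index one document whose "content" field holds the given terms.
    Document doc = new Document();
    doc.Add(NewTextField("content", value, Field.Store.NO));
    writer.AddDocument(doc);
}

private void VerifyDocFreq()
{
    // Assumed helper: 'aaa' was added to every document, 'bbb' to half of them.
    using (IndexReader reader = DirectoryReader.Open(Dir))
    {
        Assert.AreEqual(200, reader.DocFreq(new Term("content", "aaa")));
        Assert.AreEqual(100, reader.DocFreq(new Term("content", "bbb")));
    }
}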
public virtual void TestAllSegmentsLarge() { Directory dir = new RAMDirectory(); IndexWriterConfig conf = NewWriterConfig(); IndexWriter writer = new IndexWriter(dir, conf); AddDocs(writer, 3); AddDocs(writer, 3); AddDocs(writer, 3); writer.Dispose(); conf = NewWriterConfig(); LogMergePolicy lmp = new LogDocMergePolicy(); lmp.MaxMergeDocs = 2; conf.SetMergePolicy(lmp); writer = new IndexWriter(dir, conf); writer.ForceMerge(1); writer.Dispose(); SegmentInfos sis = new SegmentInfos(); sis.Read(dir); Assert.AreEqual(3, sis.Size()); }
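TestAllSegmentsLarge above and TestByteSizeLimit later in this section share NewWriterConfig and AddDocs helpers that are not shown. A minimal sketch follows, assuming NewWriterConfig returns a plain config and AddDocs commits after adding so that each call leaves its documents in a separate segment.

private IndexWriterConfig NewWriterConfig()
{
    // Assumed helper: a fresh, unremarkable config for each writer.
    return new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
}

private void AddDocs(IndexWriter writer, int numDocs)
{
    // Assumed helper: add numDocs trivial documents, then commit to flush them as one segment.
    for (int i = 0; i < numDocs; i++)
    {
        writer.AddDocument(new Document());
    }
    writer.Commit();
}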
public override void SetUp() { base.SetUp(); store = NewDirectory(); IndexWriter writer = new IndexWriter(store, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false))); Document doc; doc = new Document(); doc.Add(NewTextField("aaa", "foo", Field.Store.YES)); writer.AddDocument(doc); doc = new Document(); doc.Add(NewTextField("aaa", "foo", Field.Store.YES)); writer.AddDocument(doc); doc = new Document(); doc.Add(NewTextField("contents", "Tom", Field.Store.YES)); writer.AddDocument(doc); doc = new Document(); doc.Add(NewTextField("contents", "Jerry", Field.Store.YES)); writer.AddDocument(doc); doc = new Document(); doc.Add(NewTextField("zzz", "bar", Field.Store.YES)); writer.AddDocument(doc); writer.ForceMerge(1); writer.Dispose(); }
public static void Main(string[] args) { if (args.Length < 3) { Console.Error.WriteLine("Usage: IndexMergeTool <mergedIndex> <index1> <index2> [index3] ..."); Environment.Exit(1); } FSDirectory mergedIndex = FSDirectory.Open(new System.IO.DirectoryInfo(args[0])); #pragma warning disable 612, 618 using (IndexWriter writer = new IndexWriter(mergedIndex, new IndexWriterConfig(LuceneVersion.LUCENE_CURRENT, null) .SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE))) #pragma warning restore 612, 618 { Directory[] indexes = new Directory[args.Length - 1]; for (int i = 1; i < args.Length; i++) { indexes[i - 1] = FSDirectory.Open(new System.IO.DirectoryInfo(args[i])); } Console.WriteLine("Merging..."); writer.AddIndexes(indexes); Console.WriteLine("Full merge..."); writer.ForceMerge(1); } Console.WriteLine("Done."); }
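A hedged usage note for the merge tool above: it takes the destination index first, followed by two or more source indexes. The class name IndexMergeTool is inferred from the usage string, and the paths below are placeholders only.

// Command line (example paths):
//   IndexMergeTool C:\indexes\merged C:\indexes\source1 C:\indexes\source2
//
// The same merge driven programmatically:
IndexMergeTool.Main(new string[] { @"C:\indexes\merged", @"C:\indexes\source1", @"C:\indexes\source2" });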
public virtual void TestLucene() { int num = 100; Directory indexA = NewDirectory(); Directory indexB = NewDirectory(); FillIndex(Random(), indexA, 0, num); bool fail = VerifyIndex(indexA, 0); if (fail) { Assert.Fail("Index a is invalid"); } FillIndex(Random(), indexB, num, num); fail = VerifyIndex(indexB, num); if (fail) { Assert.Fail("Index b is invalid"); } Directory merged = NewDirectory(); IndexWriter writer = new IndexWriter(merged, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy(2))); writer.AddIndexes(indexA, indexB); writer.ForceMerge(1); writer.Dispose(); fail = VerifyIndex(merged, 0); Assert.IsFalse(fail, "The merged index is invalid"); indexA.Dispose(); indexB.Dispose(); merged.Dispose(); }
public virtual void TestFixedBinary() { BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BFixedBinary")); if (dir is MockDirectoryWrapper) { ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER; } IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())) .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH).SetRAMBufferSizeMB(256.0).SetMergeScheduler(new ConcurrentMergeScheduler()).SetMergePolicy(NewLogMergePolicy(false, 10)).SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE)); Document doc = new Document(); var bytes = new byte[4]; BytesRef data = new BytesRef(bytes); BinaryDocValuesField dvField = new BinaryDocValuesField("dv", data); doc.Add(dvField); for (int i = 0; i < int.MaxValue; i++) { bytes[0] = (byte)(i >> 24); bytes[1] = (byte)(i >> 16); bytes[2] = (byte)(i >> 8); bytes[3] = (byte)i; w.AddDocument(doc); if (i % 100000 == 0) { Console.WriteLine("indexed: " + i); Console.Out.Flush(); } } w.ForceMerge(1); w.Dispose(); Console.WriteLine("verifying..."); Console.Out.Flush(); DirectoryReader r = DirectoryReader.Open(dir); int expectedValue = 0; foreach (AtomicReaderContext context in r.Leaves) { AtomicReader reader = context.AtomicReader; BytesRef scratch = new BytesRef(); BinaryDocValues dv = reader.GetBinaryDocValues("dv"); for (int i = 0; i < reader.MaxDoc; i++) { bytes[0] = (byte)(expectedValue >> 24); bytes[1] = (byte)(expectedValue >> 16); bytes[2] = (byte)(expectedValue >> 8); bytes[3] = (byte)expectedValue; dv.Get(i, scratch); Assert.AreEqual(data, scratch); expectedValue++; } } r.Dispose(); dir.Dispose(); }
public virtual void TestOmitNorms_Mem() { Directory ram = NewDirectory(); Analyzer analyzer = new MockAnalyzer(Random()); IndexWriter writer = new IndexWriter(ram, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)); Document d = new Document(); // this field will have norms Field f1 = NewTextField("f1", "this field has norms", Field.Store.NO); d.Add(f1); // this field will NOT have norms FieldType customType = new FieldType(TextField.TYPE_NOT_STORED); customType.OmitNorms = true; Field f2 = NewField("f2", "this field has NO norms in all docs", customType); d.Add(f2); writer.AddDocument(d); writer.ForceMerge(1); // now add another document that reverses the two fields (norms for f2, none for f1) and verify that the SegmentMerger keeps omitNorms sticky once it has been set for a field d = new Document(); // reversed d.Add(NewField("f1", "this field has norms", customType)); d.Add(NewTextField("f2", "this field has NO norms in all docs", Field.Store.NO)); writer.AddDocument(d); // force merge writer.ForceMerge(1); // flush writer.Dispose(); SegmentReader reader = GetOnlySegmentReader(DirectoryReader.Open(ram)); FieldInfos fi = reader.FieldInfos; Assert.IsTrue(fi.FieldInfo("f1").OmitsNorms(), "OmitNorms field bit should be set."); Assert.IsTrue(fi.FieldInfo("f2").OmitsNorms(), "OmitNorms field bit should be set."); reader.Dispose(); ram.Dispose(); }
public static void BeforeClass() { Dir = NewFSDirectory(CreateTempDir("2Bdocs")); IndexWriter iw = new IndexWriter(Dir, new IndexWriterConfig(TEST_VERSION_CURRENT, null)); Document doc = new Document(); for (int i = 0; i < 262144; i++) { iw.AddDocument(doc); } iw.ForceMerge(1); iw.Dispose(); }
public virtual void Test() { Directory dir = NewDirectory(); IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy())); IList<long?> numbers = new List<long?>(); IList<BytesRef> binary = new List<BytesRef>(); IList<BytesRef> sorted = new List<BytesRef>(); int numDocs = AtLeast(100); for (int i = 0; i < numDocs; i++) { Document d = new Document(); long number = Random().NextLong(); d.Add(new NumericDocValuesField("number", number)); BytesRef bytes = new BytesRef(TestUtil.RandomRealisticUnicodeString(Random())); d.Add(new BinaryDocValuesField("bytes", bytes)); binary.Add(bytes); bytes = new BytesRef(TestUtil.RandomRealisticUnicodeString(Random())); d.Add(new SortedDocValuesField("sorted", bytes)); sorted.Add(bytes); w.AddDocument(d); numbers.Add(number); } w.ForceMerge(1); IndexReader r = w.Reader; w.Dispose(); Assert.AreEqual(1, r.Leaves.Count); AtomicReader ar = (AtomicReader)r.Leaves[0].Reader; int numThreads = TestUtil.NextInt(Random(), 2, 5); IList<ThreadClass> threads = new List<ThreadClass>(); CountDownLatch startingGun = new CountDownLatch(1); for (int t = 0; t < numThreads; t++) { Random threadRandom = new Random(Random().Next()); ThreadClass thread = new ThreadAnonymousInnerClassHelper(this, numbers, binary, sorted, numDocs, ar, startingGun, threadRandom); thread.Start(); threads.Add(thread); } startingGun.countDown(); foreach (ThreadClass thread in threads) { thread.Join(); } r.Dispose(); dir.Dispose(); }
public virtual void TestUniqueValuesCompression() { Directory dir = new RAMDirectory(); IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); IndexWriter iwriter = new IndexWriter(dir, iwc); int uniqueValueCount = TestUtil.NextInt(Random(), 1, 256); IList<long> values = new List<long>(); Document doc = new Document(); NumericDocValuesField dvf = new NumericDocValuesField("dv", 0); doc.Add(dvf); for (int i = 0; i < 300; ++i) { long value; if (values.Count < uniqueValueCount) { value = Random().NextLong(); values.Add(value); } else { value = RandomInts.RandomFrom(Random(), values); } dvf.LongValue = value; iwriter.AddDocument(doc); } iwriter.ForceMerge(1); long size1 = DirSize(dir); for (int i = 0; i < 20; ++i) { dvf.LongValue = RandomInts.RandomFrom(Random(), values); iwriter.AddDocument(doc); } iwriter.ForceMerge(1); long size2 = DirSize(dir); // make sure the new longs did not cost 8 bytes each Assert.IsTrue(size2 < size1 + 8 * 20); }
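// Note on the assertion above: with at most 256 distinct values the doc-values format is
// expected to store a small lookup table plus at most one byte's worth of bits per document,
// so the 20 extra documents should add far less than the 8 * 20 = 160 bytes that raw
// 64-bit-per-value storage would require.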
public virtual void TestEmptyIndex() { Directory rd1 = NewDirectory(); IndexWriter iw = new IndexWriter(rd1, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); iw.Dispose(); // create a copy: Directory rd2 = NewDirectory(rd1); Directory rdOut = NewDirectory(); IndexWriter iwOut = new IndexWriter(rdOut, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); ParallelAtomicReader apr = new ParallelAtomicReader(SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(rd1)), SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(rd2))); // When unpatched, Lucene crashes here with a NoSuchElementException (caused by ParallelTermEnum) iwOut.AddIndexes(apr); iwOut.ForceMerge(1); // 2nd try with a readerless parallel reader iwOut.AddIndexes(new ParallelAtomicReader()); iwOut.ForceMerge(1); ParallelCompositeReader cpr = new ParallelCompositeReader(DirectoryReader.Open(rd1), DirectoryReader.Open(rd2)); // When unpatched, Lucene crashes here with a NoSuchElementException (caused by ParallelTermEnum) iwOut.AddIndexes(cpr); iwOut.ForceMerge(1); // 2nd try with a readerless parallel reader iwOut.AddIndexes(new ParallelCompositeReader()); iwOut.ForceMerge(1); iwOut.Dispose(); rdOut.Dispose(); rd1.Dispose(); rd2.Dispose(); }
public virtual void TestNumerics([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")]IConcurrentMergeScheduler scheduler) { BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BNumerics")); if (dir is MockDirectoryWrapper) { ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER; } IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())) .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH).SetRAMBufferSizeMB(256.0).SetMergeScheduler(scheduler).SetMergePolicy(NewLogMergePolicy(false, 10)).SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE)); Document doc = new Document(); NumericDocValuesField dvField = new NumericDocValuesField("dv", 0); doc.Add(dvField); for (int i = 0; i < int.MaxValue; i++) { dvField.LongValue = i; w.AddDocument(doc); if (i % 100000 == 0) { Console.WriteLine("indexed: " + i); Console.Out.Flush(); } } w.ForceMerge(1); w.Dispose(); Console.WriteLine("verifying..."); Console.Out.Flush(); DirectoryReader r = DirectoryReader.Open(dir); long expectedValue = 0; foreach (AtomicReaderContext context in r.Leaves) { AtomicReader reader = context.AtomicReader; NumericDocValues dv = reader.GetNumericDocValues("dv"); for (int i = 0; i < reader.MaxDoc; i++) { Assert.AreEqual(expectedValue, dv.Get(i)); expectedValue++; } } r.Dispose(); dir.Dispose(); }
public virtual void TestPartialMerge() { Directory dir = NewDirectory(); Document doc = new Document(); doc.Add(NewStringField("content", "aaa", Field.Store.NO)); int incrMin = TestNightly ? 15 : 40; for (int numDocs = 10; numDocs < 500; numDocs += TestUtil.NextInt32(Random, incrMin, 5 * incrMin)) { LogDocMergePolicy ldmp = new LogDocMergePolicy(); ldmp.MinMergeDocs = 1; ldmp.MergeFactor = 5; IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.CREATE).SetMaxBufferedDocs(2).SetMergePolicy(ldmp)); for (int j = 0; j < numDocs; j++) { writer.AddDocument(doc); } writer.Dispose(); SegmentInfos sis = new SegmentInfos(); sis.Read(dir); int segCount = sis.Count; ldmp = new LogDocMergePolicy(); ldmp.MergeFactor = 5; writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(ldmp)); writer.ForceMerge(3); writer.Dispose(); sis = new SegmentInfos(); sis.Read(dir); int optSegCount = sis.Count; if (segCount < 3) { Assert.AreEqual(segCount, optSegCount); } else { Assert.AreEqual(3, optSegCount); } } dir.Dispose(); }
internal virtual void AddDocs(Random random, Directory dir, int ndocs, string field, string val, int maxTF, float percentDocs) { Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(random, val, maxTF, percentDocs); Document doc = new Document(); doc.Add(NewStringField(field, val, Field.Store.NO)); IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetOpenMode(OpenMode.CREATE).SetMaxBufferedDocs(100).SetMergePolicy(NewLogMergePolicy(100))); for (int i = 0; i < ndocs; i++) { writer.AddDocument(doc); } writer.ForceMerge(1); writer.Dispose(); }
public virtual void TestMixedRAM() { Directory ram = NewDirectory(); Analyzer analyzer = new MockAnalyzer(Random); IndexWriter writer = new IndexWriter(ram, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetMaxBufferedDocs(10).SetMergePolicy(NewLogMergePolicy(2))); Document d = new Document(); // this field will have norms Field f1 = NewTextField("f1", "this field has norms", Field.Store.NO); d.Add(f1); // this field will NOT have norms FieldType customType = new FieldType(TextField.TYPE_NOT_STORED); customType.OmitNorms = true; Field f2 = NewField("f2", "this field has NO norms in all docs", customType); d.Add(f2); for (int i = 0; i < 5; i++) { writer.AddDocument(d); } for (int i = 0; i < 20; i++) { writer.AddDocument(d); } // force merge writer.ForceMerge(1); // flush writer.Dispose(); SegmentReader reader = GetOnlySegmentReader(DirectoryReader.Open(ram)); FieldInfos fi = reader.FieldInfos; Assert.IsTrue(!fi.FieldInfo("f1").OmitsNorms, "OmitNorms field bit should not be set."); Assert.IsTrue(fi.FieldInfo("f2").OmitsNorms, "OmitNorms field bit should be set."); reader.Dispose(); ram.Dispose(); }
public virtual void TestTermVectorCorruption3() { Directory dir = NewDirectory(); IndexWriter writer = new IndexWriter(dir, ((IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMaxBufferedDocs(2).SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH)).SetMergeScheduler(new SerialMergeScheduler()).SetMergePolicy(new LogDocMergePolicy())); Document document = new Document(); FieldType customType = new FieldType(); customType.IsStored = true; Field storedField = NewField("stored", "stored", customType); document.Add(storedField); FieldType customType2 = new FieldType(StringField.TYPE_NOT_STORED); customType2.StoreTermVectors = true; customType2.StoreTermVectorPositions = true; customType2.StoreTermVectorOffsets = true; Field termVectorField = NewField("termVector", "termVector", customType2); document.Add(termVectorField); for (int i = 0; i < 10; i++) { writer.AddDocument(document); } writer.Dispose(); writer = new IndexWriter(dir, ((IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMaxBufferedDocs(2).SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH)).SetMergeScheduler(new SerialMergeScheduler()).SetMergePolicy(new LogDocMergePolicy())); for (int i = 0; i < 6; i++) { writer.AddDocument(document); } writer.ForceMerge(1); writer.Dispose(); IndexReader reader = DirectoryReader.Open(dir); for (int i = 0; i < 10; i++) { reader.GetTermVectors(i); reader.Document(i); } reader.Dispose(); dir.Dispose(); }
internal static void ModifyIndex(int i, Directory dir) { switch (i) { case 0: { if (VERBOSE) { Console.WriteLine("TEST: modify index"); } IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); w.DeleteDocuments(new Term("field2", "a11")); w.DeleteDocuments(new Term("field2", "b30")); w.Dispose(); break; } case 1: { IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); w.ForceMerge(1); w.Dispose(); break; } case 2: { IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); w.AddDocument(CreateDocument(101, 4)); w.ForceMerge(1); w.AddDocument(CreateDocument(102, 4)); w.AddDocument(CreateDocument(103, 4)); w.Dispose(); break; } case 3: { IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); w.AddDocument(CreateDocument(101, 4)); w.Dispose(); break; } } }
public virtual void TestNoPrxFile() { Directory ram = NewDirectory(); Analyzer analyzer = new MockAnalyzer(Random); IndexWriter writer = new IndexWriter(ram, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetMaxBufferedDocs(3).SetMergePolicy(NewLogMergePolicy())); LogMergePolicy lmp = (LogMergePolicy)writer.Config.MergePolicy; lmp.MergeFactor = 2; lmp.NoCFSRatio = 0.0; Document d = new Document(); FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); ft.IndexOptions = IndexOptions.DOCS_AND_FREQS; Field f1 = NewField("f1", "this field has term freqs", ft); d.Add(f1); for (int i = 0; i < 30; i++) { writer.AddDocument(d); } writer.Commit(); AssertNoPrx(ram); // now add some documents with positions, and check there is no prox after optimization d = new Document(); f1 = NewTextField("f1", "this field has positions", Field.Store.NO); d.Add(f1); for (int i = 0; i < 30; i++) { writer.AddDocument(d); } // force merge writer.ForceMerge(1); // flush writer.Dispose(); AssertNoPrx(ram); ram.Dispose(); }
private void CreateIndex(int numHits) { int numDocs = 500; Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader2) => { return new TokenStreamComponents(new MockTokenizer(reader2, MockTokenizer.WHITESPACE, true)); }); Directory directory = new SeekCountingDirectory(this, new RAMDirectory()); // note: test explicitly disables payloads IndexWriter writer = new IndexWriter(directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetMaxBufferedDocs(10).SetMergePolicy(NewLogMergePolicy(false))); for (int i = 0; i < numDocs; i++) { Document doc = new Document(); string content; if (i % (numDocs / numHits) == 0) { // add a document that matches the query "term1 term2" content = this.term1 + " " + this.term2; } else if (i % 15 == 0) { // add a document that only contains term1 content = this.term1 + " " + this.term1; } else { // add a document that contains term2 but not term1 content = this.term3 + " " + this.term2; } doc.Add(NewTextField(this.field, content, Documents.Field.Store.YES)); writer.AddDocument(doc); } // make sure the index has only a single segment writer.ForceMerge(1); writer.Dispose(); SegmentReader reader = GetOnlySegmentReader(DirectoryReader.Open(directory)); this.searcher = NewSearcher(reader); }
public virtual void TestSimpleCase() { string[] keywords = new string[] { "1", "2" }; string[] unindexed = new string[] { "Netherlands", "Italy" }; string[] unstored = new string[] { "Amsterdam has lots of bridges", "Venice has lots of canals" }; string[] text = new string[] { "Amsterdam", "Venice" }; Directory dir = NewDirectory(); IndexWriter modifier = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false)).SetMaxBufferedDeleteTerms(1)); FieldType custom1 = new FieldType(); custom1.Stored = true; for (int i = 0; i < keywords.Length; i++) { Document doc = new Document(); doc.Add(NewStringField("id", keywords[i], Field.Store.YES)); doc.Add(NewField("country", unindexed[i], custom1)); doc.Add(NewTextField("contents", unstored[i], Field.Store.NO)); doc.Add(NewTextField("city", text[i], Field.Store.YES)); modifier.AddDocument(doc); } modifier.ForceMerge(1); modifier.Commit(); Term term = new Term("city", "Amsterdam"); int hitCount = GetHitCount(dir, term); Assert.AreEqual(1, hitCount); if (VERBOSE) { Console.WriteLine("\nTEST: now delete by term=" + term); } modifier.DeleteDocuments(term); modifier.Commit(); if (VERBOSE) { Console.WriteLine("\nTEST: now getHitCount"); } hitCount = GetHitCount(dir, term); Assert.AreEqual(0, hitCount); modifier.Dispose(); dir.Dispose(); }
public virtual void TestPartialMerge() { Directory dir = NewDirectory(); Document doc = new Document(); doc.Add(NewStringField("content", "aaa", Field.Store.NO)); int incrMin = TEST_NIGHTLY ? 15 : 40; for (int numDocs = 10; numDocs < 500; numDocs += TestUtil.NextInt(Random(), incrMin, 5 * incrMin)) { LogDocMergePolicy ldmp = new LogDocMergePolicy(); ldmp.MinMergeDocs = 1; ldmp.MergeFactor = 5; IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode_e.CREATE).SetMaxBufferedDocs(2).SetMergePolicy(ldmp)); for (int j = 0; j < numDocs; j++) { writer.AddDocument(doc); } writer.Dispose(); SegmentInfos sis = new SegmentInfos(); sis.Read(dir); int segCount = sis.Size(); ldmp = new LogDocMergePolicy(); ldmp.MergeFactor = 5; writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(ldmp)); writer.ForceMerge(3); writer.Dispose(); sis = new SegmentInfos(); sis.Read(dir); int optSegCount = sis.Size(); if (segCount < 3) { Assert.AreEqual(segCount, optSegCount); } else { Assert.AreEqual(3, optSegCount); } } dir.Dispose(); }
public virtual void Test() { BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BPostings")); if (dir is MockDirectoryWrapper) { ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER; } IndexWriterConfig iwc = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH).SetRAMBufferSizeMB(256.0).SetMergeScheduler(new ConcurrentMergeScheduler()).SetMergePolicy(NewLogMergePolicy(false, 10)).SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE); IndexWriter w = new IndexWriter(dir, iwc); MergePolicy mp = w.Config.MergePolicy; if (mp is LogByteSizeMergePolicy) { // 1 petabyte: ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1024 * 1024 * 1024; } Document doc = new Document(); FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); ft.OmitNorms = true; ft.IndexOptions = FieldInfo.IndexOptions.DOCS_ONLY; Field field = new Field("field", new MyTokenStream(), ft); doc.Add(field); int numDocs = (int.MaxValue / 26) + 1; for (int i = 0; i < numDocs; i++) { w.AddDocument(doc); if (VERBOSE && i % 100000 == 0) { Console.WriteLine(i + " of " + numDocs + "..."); } } w.ForceMerge(1); w.Dispose(); dir.Dispose(); }
public void TestCustomMergeScheduler() { // we don't really need to execute anything, just to make sure the custom MS // compiles. But ensure that it can be used as well, e.g., no other hidden // dependencies or something. Therefore, don't use any random API ! Directory dir = new RAMDirectory(); IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, null); conf.SetMergeScheduler(new ReportingMergeScheduler()); IndexWriter writer = new IndexWriter(dir, conf); writer.AddDocument(new Document()); writer.Commit(); // trigger flush writer.AddDocument(new Document()); writer.Commit(); // trigger flush writer.ForceMerge(1); writer.Dispose(); dir.Dispose(); }
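ReportingMergeScheduler is referenced but not defined in this section. Below is a rough sketch of what such a scheduler could look like; the Merge/Dispose overrides and the IndexWriter.GetNextMerge/Merge calls are assumptions about the MergeScheduler contract and should be checked against the actual base class before use.

internal class ReportingMergeScheduler : MergeScheduler
{
    public override void Merge(IndexWriter writer, MergeTrigger trigger, bool newMergesFound)
    {
        // Drain and run every pending merge on the calling thread, reporting as we go.
        MergePolicy.OneMerge merge;
        while ((merge = writer.GetNextMerge()) != null)
        {
            Console.WriteLine("executing merge");
            writer.Merge(merge);
        }
    }

    protected override void Dispose(bool disposing)
    {
        // nothing to clean up
    }
}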
public virtual void TestMixedRAM() { Directory ram = NewDirectory(); Analyzer analyzer = new MockAnalyzer(Random); IndexWriter writer = new IndexWriter(ram, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetMaxBufferedDocs(10).SetMergePolicy(NewLogMergePolicy(2))); Document d = new Document(); // this field will have Tf Field f1 = NewField("f1", "this field has term freqs", normalType); d.Add(f1); // this field will NOT have Tf Field f2 = NewField("f2", "this field has NO Tf in all docs", omitType); d.Add(f2); for (int i = 0; i < 5; i++) { writer.AddDocument(d); } for (int i = 0; i < 20; i++) { writer.AddDocument(d); } // force merge writer.ForceMerge(1); // flush writer.Dispose(); SegmentReader reader = GetOnlySegmentReader(DirectoryReader.Open(ram)); FieldInfos fi = reader.FieldInfos; Assert.AreEqual(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, fi.FieldInfo("f1").IndexOptions, "OmitTermFreqAndPositions field bit should not be set."); Assert.AreEqual(IndexOptions.DOCS_ONLY, fi.FieldInfo("f2").IndexOptions, "OmitTermFreqAndPositions field bit should be set."); reader.Dispose(); ram.Dispose(); }
public virtual void TestBackgroundForceMerge() { Directory dir = NewDirectory(); for (int pass = 0; pass < 2; pass++) { IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.CREATE).SetMaxBufferedDocs(2).SetMergePolicy(NewLogMergePolicy(51))); Document doc = new Document(); doc.Add(NewStringField("field", "aaa", Field.Store.NO)); for (int i = 0; i < 100; i++) { writer.AddDocument(doc); } writer.ForceMerge(1, false); if (0 == pass) { writer.Dispose(); DirectoryReader reader = DirectoryReader.Open(dir); Assert.AreEqual(1, reader.Leaves.Count); reader.Dispose(); } else { // Get another segment to flush so we can verify it is // NOT included in the merging writer.AddDocument(doc); writer.AddDocument(doc); writer.Dispose(); DirectoryReader reader = DirectoryReader.Open(dir); Assert.IsTrue(reader.Leaves.Count > 1); reader.Dispose(); SegmentInfos infos = new SegmentInfos(); infos.Read(dir); Assert.AreEqual(2, infos.Count); } } dir.Dispose(); }
public virtual void TestBackgroundForceMerge() { Directory dir = NewDirectory(); for (int pass = 0; pass < 2; pass++) { IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode_e.CREATE).SetMaxBufferedDocs(2).SetMergePolicy(NewLogMergePolicy(51))); Document doc = new Document(); doc.Add(NewStringField("field", "aaa", Field.Store.NO)); for (int i = 0; i < 100; i++) { writer.AddDocument(doc); } writer.ForceMerge(1, false); if (0 == pass) { writer.Dispose(); DirectoryReader reader = DirectoryReader.Open(dir); Assert.AreEqual(1, reader.Leaves.Count); reader.Dispose(); } else { // Get another segment to flush so we can verify it is // NOT included in the merging writer.AddDocument(doc); writer.AddDocument(doc); writer.Dispose(); DirectoryReader reader = DirectoryReader.Open(dir); Assert.IsTrue(reader.Leaves.Count > 1); reader.Dispose(); SegmentInfos infos = new SegmentInfos(); infos.Read(dir); Assert.AreEqual(2, infos.Size()); } } dir.Dispose(); }
/// <summary> /// Perform the upgrade. </summary> public void Upgrade() { if (!DirectoryReader.IndexExists(dir)) { throw new IndexNotFoundException(dir.ToString()); } if (!deletePriorCommits) { ICollection <IndexCommit> commits = DirectoryReader.ListCommits(dir); if (commits.Count > 1) { throw new System.ArgumentException("this tool was invoked to not delete prior commit points, but the following commits were found: " + commits); } } IndexWriterConfig c = (IndexWriterConfig)iwc.Clone(); c.MergePolicy = new UpgradeIndexMergePolicy(c.MergePolicy); c.IndexDeletionPolicy = new KeepOnlyLastCommitDeletionPolicy(); IndexWriter w = new IndexWriter(dir, c); try { InfoStream infoStream = c.InfoStream; if (infoStream.IsEnabled("IndexUpgrader")) { infoStream.Message("IndexUpgrader", "Upgrading all pre-" + Constants.LUCENE_MAIN_VERSION + " segments of index directory '" + dir + "' to version " + Constants.LUCENE_MAIN_VERSION + "..."); } w.ForceMerge(1); if (infoStream.IsEnabled("IndexUpgrader")) { infoStream.Message("IndexUpgrader", "All segments upgraded to version " + Constants.LUCENE_MAIN_VERSION); } } finally { w.Dispose(); } }
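A hedged usage sketch for Upgrade() above: it assumes the standard IndexUpgrader(Directory, LuceneVersion) constructor and an on-disk index; the path and the version constant are placeholders, not anything taken from this section.

using (FSDirectory dir = FSDirectory.Open(new System.IO.DirectoryInfo(@"C:\indexes\old-index")))
{
    // Force-merges every older segment so the whole index is rewritten in the current format.
    new IndexUpgrader(dir, LuceneVersion.LUCENE_48).Upgrade();
}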
public virtual void TestLUCENE_1590() { Document doc = new Document(); // f1 has no norms FieldType customType = new FieldType(TextField.TYPE_NOT_STORED); customType.OmitNorms = true; FieldType customType2 = new FieldType(); customType2.IsStored = true; doc.Add(NewField("f1", "v1", customType)); doc.Add(NewField("f1", "v2", customType2)); // f2 has no TF FieldType customType3 = new FieldType(TextField.TYPE_NOT_STORED); customType3.IndexOptions = IndexOptions.DOCS_ONLY; Field f = NewField("f2", "v1", customType3); doc.Add(f); doc.Add(NewField("f2", "v2", customType2)); IndexWriter writer = new IndexWriter(Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); writer.AddDocument(doc); writer.ForceMerge(1); // be sure to have a single segment writer.Dispose(); TestUtil.CheckIndex(Dir); SegmentReader reader = GetOnlySegmentReader(DirectoryReader.Open(Dir)); FieldInfos fi = reader.FieldInfos; // f1 Assert.IsFalse(fi.FieldInfo("f1").HasNorms, "f1 should have no norms"); Assert.AreEqual(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, fi.FieldInfo("f1").IndexOptions, "omitTermFreqAndPositions field bit should not be set for f1"); // f2 Assert.IsTrue(fi.FieldInfo("f2").HasNorms, "f2 should have norms"); Assert.AreEqual(IndexOptions.DOCS_ONLY, fi.FieldInfo("f2").IndexOptions, "omitTermFreqAndPositions field bit should be set for f2"); reader.Dispose(); }
public virtual void TestKeepNoneOnInitDeletionPolicy() { for (int pass = 0; pass < 2; pass++) { bool useCompoundFile = (pass % 2) != 0; Directory dir = NewDirectory(); IndexWriterConfig conf = (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.CREATE).SetIndexDeletionPolicy(new KeepNoneOnInitDeletionPolicy(this)).SetMaxBufferedDocs(10); MergePolicy mp = conf.MergePolicy; mp.NoCFSRatio = useCompoundFile ? 1.0 : 0.0; IndexWriter writer = new IndexWriter(dir, conf); KeepNoneOnInitDeletionPolicy policy = (KeepNoneOnInitDeletionPolicy)writer.Config.IndexDeletionPolicy; for (int i = 0; i < 107; i++) { AddDoc(writer); } writer.Dispose(); conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.APPEND).SetIndexDeletionPolicy(policy); mp = conf.MergePolicy; mp.NoCFSRatio = 1.0; writer = new IndexWriter(dir, conf); policy = (KeepNoneOnInitDeletionPolicy)writer.Config.IndexDeletionPolicy; writer.ForceMerge(1); writer.Dispose(); Assert.AreEqual(2, policy.NumOnInit); // If we are not auto committing then there should // be exactly 2 commits (one per close above): Assert.AreEqual(2, policy.NumOnCommit); // Simplistic check: just verify the index is in fact // readable: IndexReader reader = DirectoryReader.Open(dir); reader.Dispose(); dir.Dispose(); } }
public virtual void TestCommitUserData() { Directory dir = NewDirectory(); IndexWriter w = new IndexWriter(dir, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(2)); for (int j = 0; j < 17; j++) { AddDoc(w); } w.Dispose(); DirectoryReader r = DirectoryReader.Open(dir); // commit(Map) never called for this index Assert.AreEqual(0, r.IndexCommit.UserData.Count); r.Dispose(); w = new IndexWriter(dir, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(2)); for (int j = 0; j < 17; j++) { AddDoc(w); } IDictionary <string, string> data = new Dictionary <string, string>(); data["label"] = "test1"; w.SetCommitData(data); w.Dispose(); r = DirectoryReader.Open(dir); Assert.AreEqual("test1", r.IndexCommit.UserData["label"]); r.Dispose(); w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); w.ForceMerge(1); w.Dispose(); dir.Dispose(); }
public virtual void TestTermOrd() { Directory d = NewDirectory(); IndexWriter w = new IndexWriter(d, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetCodec(TestUtil.AlwaysPostingsFormat(new Lucene41PostingsFormat()))); Documents.Document doc = new Documents.Document(); doc.Add(NewTextField("f", "a b c", Field.Store.NO)); w.AddDocument(doc); w.ForceMerge(1); DirectoryReader r = w.Reader; TermsEnum terms = GetOnlySegmentReader(r).Fields.Terms("f").Iterator(null); Assert.IsTrue(terms.Next() != null); try { Assert.AreEqual(0, terms.Ord()); } catch (System.NotSupportedException uoe) { // ok -- codec is not required to support this op } r.Dispose(); w.Dispose(); d.Dispose(); }
public virtual void TestByteSizeLimit() { // tests that the max merge size constraint is applied during forceMerge. Directory dir = new RAMDirectory(); // Prepare an index w/ several small segments and a large one. IndexWriterConfig conf = NewWriterConfig(); IndexWriter writer = new IndexWriter(dir, conf); const int numSegments = 15; for (int i = 0; i < numSegments; i++) { int numDocs = i == 7 ? 30 : 1; AddDocs(writer, numDocs); } writer.Dispose(); SegmentInfos sis = new SegmentInfos(); sis.Read(dir); double min = sis.Info(0).SizeInBytes(); conf = NewWriterConfig(); LogByteSizeMergePolicy lmp = new LogByteSizeMergePolicy(); lmp.MaxMergeMBForForcedMerge = (min + 1) / (1 << 20); conf.SetMergePolicy(lmp); writer = new IndexWriter(dir, conf); writer.ForceMerge(1); writer.Dispose(); // Should only be 3 segments in the index, because one of them exceeds the size limit sis = new SegmentInfos(); sis.Read(dir); Assert.AreEqual(3, sis.Size()); }
public virtual void TestLucene() { int num = 100; Directory indexA = NewDirectory(); Directory indexB = NewDirectory(); FillIndex(Random, indexA, 0, num); bool fail = VerifyIndex(indexA, 0); if (fail) { Assert.Fail("Index a is invalid"); } FillIndex(Random, indexB, num, num); fail = VerifyIndex(indexB, num); if (fail) { Assert.Fail("Index b is invalid"); } Directory merged = NewDirectory(); IndexWriter writer = new IndexWriter(merged, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NewLogMergePolicy(2))); writer.AddIndexes(indexA, indexB); writer.ForceMerge(1); writer.Dispose(); fail = VerifyIndex(merged, 0); Assert.IsFalse(fail, "The merged index is invalid"); indexA.Dispose(); indexB.Dispose(); merged.Dispose(); }
public virtual void TestNonFlex() { Directory d = NewDirectory(); const int DOC_COUNT = 177; IndexWriter w = new IndexWriter(d, (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))).SetMaxBufferedDocs(7).SetMergePolicy(NewLogMergePolicy())); for (int iter = 0; iter < 2; iter++) { if (iter == 0) { Documents.Document doc = new Documents.Document(); doc.Add(NewTextField("field1", "this is field1", Field.Store.NO)); doc.Add(NewTextField("field2", "this is field2", Field.Store.NO)); doc.Add(NewTextField("field3", "aaa", Field.Store.NO)); doc.Add(NewTextField("field4", "bbb", Field.Store.NO)); for (int i = 0; i < DOC_COUNT; i++) { w.AddDocument(doc); } } else { w.ForceMerge(1); } IndexReader r = w.GetReader(); TermsEnum terms = MultiFields.GetTerms(r, "field3").GetEnumerator(); Assert.AreEqual(TermsEnum.SeekStatus.END, terms.SeekCeil(new BytesRef("abc"))); r.Dispose(); } w.Dispose(); d.Dispose(); }
public virtual void TestNonFlex() { Directory d = NewDirectory(); const int DOC_COUNT = 177; IndexWriter w = new IndexWriter(d, (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetMaxBufferedDocs(7).SetMergePolicy(NewLogMergePolicy())); for (int iter = 0; iter < 2; iter++) { if (iter == 0) { Documents.Document doc = new Documents.Document(); doc.Add(NewTextField("field1", "this is field1", Field.Store.NO)); doc.Add(NewTextField("field2", "this is field2", Field.Store.NO)); doc.Add(NewTextField("field3", "aaa", Field.Store.NO)); doc.Add(NewTextField("field4", "bbb", Field.Store.NO)); for (int i = 0; i < DOC_COUNT; i++) { w.AddDocument(doc); } } else { w.ForceMerge(1); } IndexReader r = w.Reader; TermsEnum terms = MultiFields.GetTerms(r, "field3").Iterator(null); Assert.AreEqual(TermsEnum.SeekStatus.END, terms.SeekCeil(new BytesRef("abc"))); r.Dispose(); } w.Dispose(); d.Dispose(); }
public virtual void TestLengthPrefixAcrossTwoPages() { Directory d = NewDirectory(); IndexWriter w = new IndexWriter(d, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); Document doc = new Document(); var bytes = new byte[32764]; BytesRef b = new BytesRef(); b.Bytes = bytes; b.Length = bytes.Length; doc.Add(new SortedDocValuesField("field", b)); w.AddDocument(doc); bytes[0] = 1; w.AddDocument(doc); w.ForceMerge(1); DirectoryReader r = w.Reader; BinaryDocValues s = FieldCache.DEFAULT.GetTerms(GetOnlySegmentReader(r), "field", false); BytesRef bytes1 = new BytesRef(); s.Get(0, bytes1); Assert.AreEqual(bytes.Length, bytes1.Length); bytes[0] = 0; Assert.AreEqual(b, bytes1); s.Get(1, bytes1); Assert.AreEqual(bytes.Length, bytes1.Length); bytes[0] = 1; Assert.AreEqual(b, bytes1); r.Dispose(); w.Dispose(); d.Dispose(); }
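// Note on the test above: each value is 32764 bytes, so value plus length prefix comes within
// a few bytes of the 32 KB block size of the paged byte storage behind FieldCache.GetTerms;
// the second document's entry therefore has to straddle two pages, which is presumably the
// boundary condition the test name refers to.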
public virtual void TestPositions() { Directory ram = NewDirectory(); Analyzer analyzer = new MockAnalyzer(Random); IndexWriter writer = new IndexWriter(ram, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)); Document d = new Document(); // f1,f2,f3: docs only FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); ft.IndexOptions = IndexOptions.DOCS_ONLY; Field f1 = NewField("f1", "this field has docs only", ft); d.Add(f1); Field f2 = NewField("f2", "this field has docs only", ft); d.Add(f2); Field f3 = NewField("f3", "this field has docs only", ft); d.Add(f3); FieldType ft2 = new FieldType(TextField.TYPE_NOT_STORED); ft2.IndexOptions = IndexOptions.DOCS_AND_FREQS; // f4,f5,f6 docs and freqs Field f4 = NewField("f4", "this field has docs and freqs", ft2); d.Add(f4); Field f5 = NewField("f5", "this field has docs and freqs", ft2); d.Add(f5); Field f6 = NewField("f6", "this field has docs and freqs", ft2); d.Add(f6); FieldType ft3 = new FieldType(TextField.TYPE_NOT_STORED); ft3.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; // f7,f8,f9 docs/freqs/positions Field f7 = NewField("f7", "this field has docs and freqs and positions", ft3); d.Add(f7); Field f8 = NewField("f8", "this field has docs and freqs and positions", ft3); d.Add(f8); Field f9 = NewField("f9", "this field has docs and freqs and positions", ft3); d.Add(f9); writer.AddDocument(d); writer.ForceMerge(1); // now we add another document which has docs-only for f1, f4, f7, docs/freqs for f2, f5, f8, // and docs/freqs/positions for f3, f6, f9 d = new Document(); // f1,f4,f7: docs only f1 = NewField("f1", "this field has docs only", ft); d.Add(f1); f4 = NewField("f4", "this field has docs only", ft); d.Add(f4); f7 = NewField("f7", "this field has docs only", ft); d.Add(f7); // f2, f5, f8: docs and freqs f2 = NewField("f2", "this field has docs and freqs", ft2); d.Add(f2); f5 = NewField("f5", "this field has docs and freqs", ft2); d.Add(f5); f8 = NewField("f8", "this field has docs and freqs", ft2); d.Add(f8); // f3, f6, f9: docs and freqs and positions f3 = NewField("f3", "this field has docs and freqs and positions", ft3); d.Add(f3); f6 = NewField("f6", "this field has docs and freqs and positions", ft3); d.Add(f6); f9 = NewField("f9", "this field has docs and freqs and positions", ft3); d.Add(f9); writer.AddDocument(d); // force merge writer.ForceMerge(1); // flush writer.Dispose(); SegmentReader reader = GetOnlySegmentReader(DirectoryReader.Open(ram)); FieldInfos fi = reader.FieldInfos; // docs + docs = docs Assert.AreEqual(IndexOptions.DOCS_ONLY, fi.FieldInfo("f1").IndexOptions); // docs + docs/freqs = docs Assert.AreEqual(IndexOptions.DOCS_ONLY, fi.FieldInfo("f2").IndexOptions); // docs + docs/freqs/pos = docs Assert.AreEqual(IndexOptions.DOCS_ONLY, fi.FieldInfo("f3").IndexOptions); // docs/freqs + docs = docs Assert.AreEqual(IndexOptions.DOCS_ONLY, fi.FieldInfo("f4").IndexOptions); // docs/freqs + docs/freqs = docs/freqs Assert.AreEqual(IndexOptions.DOCS_AND_FREQS, fi.FieldInfo("f5").IndexOptions); // docs/freqs + docs/freqs/pos = docs/freqs Assert.AreEqual(IndexOptions.DOCS_AND_FREQS, fi.FieldInfo("f6").IndexOptions); // docs/freqs/pos + docs = docs Assert.AreEqual(IndexOptions.DOCS_ONLY, fi.FieldInfo("f7").IndexOptions); // docs/freqs/pos + docs/freqs = docs/freqs Assert.AreEqual(IndexOptions.DOCS_AND_FREQS, fi.FieldInfo("f8").IndexOptions); // docs/freqs/pos + docs/freqs/pos = docs/freqs/pos Assert.AreEqual(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, fi.FieldInfo("f9").IndexOptions); reader.Dispose(); 
ram.Dispose(); }
public virtual void TestKeepLastNDeletionPolicy() { const int N = 5; for (int pass = 0; pass < 2; pass++) { bool useCompoundFile = (pass % 2) != 0; Directory dir = NewDirectory(); KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(this, N); for (int j = 0; j < N + 1; j++) { IndexWriterConfig conf = (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.CREATE).SetIndexDeletionPolicy(policy).SetMaxBufferedDocs(10); MergePolicy mp = conf.MergePolicy; mp.NoCFSRatio = useCompoundFile ? 1.0 : 0.0; IndexWriter writer = new IndexWriter(dir, conf); policy = (KeepLastNDeletionPolicy)writer.Config.IndexDeletionPolicy; for (int i = 0; i < 17; i++) { AddDoc(writer); } writer.ForceMerge(1); writer.Dispose(); } Assert.IsTrue(policy.NumDelete > 0); Assert.AreEqual(N + 1, policy.NumOnInit); Assert.AreEqual(N + 1, policy.NumOnCommit); // Simplistic check: just verify only the past N segments_N's still // exist, and, I can open a reader on each: dir.DeleteFile(IndexFileNames.SEGMENTS_GEN); long gen = SegmentInfos.GetLastCommitGeneration(dir); for (int i = 0; i < N + 1; i++) { try { IndexReader reader = DirectoryReader.Open(dir); reader.Dispose(); if (i == N) { Assert.Fail("should have failed on commits prior to last " + N); } } catch (IOException /*e*/) { if (i != N) { throw; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details) } } if (i < N) { dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen)); } gen--; } dir.Dispose(); } }
internal virtual void AddDocs(Random random, Directory dir, int ndocs, string field, string val, int maxTF, float percentDocs) { Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(random, val, maxTF, percentDocs); Document doc = new Document(); doc.Add(NewStringField(field, val, Field.Store.NO)); IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetOpenMode(OpenMode_e.CREATE).SetMaxBufferedDocs(100).SetMergePolicy(NewLogMergePolicy(100))); for (int i = 0; i < ndocs; i++) { writer.AddDocument(doc); } writer.ForceMerge(1); writer.Dispose(); }
public virtual void Test2BTerms_Mem([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")] IConcurrentMergeScheduler scheduler) { if ("Lucene3x".Equals(Codec.Default.Name)) { throw new Exception("this test cannot run with PreFlex codec"); } Console.WriteLine("Starting Test2B"); long TERM_COUNT = ((long)int.MaxValue) + 100000000; int TERMS_PER_DOC = TestUtil.NextInt(Random(), 100000, 1000000); IList <BytesRef> savedTerms = null; BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BTerms")); //MockDirectoryWrapper dir = NewFSDirectory(new File("/p/lucene/indices/2bindex")); if (dir is MockDirectoryWrapper) { ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER; } dir.CheckIndexOnClose = false; // don't double-checkindex if (true) { IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())) .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH) .SetRAMBufferSizeMB(256.0) .SetMergeScheduler(scheduler) .SetMergePolicy(NewLogMergePolicy(false, 10)) .SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE)); MergePolicy mp = w.Config.MergePolicy; if (mp is LogByteSizeMergePolicy) { // 1 petabyte: ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1024 * 1024 * 1024; } Documents.Document doc = new Documents.Document(); MyTokenStream ts = new MyTokenStream(Random(), TERMS_PER_DOC); FieldType customType = new FieldType(TextField.TYPE_NOT_STORED); customType.IndexOptions = FieldInfo.IndexOptions.DOCS_ONLY; customType.OmitNorms = true; Field field = new Field("field", ts, customType); doc.Add(field); //w.setInfoStream(System.out); int numDocs = (int)(TERM_COUNT / TERMS_PER_DOC); Console.WriteLine("TERMS_PER_DOC=" + TERMS_PER_DOC); Console.WriteLine("numDocs=" + numDocs); for (int i = 0; i < numDocs; i++) { long t0 = Environment.TickCount; w.AddDocument(doc); Console.WriteLine(i + " of " + numDocs + " " + (Environment.TickCount - t0) + " msec"); } savedTerms = ts.SavedTerms; Console.WriteLine("TEST: full merge"); w.ForceMerge(1); Console.WriteLine("TEST: close writer"); w.Dispose(); } Console.WriteLine("TEST: open reader"); IndexReader r = DirectoryReader.Open(dir); if (savedTerms == null) { savedTerms = FindTerms(r); } int numSavedTerms = savedTerms.Count; IList <BytesRef> bigOrdTerms = new List <BytesRef>(savedTerms.SubList(numSavedTerms - 10, numSavedTerms)); Console.WriteLine("TEST: test big ord terms..."); TestSavedTerms(r, bigOrdTerms); Console.WriteLine("TEST: test all saved terms..."); TestSavedTerms(r, savedTerms); r.Dispose(); Console.WriteLine("TEST: now CheckIndex..."); CheckIndex.Status status = TestUtil.CheckIndex(dir); long tc = status.SegmentInfos[0].TermIndexStatus.TermCount; Assert.IsTrue(tc > int.MaxValue, "count " + tc + " is not > " + int.MaxValue); dir.Dispose(); Console.WriteLine("TEST: done!"); }
public virtual void Test([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")]IConcurrentMergeScheduler scheduler) { BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BPostingsBytes1")); if (dir is MockDirectoryWrapper) { ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER; } var config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())) .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH) .SetRAMBufferSizeMB(256.0) .SetMergeScheduler(scheduler) .SetMergePolicy(NewLogMergePolicy(false, 10)) .SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE); IndexWriter w = new IndexWriter(dir, config); MergePolicy mp = w.Config.MergePolicy; if (mp is LogByteSizeMergePolicy) { // 1 petabyte: ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1024 * 1024 * 1024; } Document doc = new Document(); FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); ft.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS; ft.OmitNorms = true; MyTokenStream tokenStream = new MyTokenStream(); Field field = new Field("field", tokenStream, ft); doc.Add(field); const int numDocs = 1000; for (int i = 0; i < numDocs; i++) { if (i % 2 == 1) // trick blockPF's little optimization { tokenStream.n = 65536; } else { tokenStream.n = 65537; } w.AddDocument(doc); } w.ForceMerge(1); w.Dispose(); DirectoryReader oneThousand = DirectoryReader.Open(dir); IndexReader[] subReaders = new IndexReader[1000]; Arrays.Fill(subReaders, oneThousand); MultiReader mr = new MultiReader(subReaders); BaseDirectoryWrapper dir2 = NewFSDirectory(CreateTempDir("2BPostingsBytes2")); if (dir2 is MockDirectoryWrapper) { ((MockDirectoryWrapper)dir2).Throttling = MockDirectoryWrapper.Throttling_e.NEVER; } IndexWriter w2 = new IndexWriter(dir2, new IndexWriterConfig(TEST_VERSION_CURRENT, null)); w2.AddIndexes(mr); w2.ForceMerge(1); w2.Dispose(); oneThousand.Dispose(); DirectoryReader oneMillion = DirectoryReader.Open(dir2); subReaders = new IndexReader[2000]; Arrays.Fill(subReaders, oneMillion); mr = new MultiReader(subReaders); BaseDirectoryWrapper dir3 = NewFSDirectory(CreateTempDir("2BPostingsBytes3")); if (dir3 is MockDirectoryWrapper) { ((MockDirectoryWrapper)dir3).Throttling = MockDirectoryWrapper.Throttling_e.NEVER; } IndexWriter w3 = new IndexWriter(dir3, new IndexWriterConfig(TEST_VERSION_CURRENT, null)); w3.AddIndexes(mr); w3.ForceMerge(1); w3.Dispose(); oneMillion.Dispose(); dir.Dispose(); dir2.Dispose(); dir3.Dispose(); }
public virtual void TestForceMergeTempSpaceUsage() { MockDirectoryWrapper dir = NewMockDirectory(); IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(10).SetMergePolicy(NewLogMergePolicy())); if (VERBOSE) { Console.WriteLine("TEST: config1=" + writer.Config); } for (int j = 0; j < 500; j++) { TestIndexWriter.AddDocWithIndex(writer, j); } int termIndexInterval = writer.Config.TermIndexInterval; // force one extra segment w/ different doc store so we see the doc stores get merged writer.Commit(); TestIndexWriter.AddDocWithIndex(writer, 500); writer.Dispose(); if (VERBOSE) { Console.WriteLine("TEST: start disk usage"); } long startDiskUsage = 0; string[] files = dir.ListAll(); for (int i = 0; i < files.Length; i++) { startDiskUsage += dir.FileLength(files[i]); if (VERBOSE) { Console.WriteLine(files[i] + ": " + dir.FileLength(files[i])); } } dir.ResetMaxUsedSizeInBytes(); dir.TrackDiskUsage = true; // Important: use the same term index interval, else a smaller one here could increase the disk usage and cause a false failure: writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode_e.APPEND).SetTermIndexInterval(termIndexInterval).SetMergePolicy(NewLogMergePolicy())); writer.ForceMerge(1); writer.Dispose(); long maxDiskUsage = dir.MaxUsedSizeInBytes; Assert.IsTrue(maxDiskUsage <= 4 * startDiskUsage, "forceMerge used too much temporary space: starting usage was " + startDiskUsage + " bytes; max temp usage was " + maxDiskUsage + " but should have been " + (4 * startDiskUsage) + " (= 4X starting usage)"); dir.Dispose(); }
public virtual void TestFixedSorted([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")] IConcurrentMergeScheduler scheduler) { BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BFixedSorted")); if (dir is MockDirectoryWrapper) { ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER; } IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())) .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH) .SetRAMBufferSizeMB(256.0) .SetMergeScheduler(scheduler) .SetMergePolicy(NewLogMergePolicy(false, 10)) .SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE)); Document doc = new Document(); var bytes = new byte[2]; BytesRef data = new BytesRef(bytes); SortedDocValuesField dvField = new SortedDocValuesField("dv", data); doc.Add(dvField); for (int i = 0; i < int.MaxValue; i++) { bytes[0] = (byte)(i >> 8); bytes[1] = (byte)i; w.AddDocument(doc); if (i % 100000 == 0) { Console.WriteLine("indexed: " + i); Console.Out.Flush(); } } w.ForceMerge(1); w.Dispose(); Console.WriteLine("verifying..."); Console.Out.Flush(); DirectoryReader r = DirectoryReader.Open(dir); int expectedValue = 0; foreach (AtomicReaderContext context in r.Leaves) { AtomicReader reader = context.AtomicReader; BytesRef scratch = new BytesRef(); BinaryDocValues dv = reader.GetSortedDocValues("dv"); for (int i = 0; i < reader.MaxDoc; i++) { bytes[0] = (byte)(expectedValue >> 8); bytes[1] = (byte)expectedValue; dv.Get(i, scratch); Assert.AreEqual(data, scratch); expectedValue++; } } r.Dispose(); dir.Dispose(); }
public virtual void TestSingleBigValueCompression() { Directory dir = new RAMDirectory(); IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); IndexWriter iwriter = new IndexWriter(dir, iwc); Document doc = new Document(); NumericDocValuesField dvf = new NumericDocValuesField("dv", 0); doc.Add(dvf); for (int i = 0; i < 20000; ++i) { dvf.LongValue = i & 1023; iwriter.AddDocument(doc); } iwriter.ForceMerge(1); long size1 = DirSize(dir); dvf.LongValue = long.MaxValue; iwriter.AddDocument(doc); iwriter.ForceMerge(1); long size2 = DirSize(dir); // make sure the new value did not grow the bpv for every other value Assert.IsTrue(size2 < size1 + (20000 * (63 - 10)) / 8); }
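// Note on the assertion above: the first 20000 values fit in 10 bits (0..1023) while
// long.MaxValue needs 63 bits, so if the single outlier forced every stored value up to
// 63 bits per value the index would grow by roughly 20000 * (63 - 10) / 8 bytes (about
// 130 KB); the test asserts the doc-values format avoids paying that cost for one document.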
public virtual void ChangeIndexNoAdds(Random random, Directory dir) { // make sure searching sees the right number of hits DirectoryReader reader = DirectoryReader.Open(dir); IndexSearcher searcher = NewSearcher(reader); ScoreDoc[] hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs; Assert.AreEqual(34, hits.Length, "wrong number of hits"); Document d = searcher.Doc(hits[0].Doc); Assert.AreEqual("0", d.Get("id"), "wrong first document"); reader.Dispose(); // fully merge IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).SetOpenMode(OpenMode_e.APPEND)); writer.ForceMerge(1); writer.Dispose(); reader = DirectoryReader.Open(dir); searcher = NewSearcher(reader); hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs; Assert.AreEqual(34, hits.Length, "wrong number of hits"); DoTestHits(hits, 34, searcher.IndexReader); reader.Dispose(); }
public virtual void TestFullyMergeOldIndex() { foreach (string name in OldNames) { if (VERBOSE) { Console.WriteLine("\nTEST: index=" + name); } Directory dir = NewDirectory(OldIndexDirs[name]); IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); w.ForceMerge(1); w.Dispose(); dir.Dispose(); } }
public virtual void TestCommitOnCloseDiskUsage() { // MemoryCodec, since it uses FST, is not necessarily // "additive", ie if you add up N small FSTs, then merge // them, the merged result can easily be larger than the // sum because the merged FST may use array encoding for // some arcs (which uses more space): string idFormat = TestUtil.GetPostingsFormat("id"); string contentFormat = TestUtil.GetPostingsFormat("content"); AssumeFalse("this test cannot run with Memory codec", idFormat.Equals("Memory", StringComparison.Ordinal) || contentFormat.Equals("Memory", StringComparison.Ordinal)); MockDirectoryWrapper dir = NewMockDirectory(); Analyzer analyzer; if (Random().NextBoolean()) { // no payloads analyzer = new AnalyzerAnonymousInnerClassHelper(this); } else { // fixed length payloads int length = Random().Next(200); analyzer = new AnalyzerAnonymousInnerClassHelper2(this, length); } IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetMaxBufferedDocs(10).SetReaderPooling(false).SetMergePolicy(NewLogMergePolicy(10))); for (int j = 0; j < 30; j++) { AddDocWithIndex(writer, j); } writer.Dispose(); dir.ResetMaxUsedSizeInBytes(); dir.TrackDiskUsage = true; long startDiskUsage = dir.MaxUsedSizeInBytes; writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetOpenMode(OpenMode.APPEND).SetMaxBufferedDocs(10).SetMergeScheduler(new SerialMergeScheduler()).SetReaderPooling(false).SetMergePolicy(NewLogMergePolicy(10))); for (int j = 0; j < 1470; j++) { AddDocWithIndex(writer, j); } long midDiskUsage = dir.MaxUsedSizeInBytes; dir.ResetMaxUsedSizeInBytes(); writer.ForceMerge(1); writer.Dispose(); DirectoryReader.Open(dir).Dispose(); long endDiskUsage = dir.MaxUsedSizeInBytes; // Ending index is 50X as large as starting index; due // to 3X disk usage normally we allow 150X max // transient usage. If something is wrong w/ deleter // and it doesn't delete intermediate segments then it // will exceed this 150X: // System.out.println("start " + startDiskUsage + "; mid " + midDiskUsage + ";end " + endDiskUsage); Assert.IsTrue(midDiskUsage < 150 * startDiskUsage, "writer used too much space while adding documents: mid=" + midDiskUsage + " start=" + startDiskUsage + " end=" + endDiskUsage + " max=" + (startDiskUsage * 150)); Assert.IsTrue(endDiskUsage < 150 * startDiskUsage, "writer used too much space after close: endDiskUsage=" + endDiskUsage + " startDiskUsage=" + startDiskUsage + " max=" + (startDiskUsage * 150)); dir.Dispose(); }
public virtual void TestCommitOnCloseForceMerge() { Directory dir = NewDirectory(); // Must disable throwing exc on double-write: this // test uses IW.rollback which easily results in // writing to same file more than once if (dir is MockDirectoryWrapper) { ((MockDirectoryWrapper)dir).PreventDoubleWrite = false; } IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(10).SetMergePolicy(NewLogMergePolicy(10))); for (int j = 0; j < 17; j++) { AddDocWithIndex(writer, j); } writer.Dispose(); writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode.APPEND)); writer.ForceMerge(1); // Open a reader before closing (committing) the writer: DirectoryReader reader = DirectoryReader.Open(dir); // Reader should see index as multi-seg at this // point: Assert.IsTrue(reader.Leaves.Count > 1, "Reader incorrectly sees one segment"); reader.Dispose(); // Abort the writer: writer.Rollback(); TestIndexWriter.AssertNoUnreferencedFiles(dir, "aborted writer after forceMerge"); // Open a reader after aborting writer: reader = DirectoryReader.Open(dir); // Reader should still see index as multi-segment Assert.IsTrue(reader.Leaves.Count > 1, "Reader incorrectly sees one segment"); reader.Dispose(); if (VERBOSE) { Console.WriteLine("TEST: do real full merge"); } writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode.APPEND)); writer.ForceMerge(1); writer.Dispose(); if (VERBOSE) { Console.WriteLine("TEST: writer closed"); } TestIndexWriter.AssertNoUnreferencedFiles(dir, "aborted writer after forceMerge"); // Open a reader after closing the writer: reader = DirectoryReader.Open(dir); // Reader should see index as one segment Assert.AreEqual(1, reader.Leaves.Count, "Reader incorrectly sees more than one segment"); reader.Dispose(); dir.Dispose(); }
public virtual void TestLiveChangeToCFS() { Directory dir = NewDirectory(); IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); iwc.SetMergePolicy(NewLogMergePolicy(true)); // Start false: iwc.SetUseCompoundFile(false); iwc.MergePolicy.NoCFSRatio = 0.0d; IndexWriter w = new IndexWriter(dir, iwc); // Change to true: w.Config.SetUseCompoundFile(true); Document doc = new Document(); doc.Add(NewStringField("field", "foo", Store.NO)); w.AddDocument(doc); w.Commit(); Assert.IsTrue(w.NewestSegment().Info.UseCompoundFile, "Expected CFS after commit"); doc.Add(NewStringField("field", "foo", Store.NO)); w.AddDocument(doc); w.Commit(); w.ForceMerge(1); w.Commit(); // no compound files after merge Assert.IsFalse(w.NewestSegment().Info.UseCompoundFile, "Expected Non-CFS after merge"); MergePolicy lmp = w.Config.MergePolicy; lmp.NoCFSRatio = 1.0; lmp.MaxCFSSegmentSizeMB = double.PositiveInfinity; w.AddDocument(doc); w.ForceMerge(1); w.Commit(); Assert.IsTrue(w.NewestSegment().Info.UseCompoundFile, "Expected CFS after merge"); w.Dispose(); dir.Dispose(); }
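// Background for the assertions above (summarizing Lucene 4.x behavior, not stated in the test itself): SetUseCompoundFile on the writer config only governs newly flushed segments, while the MergePolicy's NoCFSRatio and MaxCFSSegmentSizeMB govern segments produced by merges. NoCFSRatio is the largest fraction of the total index size that a merged segment may occupy and still be written as a compound file, so 0.0 disables CFS for merges and 1.0 (with an unbounded MaxCFSSegmentSizeMB) allows it for any merge. That is why the commit yields a CFS segment, the first ForceMerge does not, and the second ForceMerge does again. Illustrative configuration fragment using the same calls as the test:
//   iwc.SetUseCompoundFile(true);                                   // flushed segments -> CFS
//   iwc.MergePolicy.NoCFSRatio = 1.0;                               // merged segments eligible for CFS
//   iwc.MergePolicy.MaxCFSSegmentSizeMB = double.PositiveInfinity;  // no size cutoff for CFS merges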
public virtual void TestAddIndexOnDiskFull() { // MemoryCodec, since it uses FST, is not necessarily // "additive", ie if you add up N small FSTs, then merge // them, the merged result can easily be larger than the // sum because the merged FST may use array encoding for // some arcs (which uses more space): string idFormat = TestUtil.GetPostingsFormat("id"); string contentFormat = TestUtil.GetPostingsFormat("content"); AssumeFalse("this test cannot run with Memory codec", idFormat.Equals("Memory") || contentFormat.Equals("Memory")); int START_COUNT = 57; int NUM_DIR = TEST_NIGHTLY ? 50 : 5; int END_COUNT = START_COUNT + NUM_DIR * (TEST_NIGHTLY ? 25 : 5); // Build up a bunch of dirs that have indexes which we // will then merge together by calling addIndexes(*): Directory[] dirs = new Directory[NUM_DIR]; long inputDiskUsage = 0; for (int i = 0; i < NUM_DIR; i++) { dirs[i] = NewDirectory(); IndexWriter writer = new IndexWriter(dirs[i], NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); for (int j = 0; j < 25; j++) { AddDocWithIndex(writer, 25 * i + j); } writer.Dispose(); string[] files = dirs[i].ListAll(); for (int j = 0; j < files.Length; j++) { inputDiskUsage += dirs[i].FileLength(files[j]); } } // Now, build a starting index that has START_COUNT docs. We // will then try to addIndexes into a copy of this: MockDirectoryWrapper startDir = NewMockDirectory(); IndexWriter indWriter = new IndexWriter(startDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); for (int j = 0; j < START_COUNT; j++) { AddDocWithIndex(indWriter, j); } indWriter.Dispose(); // Make sure starting index seems to be working properly: Term searchTerm = new Term("content", "aaa"); IndexReader reader = DirectoryReader.Open(startDir); Assert.AreEqual(57, reader.DocFreq(searchTerm), "first docFreq"); IndexSearcher searcher = NewSearcher(reader); ScoreDoc[] hits = searcher.Search(new TermQuery(searchTerm), null, 1000).ScoreDocs; Assert.AreEqual(57, hits.Length, "first number of hits"); reader.Dispose(); // Iterate with larger and larger amounts of free // disk space. With little free disk space, // addIndexes will certainly run out of space & // fail. Verify that when this happens, index is // not corrupt and index in fact has added no // documents. Then, we increase the free disk // space for the next cycle. At some point there is // enough free disk space and addIndexes should // succeed and the index should show all documents were // added.
// String[] files = startDir.ListAll(); long diskUsage = startDir.GetSizeInBytes(); long startDiskUsage = 0; string[] files_ = startDir.ListAll(); for (int i = 0; i < files_.Length; i++) { startDiskUsage += startDir.FileLength(files_[i]); } for (int iter = 0; iter < 3; iter++) { if (VERBOSE) { Console.WriteLine("TEST: iter=" + iter); } // Start with 100 bytes more than we are currently using: long diskFree = diskUsage + TestUtil.NextInt(Random(), 50, 200); int method = iter; bool success = false; bool done = false; string methodName; if (0 == method) { methodName = "addIndexes(Directory[]) + forceMerge(1)"; } else if (1 == method) { methodName = "addIndexes(IndexReader[])"; } else { methodName = "addIndexes(Directory[])"; } while (!done) { if (VERBOSE) { Console.WriteLine("TEST: cycle..."); } // Make a new dir that will enforce disk usage: MockDirectoryWrapper dir = new MockDirectoryWrapper(Random(), new RAMDirectory(startDir, NewIOContext(Random()))); indWriter = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode.APPEND).SetMergePolicy(NewLogMergePolicy(false))); IOException err = null; IMergeScheduler ms = indWriter.Config.MergeScheduler; for (int x = 0; x < 2; x++) { if (ms is IConcurrentMergeScheduler) // this test intentionally produces exceptions // in the threads that CMS launches; we don't // want to pollute test output with these. { if (0 == x) { ((IConcurrentMergeScheduler)ms).SetSuppressExceptions(); } else { ((IConcurrentMergeScheduler)ms).ClearSuppressExceptions(); } } // Two loops: first time, limit disk space & // throw random IOExceptions; second time, no // disk space limit: double rate = 0.05; double diskRatio = ((double)diskFree) / diskUsage; long thisDiskFree; string testName = null; if (0 == x) { dir.RandomIOExceptionRateOnOpen = Random().NextDouble() * 0.01; thisDiskFree = diskFree; if (diskRatio >= 2.0) { rate /= 2; } if (diskRatio >= 4.0) { rate /= 2; } if (diskRatio >= 6.0) { rate = 0.0; } if (VERBOSE) { testName = "disk full test " + methodName + " with disk full at " + diskFree + " bytes"; } } else { dir.RandomIOExceptionRateOnOpen = 0.0; thisDiskFree = 0; rate = 0.0; if (VERBOSE) { testName = "disk full test " + methodName + " with unlimited disk space"; } } if (VERBOSE) { Console.WriteLine("\ncycle: " + testName); } dir.TrackDiskUsage = true; dir.MaxSizeInBytes = thisDiskFree; dir.RandomIOExceptionRate = rate; try { if (0 == method) { if (VERBOSE) { Console.WriteLine("TEST: now addIndexes count=" + dirs.Length); } indWriter.AddIndexes(dirs); if (VERBOSE) { Console.WriteLine("TEST: now forceMerge"); } indWriter.ForceMerge(1); } else if (1 == method) { IndexReader[] readers = new IndexReader[dirs.Length]; for (int i = 0; i < dirs.Length; i++) { readers[i] = DirectoryReader.Open(dirs[i]); } try { indWriter.AddIndexes(readers); } finally { for (int i = 0; i < dirs.Length; i++) { readers[i].Dispose(); } } } else { indWriter.AddIndexes(dirs); } success = true; if (VERBOSE) { Console.WriteLine(" success!"); } if (0 == x) { done = true; } } catch (IOException e) { success = false; err = e; if (VERBOSE) { Console.WriteLine(" hit IOException: " + e); Console.WriteLine(e.StackTrace); } if (1 == x) { Console.WriteLine(e.StackTrace); Assert.Fail(methodName + " hit IOException after disk space was freed up"); } } // Make sure all threads from // ConcurrentMergeScheduler are done TestUtil.SyncConcurrentMerges(indWriter); if (VERBOSE) { Console.WriteLine(" now test readers"); } // Finally, verify index is not 
corrupt, and, if // we succeeded, we see all docs added, and if we // failed, we see either all docs or no docs added // (transactional semantics): dir.RandomIOExceptionRateOnOpen = 0.0; try { reader = DirectoryReader.Open(dir); } catch (IOException e) { Console.WriteLine(e.StackTrace); Assert.Fail(testName + ": exception when creating IndexReader: " + e); } int result = reader.DocFreq(searchTerm); if (success) { if (result != START_COUNT) { Assert.Fail(testName + ": method did not throw exception but docFreq('aaa') is " + result + " instead of expected " + START_COUNT); } } else { // On hitting exception we still may have added // all docs: if (result != START_COUNT && result != END_COUNT) { Console.WriteLine(err.StackTrace); Assert.Fail(testName + ": method did throw exception but docFreq('aaa') is " + result + " instead of expected " + START_COUNT + " or " + END_COUNT); } } searcher = NewSearcher(reader); try { hits = searcher.Search(new TermQuery(searchTerm), null, END_COUNT).ScoreDocs; } catch (IOException e) { Console.WriteLine(e.StackTrace); Assert.Fail(testName + ": exception when searching: " + e); } int result2 = hits.Length; if (success) { if (result2 != result) { Assert.Fail(testName + ": method did not throw exception but hits.Length for search on term 'aaa' is " + result2 + " instead of expected " + result); } } else { // On hitting exception we still may have added // all docs: if (result2 != result) { Console.WriteLine(err.StackTrace); Assert.Fail(testName + ": method did throw exception but hits.Length for search on term 'aaa' is " + result2 + " instead of expected " + result); } } reader.Dispose(); if (VERBOSE) { Console.WriteLine(" count is " + result); } if (done || result == END_COUNT) { break; } } if (VERBOSE) { Console.WriteLine(" start disk = " + startDiskUsage + "; input disk = " + inputDiskUsage + "; max used = " + dir.MaxUsedSizeInBytes); } if (done) { // Javadocs state that temp free Directory space // required is at most 2X total input size of // indices so let's make sure: Assert.IsTrue((dir.MaxUsedSizeInBytes - startDiskUsage) < 2 * (startDiskUsage + inputDiskUsage), "max free Directory space required exceeded 2X the total input index sizes during " + methodName + ": max temp usage = " + (dir.MaxUsedSizeInBytes - startDiskUsage) + " bytes vs limit=" + (2 * (startDiskUsage + inputDiskUsage)) + "; starting disk usage = " + startDiskUsage + " bytes; " + "input index disk usage = " + inputDiskUsage + " bytes"); } // Make sure we don't hit disk full during close below: dir.MaxSizeInBytes = 0; dir.RandomIOExceptionRate = 0.0; dir.RandomIOExceptionRateOnOpen = 0.0; indWriter.Dispose(); // Wait for all BG threads to finish else // dir.Dispose() will throw IOException because // there are still open files TestUtil.SyncConcurrentMerges(ms); dir.Dispose(); // Try again with more free space: diskFree += TEST_NIGHTLY ? TestUtil.NextInt(Random(), 4000, 8000) : TestUtil.NextInt(Random(), 40000, 80000); } } startDir.Dispose(); foreach (Directory dir in dirs) { dir.Dispose(); } }
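// Reading guide for TestAddIndexOnDiskFull (derived from the code above): the outer loop runs three iterations, one per code path -- addIndexes(Directory[]) + forceMerge(1), addIndexes(IndexReader[]), and addIndexes(Directory[]) alone. For each path the inner loop first simulates a nearly full disk plus random IOExceptions (after a failure, docFreq must equal START_COUNT or END_COUNT, never something in between), then repeats with unlimited disk space, where any IOException fails the test. diskFree grows after every cycle until the path succeeds, and the final check bounds transient disk usage by 2 * (startDiskUsage + inputDiskUsage).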
public virtual void ChangeIndexWithAdds(Random random, Directory dir, string origOldName) { // open writer IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).SetOpenMode(OpenMode_e.APPEND).SetMergePolicy(NewLogMergePolicy())); // add 10 docs for (int i = 0; i < 10; i++) { AddDoc(writer, 35 + i); } // make sure writer sees right total -- writer seems not to know about deletes in .del? int expected; if (Compare(origOldName, "24") < 0) { expected = 44; } else { expected = 45; } Assert.AreEqual(expected, writer.NumDocs(), "wrong doc count"); writer.Dispose(); // make sure searching sees right # hits IndexReader reader = DirectoryReader.Open(dir); IndexSearcher searcher = NewSearcher(reader); ScoreDoc[] hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs; Document d = searcher.IndexReader.Document(hits[0].Doc); assertEquals("wrong first document", "0", d.Get("id")); DoTestHits(hits, 44, searcher.IndexReader); reader.Dispose(); // fully merge writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).SetOpenMode(OpenMode_e.APPEND).SetMergePolicy(NewLogMergePolicy())); writer.ForceMerge(1); writer.Dispose(); reader = DirectoryReader.Open(dir); searcher = NewSearcher(reader); hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs; Assert.AreEqual(44, hits.Length, "wrong number of hits"); d = searcher.Doc(hits[0].Doc); DoTestHits(hits, 44, searcher.IndexReader); assertEquals("wrong first document", "0", d.Get("id")); reader.Dispose(); }
public virtual void TestThreadSafety() { Directory dir = NewDirectory(); // NOTE: this also controls the number of threads! int n = TestUtil.NextInt(Random(), 20, 40); IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); for (int i = 0; i < n; i++) { writer.AddDocument(CreateDocument(i, 3)); } writer.ForceMerge(1); writer.Dispose(); TestReopen test = new TestReopenAnonymousInnerClassHelper3(this, dir, n); IList<ReaderCouple> readers = new SynchronizedCollection<ReaderCouple>(); DirectoryReader firstReader = DirectoryReader.Open(dir); DirectoryReader reader = firstReader; ReaderThread[] threads = new ReaderThread[n]; ISet<DirectoryReader> readersToClose = new ConcurrentHashSet<DirectoryReader>(new HashSet<DirectoryReader>()); for (int i = 0; i < n; i++) { if (i % 2 == 0) { DirectoryReader refreshed = DirectoryReader.OpenIfChanged(reader); if (refreshed != null) { readersToClose.Add(reader); reader = refreshed; } } DirectoryReader r = reader; int index = i; ReaderThreadTask task; if (i < 4 || (i >= 10 && i < 14) || i > 18) { task = new ReaderThreadTaskAnonymousInnerClassHelper(this, test, readers, readersToClose, r, index); } else { task = new ReaderThreadTaskAnonymousInnerClassHelper2(this, readers); } threads[i] = new ReaderThread(task); threads[i].Start(); } lock (this) { Monitor.Wait(this, TimeSpan.FromMilliseconds(1000)); } for (int i = 0; i < n; i++) { if (threads[i] != null) { threads[i].StopThread(); } } for (int i = 0; i < n; i++) { if (threads[i] != null) { threads[i].Join(); if (threads[i].Error != null) { string msg = "Error occurred in thread " + threads[i].Name + ":\n" + threads[i].Error.Message; Assert.Fail(msg); } } } foreach (DirectoryReader readerToClose in readersToClose) { readerToClose.Dispose(); } firstReader.Dispose(); reader.Dispose(); foreach (DirectoryReader readerToClose in readersToClose) { AssertReaderClosed(readerToClose, true); } AssertReaderClosed(reader, true); AssertReaderClosed(firstReader, true); dir.Dispose(); }
public virtual DirectoryInfo CreateIndex(string dirName, bool doCFS, bool fullyMerged) { // we use a real directory name that is not cleaned up, because this method is only used to create backwards indexes: DirectoryInfo indexDir = new DirectoryInfo(Path.Combine("/tmp/idx/", dirName)); TestUtil.Rm(indexDir); Directory dir = NewFSDirectory(indexDir); LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy(); mp.NoCFSRatio = doCFS ? 1.0 : 0.0; mp.MaxCFSSegmentSizeMB = double.PositiveInfinity; // TODO: remove randomness IndexWriterConfig conf = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetUseCompoundFile(doCFS).SetMaxBufferedDocs(10).SetMergePolicy(mp); IndexWriter writer = new IndexWriter(dir, conf); for (int i = 0; i < 35; i++) { AddDoc(writer, i); } Assert.AreEqual(35, writer.MaxDoc, "wrong doc count"); if (fullyMerged) { writer.ForceMerge(1); } writer.Dispose(); if (!fullyMerged) { // open fresh writer so we get no prx file in the added segment mp = new LogByteSizeMergePolicy(); mp.NoCFSRatio = doCFS ? 1.0 : 0.0; // TODO: remove randomness conf = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetUseCompoundFile(doCFS).SetMaxBufferedDocs(10).SetMergePolicy(mp); writer = new IndexWriter(dir, conf); AddNoProxDoc(writer); writer.Dispose(); conf = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetUseCompoundFile(doCFS).SetMaxBufferedDocs(10).SetMergePolicy(doCFS ? NoMergePolicy.COMPOUND_FILES : NoMergePolicy.NO_COMPOUND_FILES); writer = new IndexWriter(dir, conf); Term searchTerm = new Term("id", "7"); writer.DeleteDocuments(searchTerm); writer.Dispose(); } dir.Dispose(); return indexDir; }
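// Hypothetical usage of the helper above (directory names are placeholders, not taken from this file): a back-compat index generator calls it once per variant and the resulting on-disk directory is presumably archived as a test resource, which is what the OldNames / OldIndexDirs lookups in TestFullyMergeOldIndex consume.
//   DirectoryInfo cfsIndex = CreateIndex("index.4x.cfs", doCFS: true, fullyMerged: false);
//   DirectoryInfo nonCfsIndex = CreateIndex("index.4x.nocfs", doCFS: false, fullyMerged: false);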
public virtual void TestOpenPriorSnapshot() { Directory dir = NewDirectory(); IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetIndexDeletionPolicy(new KeepAllDeletionPolicy(this, dir)).SetMaxBufferedDocs(2).SetMergePolicy(NewLogMergePolicy(10))); KeepAllDeletionPolicy policy = (KeepAllDeletionPolicy)writer.Config.IndexDeletionPolicy; for (int i = 0; i < 10; i++) { AddDoc(writer); if ((1 + i) % 2 == 0) { writer.Commit(); } } writer.Dispose(); ICollection<IndexCommit> commits = DirectoryReader.ListCommits(dir); Assert.AreEqual(5, commits.Count); IndexCommit lastCommit = null; foreach (IndexCommit commit in commits) { if (lastCommit == null || commit.Generation > lastCommit.Generation) { lastCommit = commit; } } Assert.IsTrue(lastCommit != null); // Now add 1 doc and merge writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetIndexDeletionPolicy(policy)); AddDoc(writer); Assert.AreEqual(11, writer.NumDocs); writer.ForceMerge(1); writer.Dispose(); Assert.AreEqual(6, DirectoryReader.ListCommits(dir).Count); // Now open writer on the commit just before merge: writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetIndexDeletionPolicy(policy).SetIndexCommit(lastCommit)); Assert.AreEqual(10, writer.NumDocs); // Should undo our rollback: writer.Rollback(); DirectoryReader r = DirectoryReader.Open(dir); // Still merged, still 11 docs Assert.AreEqual(1, r.Leaves.Count); Assert.AreEqual(11, r.NumDocs); r.Dispose(); writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetIndexDeletionPolicy(policy).SetIndexCommit(lastCommit)); Assert.AreEqual(10, writer.NumDocs); // Commits the rollback: writer.Dispose(); // Now 7 because we made another commit Assert.AreEqual(7, DirectoryReader.ListCommits(dir).Count); r = DirectoryReader.Open(dir); // Not fully merged because we rolled it back, and now only // 10 docs Assert.IsTrue(r.Leaves.Count > 1); Assert.AreEqual(10, r.NumDocs); r.Dispose(); // Re-merge writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetIndexDeletionPolicy(policy)); writer.ForceMerge(1); writer.Dispose(); r = DirectoryReader.Open(dir); Assert.AreEqual(1, r.Leaves.Count); Assert.AreEqual(10, r.NumDocs); r.Dispose(); // Now open writer on the commit just before merging, // but this time keeping only the last commit: writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetIndexCommit(lastCommit)); Assert.AreEqual(10, writer.NumDocs); // Reader still sees fully merged index, because writer // opened on the prior commit has not yet committed: r = DirectoryReader.Open(dir); Assert.AreEqual(1, r.Leaves.Count); Assert.AreEqual(10, r.NumDocs); r.Dispose(); writer.Dispose(); // Now reader sees not-fully-merged index: r = DirectoryReader.Open(dir); Assert.IsTrue(r.Leaves.Count > 1); Assert.AreEqual(10, r.NumDocs); r.Dispose(); dir.Dispose(); }
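// Commit arithmetic behind the assertions above: the first writer commits after every 2nd of its 10 docs (5 commits), the add-doc + ForceMerge(1) + Dispose sequence adds a 6th, the Rollback adds none, and the final Dispose of the writer opened on lastCommit adds the 7th -- matching the AreEqual(5), AreEqual(6), and AreEqual(7) checks.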
public virtual void TestEmptyIndexWithVectors() { Directory rd1 = NewDirectory(); { if (Verbose) { Console.WriteLine("\nTEST: make 1st writer"); } IndexWriter iw = new IndexWriter(rd1, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))); Document doc = new Document(); Field idField = NewTextField("id", "", Field.Store.NO); doc.Add(idField); FieldType customType = new FieldType(TextField.TYPE_NOT_STORED); customType.StoreTermVectors = true; doc.Add(NewField("test", "", customType)); idField.SetStringValue("1"); iw.AddDocument(doc); doc.Add(NewTextField("test", "", Field.Store.NO)); idField.SetStringValue("2"); iw.AddDocument(doc); iw.Dispose(); IndexWriterConfig dontMergeConfig = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))).SetMergePolicy(NoMergePolicy.COMPOUND_FILES); if (Verbose) { Console.WriteLine("\nTEST: make 2nd writer"); } IndexWriter writer = new IndexWriter(rd1, dontMergeConfig); writer.DeleteDocuments(new Term("id", "1")); writer.Dispose(); IndexReader ir = DirectoryReader.Open(rd1); Assert.AreEqual(2, ir.MaxDoc); Assert.AreEqual(1, ir.NumDocs); ir.Dispose(); iw = new IndexWriter(rd1, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.APPEND)); iw.ForceMerge(1); iw.Dispose(); } Directory rd2 = NewDirectory(); { IndexWriter iw = new IndexWriter(rd2, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))); Document doc = new Document(); iw.AddDocument(doc); iw.Dispose(); } Directory rdOut = NewDirectory(); IndexWriter iwOut = new IndexWriter(rdOut, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))); DirectoryReader reader1, reader2; ParallelAtomicReader pr = new ParallelAtomicReader(SlowCompositeReaderWrapper.Wrap(reader1 = DirectoryReader.Open(rd1)), SlowCompositeReaderWrapper.Wrap(reader2 = DirectoryReader.Open(rd2))); // When unpatched, Lucene crashes here with an ArrayIndexOutOfBoundsException (caused by TermVectorsWriter) iwOut.AddIndexes(pr); // ParallelReader closes any IndexReader you added to it: pr.Dispose(); // assert subreaders were closed Assert.AreEqual(0, reader1.RefCount); Assert.AreEqual(0, reader2.RefCount); rd1.Dispose(); rd2.Dispose(); iwOut.ForceMerge(1); iwOut.Dispose(); rdOut.Dispose(); }
public virtual void TestBasic() { Directory dir = NewDirectory(); Analyzer analyzer = new MockAnalyzer(Random); IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetMaxBufferedDocs(2).SetSimilarity(new SimpleSimilarity()).SetMergePolicy(NewLogMergePolicy(2))); StringBuilder sb = new StringBuilder(265); string term = "term"; for (int i = 0; i < 30; i++) { Document doc = new Document(); sb.Append(term).Append(" "); string content = sb.ToString(); Field noTf = NewField("noTf", content + (i % 2 == 0 ? "" : " notf"), omitType); doc.Add(noTf); Field tf = NewField("tf", content + (i % 2 == 0 ? " tf" : ""), normalType); doc.Add(tf); writer.AddDocument(doc); //System.out.println(d); } writer.ForceMerge(1); // flush writer.Dispose(); /* * Verify the index */ IndexReader reader = DirectoryReader.Open(dir); IndexSearcher searcher = NewSearcher(reader); searcher.Similarity = new SimpleSimilarity(); Term a = new Term("noTf", term); Term b = new Term("tf", term); Term c = new Term("noTf", "notf"); Term d = new Term("tf", "tf"); TermQuery q1 = new TermQuery(a); TermQuery q2 = new TermQuery(b); TermQuery q3 = new TermQuery(c); TermQuery q4 = new TermQuery(d); PhraseQuery pq = new PhraseQuery(); pq.Add(a); pq.Add(c); try { searcher.Search(pq, 10); Assert.Fail("did not hit expected exception"); } catch (Exception e) { Exception cause = e; // If the searcher uses an executor service, the IAE is wrapped into other exceptions while (cause.InnerException != null) { cause = cause.InnerException; } if (!(cause is InvalidOperationException)) { throw new InvalidOperationException("Expected an IAE", e); } // else OK because positions are not indexed } searcher.Search(q1, new CountingHitCollectorAnonymousClass(this)); //System.out.println(CountingHitCollector.getCount()); searcher.Search(q2, new CountingHitCollectorAnonymousClass2(this)); //System.out.println(CountingHitCollector.getCount()); searcher.Search(q3, new CountingHitCollectorAnonymousClass3(this)); //System.out.println(CountingHitCollector.getCount()); searcher.Search(q4, new CountingHitCollectorAnonymousClass4(this)); //System.out.println(CountingHitCollector.getCount()); BooleanQuery bq = new BooleanQuery(); bq.Add(q1, Occur.MUST); bq.Add(q4, Occur.MUST); searcher.Search(bq, new CountingHitCollectorAnonymousClass5(this)); Assert.AreEqual(15, CountingHitCollector.Count); reader.Dispose(); dir.Dispose(); }
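// Note on the fields above (omitType and normalType are defined elsewhere in the fixture; this description is an assumption based on how they are used here): "noTf" is presumably indexed with IndexOptions.DOCS_ONLY, i.e. no term frequencies or positions, while "tf" keeps frequencies and positions. That is why the PhraseQuery on the "noTf" field is expected to fail -- phrase matching needs positions -- while the collector-based searches still see the expected document counts, with the final assertion expecting 15 for the conjunction of q1 and q4.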