public SetMergePolicy ( MergePolicy mergePolicy ) : IndexWriterConfig

mergePolicy | MergePolicy | the merge policy the IndexWriter should use to select and execute segment merges
Result | IndexWriterConfig | this IndexWriterConfig instance, so that setter calls can be chained
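A minimal sketch of the typical call pattern, assuming Lucene.Net 4.8-style APIs (the RAMDirectory, StandardAnalyzer, and "content" field below are illustrative choices, not taken from the examples). The merge policy must be set on the IndexWriterConfig before the IndexWriter is constructed, because the writer reads its configuration at creation time:

using Lucene.Net.Analysis.Standard;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Store;
using Lucene.Net.Util;

// Merge segments by document count instead of the default TieredMergePolicy.
Directory dir = new RAMDirectory();
IndexWriterConfig conf = new IndexWriterConfig(LuceneVersion.LUCENE_48,
    new StandardAnalyzer(LuceneVersion.LUCENE_48));
LogMergePolicy lmp = new LogDocMergePolicy();
lmp.MergeFactor = 10;      // how many segments are merged at once
conf.SetMergePolicy(lmp);  // returns the config, so calls can be chained
using (IndexWriter writer = new IndexWriter(dir, conf))
{
    Document doc = new Document();
    doc.Add(new TextField("content", "hello merge policy", Field.Store.NO));
    writer.AddDocument(doc);
    writer.Commit();       // any merges triggered here are selected by lmp
}

Because SetMergePolicy returns the IndexWriterConfig itself, it composes with the other setters, e.g. new IndexWriterConfig(...).SetMaxBufferedDocs(10).SetMergePolicy(lmp), as several of the examples below do.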
public virtual void TestSingleNonMergeableSegment() { Directory dir = new RAMDirectory(); IndexWriterConfig conf = NewWriterConfig(); IndexWriter writer = new IndexWriter(dir, conf); AddDocs(writer, 3, true); writer.Dispose(); conf = NewWriterConfig(); LogMergePolicy lmp = new LogDocMergePolicy(); lmp.MaxMergeDocs = 3; conf.SetMergePolicy(lmp); writer = new IndexWriter(dir, conf); writer.ForceMerge(1); writer.Dispose(); // Verify that the index is left as a single segment. SegmentInfos sis = new SegmentInfos(); sis.Read(dir); Assert.AreEqual(1, sis.Count); }
public virtual void TestNumDocsLimit() { // tests that the max merge docs constraint is applied during forceMerge. Directory dir = new RAMDirectory(); // Prepare an index w/ several small segments and a large one. IndexWriterConfig conf = NewWriterConfig(); IndexWriter writer = new IndexWriter(dir, conf); AddDocs(writer, 3); AddDocs(writer, 3); AddDocs(writer, 5); AddDocs(writer, 3); AddDocs(writer, 3); AddDocs(writer, 3); AddDocs(writer, 3); writer.Dispose(); conf = NewWriterConfig(); LogMergePolicy lmp = new LogDocMergePolicy(); lmp.MaxMergeDocs = 3; conf.SetMergePolicy(lmp); writer = new IndexWriter(dir, conf); writer.ForceMerge(1); writer.Dispose(); // Should only be 3 segments in the index, because one of them exceeds the size limit SegmentInfos sis = new SegmentInfos(); sis.Read(dir); Assert.AreEqual(3, sis.Count); }
public virtual void TestLastSegmentTooLarge() { Directory dir = new RAMDirectory(); IndexWriterConfig conf = NewWriterConfig(); IndexWriter writer = new IndexWriter(dir, conf); AddDocs(writer, 3); AddDocs(writer, 3); AddDocs(writer, 3); AddDocs(writer, 5); writer.Dispose(); conf = NewWriterConfig(); LogMergePolicy lmp = new LogDocMergePolicy(); lmp.MaxMergeDocs = 3; conf.SetMergePolicy(lmp); writer = new IndexWriter(dir, conf); writer.ForceMerge(1); writer.Dispose(); SegmentInfos sis = new SegmentInfos(); sis.Read(dir); Assert.AreEqual(2, sis.Count); }
public virtual void TestAddNumericTwice() { Analyzer analyzer = new MockAnalyzer(Random()); Directory directory = NewDirectory(); // we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1 IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); iwc.SetMergePolicy(NewLogMergePolicy()); IndexWriter iwriter = new IndexWriter(directory, iwc); Document doc = new Document(); doc.Add(new NumericDocValuesField("dv", 1)); doc.Add(new NumericDocValuesField("dv", 2)); try { iwriter.AddDocument(doc); Assert.Fail("didn't hit expected exception"); } catch (System.ArgumentException expected) { // expected } iwriter.Dispose(); directory.Dispose(); }
public virtual void TestTooLargeTermSortedSetBytes() { AssumeTrue("codec does not support SORTED_SET", DefaultCodecSupportsSortedSet()); Analyzer analyzer = new MockAnalyzer(Random()); Directory directory = NewDirectory(); // we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1 IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); iwc.SetMergePolicy(NewLogMergePolicy()); IndexWriter iwriter = new IndexWriter(directory, iwc); Document doc = new Document(); byte[] bytes = new byte[100000]; BytesRef b = new BytesRef(bytes); Random().NextBytes(bytes); doc.Add(new SortedSetDocValuesField("dv", b)); try { iwriter.AddDocument(doc); Assert.Fail("did not get expected exception"); } catch (System.ArgumentException expected) { // expected } iwriter.Dispose(); directory.Dispose(); }
public virtual void TestTooLargeSortedBytes() { Analyzer analyzer = new MockAnalyzer(Random); Directory directory = NewDirectory(); // we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1 IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); iwc.SetMergePolicy(NewLogMergePolicy()); IndexWriter iwriter = new IndexWriter(directory, iwc); Document doc = new Document(); var bytes = new byte[100000]; BytesRef b = new BytesRef(bytes); Random.NextBytes(bytes); doc.Add(new SortedDocValuesField("dv", b)); try { iwriter.AddDocument(doc); Assert.Fail("did not get expected exception"); } catch (Exception expected) when(expected.IsIllegalArgumentException()) { // expected } iwriter.Dispose(); directory.Dispose(); }
public virtual void TestDocValuesUnstored() { Directory dir = NewDirectory(); IndexWriterConfig iwconfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); iwconfig.SetMergePolicy(NewLogMergePolicy()); IndexWriter writer = new IndexWriter(dir, iwconfig); for (int i = 0; i < 50; i++) { Document doc = new Document(); doc.Add(new NumericDocValuesField("dv", i)); doc.Add(new TextField("docId", "" + i, Field.Store.YES)); writer.AddDocument(doc); } DirectoryReader r = writer.Reader; AtomicReader slow = SlowCompositeReaderWrapper.Wrap(r); FieldInfos fi = slow.FieldInfos; FieldInfo dvInfo = fi.FieldInfo("dv"); Assert.IsTrue(dvInfo.HasDocValues()); NumericDocValues dv = slow.GetNumericDocValues("dv"); for (int i = 0; i < 50; i++) { Assert.AreEqual(i, dv.Get(i)); Document d = slow.Document(i); // cannot use d.Get("dv") due to another bug! Assert.IsNull(d.GetField("dv")); Assert.AreEqual(Convert.ToString(i), d.Get("docId")); } slow.Dispose(); writer.Dispose(); dir.Dispose(); }
public virtual void TestAddBinaryTwice() { Analyzer analyzer = new MockAnalyzer(Random); Directory directory = NewDirectory(); // we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1 IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); iwc.SetMergePolicy(NewLogMergePolicy()); IndexWriter iwriter = new IndexWriter(directory, iwc); Document doc = new Document(); doc.Add(new BinaryDocValuesField("dv", new BytesRef("foo!"))); doc.Add(new BinaryDocValuesField("dv", new BytesRef("bar!"))); try { iwriter.AddDocument(doc); Assert.Fail("didn't hit expected exception"); } catch (ArgumentOutOfRangeException) // LUCENENET specific - changed from IllegalArgumentException to ArgumentOutOfRangeException (.NET convention) { // expected } iwriter.Dispose(); directory.Dispose(); }
public virtual void TestIndexWriterDirtSimple() { Directory dir = new RAMDirectory(); IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)); TieredMergePolicy tmp = NewTieredMergePolicy(); iwc.SetMergePolicy(tmp); iwc.SetMaxBufferedDocs(2); tmp.MaxMergeAtOnce = 100; tmp.SegmentsPerTier = 100; tmp.ForceMergeDeletesPctAllowed = 30.0; IndexWriter w = new IndexWriter(dir, iwc); int numDocs = 2; for (int i = 0; i < numDocs; i++) { Document doc = new Document(); doc.Add(NewTextField("content", "aaa " + i, Field.Store.NO)); w.AddDocument(doc); } Assert.AreEqual(numDocs, w.MaxDoc); Assert.AreEqual(numDocs, w.NumDocs); }
public virtual void TestSingleMergeableTooLargeSegment() { Directory dir = new RAMDirectory(); IndexWriterConfig conf = NewWriterConfig(); IndexWriter writer = new IndexWriter(dir, conf); AddDocs(writer, 5, true); // delete the last document writer.DeleteDocuments(new Term("id", "4")); writer.Dispose(); conf = NewWriterConfig(); LogMergePolicy lmp = new LogDocMergePolicy(); lmp.MaxMergeDocs = 2; conf.SetMergePolicy(lmp); writer = new IndexWriter(dir, conf); writer.ForceMerge(1); writer.Dispose(); // Verify that the segment was not merged: it exceeds MaxMergeDocs, so its deletions remain. SegmentInfos sis = new SegmentInfos(); sis.Read(dir); Assert.AreEqual(1, sis.Count); Assert.IsTrue(sis.Info(0).HasDeletions); }
public void TestEmptyChildFilter() { Directory dir = NewDirectory(); IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); config.SetMergePolicy(NoMergePolicy.NO_COMPOUND_FILES); // we don't want to merge - since we rely on certain segment setup IndexWriter w = new IndexWriter(dir, config); IList<Document> docs = new List<Document>(); docs.Add(MakeJob("java", 2007)); docs.Add(MakeJob("python", 2010)); docs.Add(MakeResume("Lisa", "United Kingdom")); w.AddDocuments(docs); docs.Clear(); docs.Add(MakeJob("ruby", 2005)); docs.Add(MakeJob("java", 2006)); docs.Add(MakeResume("Frank", "United States")); w.AddDocuments(docs); w.Commit(); int num = AtLeast(10); // produce a segment that doesn't have a value in the docType field for (int i = 0; i < num; i++) { docs.Clear(); docs.Add(MakeJob("java", 2007)); w.AddDocuments(docs); } IndexReader r = DirectoryReader.Open(w, Random().NextBoolean()); w.Dispose(); assertTrue(r.Leaves.size() > 1); IndexSearcher s = new IndexSearcher(r); Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume")))); BooleanQuery childQuery = new BooleanQuery(); childQuery.Add(new BooleanClause(new TermQuery(new Term("skill", "java")), BooleanClause.Occur.MUST)); childQuery.Add(new BooleanClause(NumericRangeQuery.NewIntRange("year", 2006, 2011, true, true), BooleanClause.Occur.MUST)); ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg); BooleanQuery fullQuery = new BooleanQuery(); fullQuery.Add(new BooleanClause(childJoinQuery, BooleanClause.Occur.MUST)); fullQuery.Add(new BooleanClause(new MatchAllDocsQuery(), BooleanClause.Occur.MUST)); ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(Sort.RELEVANCE, 1, true, true); s.Search(fullQuery, c); TopGroups<int> results = c.GetTopGroups(childJoinQuery, null, 0, 10, 0, true); assertFalse(float.IsNaN(results.MaxScore)); assertEquals(1, results.TotalGroupedHitCount); assertEquals(1, results.Groups.Length); IGroupDocs<int> group = results.Groups[0]; Document childDoc = s.Doc(group.ScoreDocs[0].Doc); assertEquals("java", childDoc.Get("skill")); assertNotNull(group.GroupValue); Document parentDoc = s.Doc(group.GroupValue); assertEquals("Lisa", parentDoc.Get("name")); r.Dispose(); dir.Dispose(); }
public virtual void TestMergeFactor() { Directory dir = new RAMDirectory(); IndexWriterConfig conf = NewWriterConfig(); IndexWriter writer = new IndexWriter(dir, conf); AddDocs(writer, 3); AddDocs(writer, 3); AddDocs(writer, 3); AddDocs(writer, 3); AddDocs(writer, 5); AddDocs(writer, 3); AddDocs(writer, 3); writer.Dispose(); conf = NewWriterConfig(); LogMergePolicy lmp = new LogDocMergePolicy(); lmp.MaxMergeDocs = 3; lmp.MergeFactor = 2; conf.SetMergePolicy(lmp); writer = new IndexWriter(dir, conf); writer.ForceMerge(1); writer.Dispose(); // Should only be 4 segments in the index, because of the merge factor and // max merge docs settings. SegmentInfos sis = new SegmentInfos(); sis.Read(dir); Assert.AreEqual(4, sis.Count); }
public override void SetUp() { base.SetUp(); // for now its SimpleText vs Lucene46(random postings format) // as this gives the best overall coverage. when we have more // codecs we should probably pick 2 from Codec.availableCodecs() LeftCodec = Codec.ForName("SimpleText"); RightCodec = new RandomCodec(Random()); LeftDir = NewDirectory(); RightDir = NewDirectory(); long seed = Random().Next(); // must use same seed because of random payloads, etc int maxTermLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH); MockAnalyzer leftAnalyzer = new MockAnalyzer(new Random((int)seed)); leftAnalyzer.MaxTokenLength = maxTermLength; MockAnalyzer rightAnalyzer = new MockAnalyzer(new Random((int)seed)); rightAnalyzer.MaxTokenLength = maxTermLength; // but these can be different // TODO: this turns this into a really big test of Multi*, is that what we want? IndexWriterConfig leftConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, leftAnalyzer); leftConfig.SetCodec(LeftCodec); // preserve docids leftConfig.SetMergePolicy(NewLogMergePolicy()); IndexWriterConfig rightConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, rightAnalyzer); rightConfig.SetCodec(RightCodec); // preserve docids rightConfig.SetMergePolicy(NewLogMergePolicy()); // must use same seed because of random docvalues fields, etc RandomIndexWriter leftWriter = new RandomIndexWriter(new Random((int)seed), LeftDir, leftConfig); RandomIndexWriter rightWriter = new RandomIndexWriter(new Random((int)seed), RightDir, rightConfig); int numdocs = AtLeast(100); CreateRandomIndex(numdocs, leftWriter, seed); CreateRandomIndex(numdocs, rightWriter, seed); LeftReader = MaybeWrapReader(leftWriter.Reader); leftWriter.Dispose(); RightReader = MaybeWrapReader(rightWriter.Reader); rightWriter.Dispose(); // check that our readers are valid TestUtil.CheckReader(LeftReader); TestUtil.CheckReader(RightReader); Info = "left: " + LeftCodec.ToString() + " / right: " + RightCodec.ToString(); }
public virtual void TestIntersectEmptyString() { Directory dir = NewDirectory(); IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)); iwc.SetMergePolicy(new LogDocMergePolicy()); RandomIndexWriter w = new RandomIndexWriter(Random, dir, iwc); Document doc = new Document(); doc.Add(NewStringField("field", "", Field.Store.NO)); doc.Add(NewStringField("field", "abc", Field.Store.NO)); w.AddDocument(doc); doc = new Document(); // add empty string to both documents, so that singletonDocID == -1. // For an FST-based term dict, we'll expect to see the first arc is // flagged with HAS_FINAL_OUTPUT doc.Add(NewStringField("field", "abc", Field.Store.NO)); doc.Add(NewStringField("field", "", Field.Store.NO)); w.AddDocument(doc); w.ForceMerge(1); DirectoryReader r = w.GetReader(); w.Dispose(); AtomicReader sub = GetOnlySegmentReader(r); Terms terms = sub.Fields.GetTerms("field"); Automaton automaton = (new RegExp(".*", RegExpSyntax.NONE)).ToAutomaton(); // accept ALL CompiledAutomaton ca = new CompiledAutomaton(automaton, false, false); TermsEnum te = terms.Intersect(ca, null); DocsEnum de; Assert.AreEqual("", te.Next().Utf8ToString()); de = te.Docs(null, null, DocsFlags.NONE); Assert.AreEqual(0, de.NextDoc()); Assert.AreEqual(1, de.NextDoc()); Assert.AreEqual("abc", te.Next().Utf8ToString()); de = te.Docs(null, null, DocsFlags.NONE); Assert.AreEqual(0, de.NextDoc()); Assert.AreEqual(1, de.NextDoc()); Assert.IsNull(te.Next()); // pass empty string te = terms.Intersect(ca, new BytesRef("")); Assert.AreEqual("abc", te.Next().Utf8ToString()); de = te.Docs(null, null, DocsFlags.NONE); Assert.AreEqual(0, de.NextDoc()); Assert.AreEqual(1, de.NextDoc()); Assert.IsNull(te.Next()); r.Dispose(); dir.Dispose(); }
public virtual void TestSorted() { Directory dir = NewDirectory(); Document doc = new Document(); BytesRef @ref = new BytesRef(); Field field = new SortedDocValuesField("bytes", @ref); doc.Add(field); IndexWriterConfig iwc = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, null); iwc.SetMergePolicy(NewLogMergePolicy()); RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc); int numDocs = AtLeast(500); for (int i = 0; i < numDocs; i++) { @ref.CopyChars(TestUtil.RandomUnicodeString(Random())); if (DefaultCodecSupportsDocsWithField() && Random().Next(7) == 0) { iw.AddDocument(new Document()); } iw.AddDocument(doc); if (Random().Next(17) == 0) { iw.Commit(); } } DirectoryReader ir = iw.Reader; iw.ForceMerge(1); DirectoryReader ir2 = iw.Reader; AtomicReader merged = GetOnlySegmentReader(ir2); iw.Dispose(); SortedDocValues multi = MultiDocValues.GetSortedValues(ir, "bytes"); SortedDocValues single = merged.GetSortedDocValues("bytes"); Assert.AreEqual(single.ValueCount, multi.ValueCount); BytesRef actual = new BytesRef(); BytesRef expected = new BytesRef(); for (int i = 0; i < numDocs; i++) { // check ord Assert.AreEqual(single.GetOrd(i), multi.GetOrd(i)); // check value single.Get(i, expected); multi.Get(i, actual); Assert.AreEqual(expected, actual); } ir.Dispose(); ir2.Dispose(); dir.Dispose(); }
private IndexWriterConfig NewWriterConfig() { IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, null); conf.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); conf.SetRAMBufferSizeMB(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB); // prevent any merges by default. conf.SetMergePolicy(NoMergePolicy.COMPOUND_FILES); return(conf); }
public virtual void TestIntersectBasic() { Directory dir = NewDirectory(); IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)); iwc.SetMergePolicy(new LogDocMergePolicy()); RandomIndexWriter w = new RandomIndexWriter(Random, dir, iwc); Document doc = new Document(); doc.Add(NewTextField("field", "aaa", Field.Store.NO)); w.AddDocument(doc); doc = new Document(); doc.Add(NewStringField("field", "bbb", Field.Store.NO)); w.AddDocument(doc); doc = new Document(); doc.Add(NewTextField("field", "ccc", Field.Store.NO)); w.AddDocument(doc); w.ForceMerge(1); DirectoryReader r = w.GetReader(); w.Dispose(); AtomicReader sub = GetOnlySegmentReader(r); Terms terms = sub.Fields.GetTerms("field"); Automaton automaton = (new RegExp(".*", RegExpSyntax.NONE)).ToAutomaton(); CompiledAutomaton ca = new CompiledAutomaton(automaton, false, false); TermsEnum te = terms.Intersect(ca, null); Assert.AreEqual("aaa", te.Next().Utf8ToString()); Assert.AreEqual(0, te.Docs(null, null, DocsFlags.NONE).NextDoc()); Assert.AreEqual("bbb", te.Next().Utf8ToString()); Assert.AreEqual(1, te.Docs(null, null, DocsFlags.NONE).NextDoc()); Assert.AreEqual("ccc", te.Next().Utf8ToString()); Assert.AreEqual(2, te.Docs(null, null, DocsFlags.NONE).NextDoc()); Assert.IsNull(te.Next()); te = terms.Intersect(ca, new BytesRef("abc")); Assert.AreEqual("bbb", te.Next().Utf8ToString()); Assert.AreEqual(1, te.Docs(null, null, DocsFlags.NONE).NextDoc()); Assert.AreEqual("ccc", te.Next().Utf8ToString()); Assert.AreEqual(2, te.Docs(null, null, DocsFlags.NONE).NextDoc()); Assert.IsNull(te.Next()); te = terms.Intersect(ca, new BytesRef("aaa")); Assert.AreEqual("bbb", te.Next().Utf8ToString()); Assert.AreEqual(1, te.Docs(null, null, DocsFlags.NONE).NextDoc()); Assert.AreEqual("ccc", te.Next().Utf8ToString()); Assert.AreEqual(2, te.Docs(null, null, DocsFlags.NONE).NextDoc()); Assert.IsNull(te.Next()); r.Dispose(); dir.Dispose(); }
public virtual void TestForceMergeDeletesMaxSegSize() { Directory dir = NewDirectory(); IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)); TieredMergePolicy tmp = new TieredMergePolicy(); tmp.MaxMergedSegmentMB = 0.01; tmp.ForceMergeDeletesPctAllowed = 0.0; conf.SetMergePolicy(tmp); RandomIndexWriter w = new RandomIndexWriter(Random, dir, conf); w.DoRandomForceMerge = false; int numDocs = AtLeast(200); for (int i = 0; i < numDocs; i++) { Document doc = new Document(); doc.Add(NewStringField("id", "" + i, Field.Store.NO)); doc.Add(NewTextField("content", "aaa " + i, Field.Store.NO)); w.AddDocument(doc); } w.ForceMerge(1); IndexReader r = w.GetReader(); Assert.AreEqual(numDocs, r.MaxDoc); Assert.AreEqual(numDocs, r.NumDocs); r.Dispose(); if (VERBOSE) { Console.WriteLine("\nTEST: delete doc"); } w.DeleteDocuments(new Term("id", "" + (42 + 17))); r = w.GetReader(); Assert.AreEqual(numDocs, r.MaxDoc); Assert.AreEqual(numDocs - 1, r.NumDocs); r.Dispose(); w.ForceMergeDeletes(); r = w.GetReader(); Assert.AreEqual(numDocs - 1, r.MaxDoc); Assert.AreEqual(numDocs - 1, r.NumDocs); r.Dispose(); w.Dispose(); dir.Dispose(); }
public virtual void TestMixupDocs() { Directory dir = NewDirectory(); IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)); iwc.SetMergePolicy(NewLogMergePolicy()); RandomIndexWriter writer = new RandomIndexWriter(Random, dir, iwc); Document doc = new Document(); FieldType customType = new FieldType(TextField.TYPE_NOT_STORED); customType.StoreTermVectors = true; customType.StoreTermVectorPositions = true; customType.StoreTermVectorPayloads = true; customType.StoreTermVectorOffsets = Random.NextBoolean(); Field field = new Field("field", "", customType); TokenStream ts = new MockTokenizer(new StringReader("here we go"), MockTokenizer.WHITESPACE, true); Assert.IsFalse(ts.HasAttribute <IPayloadAttribute>()); field.SetTokenStream(ts); doc.Add(field); writer.AddDocument(doc); Token withPayload = new Token("withPayload", 0, 11); withPayload.Payload = new BytesRef("test"); ts = new CannedTokenStream(withPayload); Assert.IsTrue(ts.HasAttribute <IPayloadAttribute>()); field.SetTokenStream(ts); writer.AddDocument(doc); ts = new MockTokenizer(new StringReader("another"), MockTokenizer.WHITESPACE, true); Assert.IsFalse(ts.HasAttribute <IPayloadAttribute>()); field.SetTokenStream(ts); writer.AddDocument(doc); DirectoryReader reader = writer.GetReader(); Terms terms = reader.GetTermVector(1, "field"); if (Debugging.AssertsEnabled) { Debugging.Assert(terms != null); } TermsEnum termsEnum = terms.GetIterator(null); Assert.IsTrue(termsEnum.SeekExact(new BytesRef("withPayload"))); DocsAndPositionsEnum de = termsEnum.DocsAndPositions(null, null); Assert.AreEqual(0, de.NextDoc()); Assert.AreEqual(0, de.NextPosition()); Assert.AreEqual(new BytesRef("test"), de.GetPayload()); writer.Dispose(); reader.Dispose(); dir.Dispose(); }
private static IndexWriter GetWriter(Directory directory) { MergePolicy policy = new LogByteSizeMergePolicy(); IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)); conf.SetMergePolicy(policy); conf.SetOpenMode(OpenMode.CREATE_OR_APPEND); IndexWriter writer = new IndexWriter(directory, conf); return(writer); }
public virtual void TestSepPositionAfterMerge() { Directory dir = NewDirectory(); IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)); config.SetMergePolicy(NewLogMergePolicy()); config.SetCodec(TestUtil.AlwaysPostingsFormat(new MockSepPostingsFormat())); IndexWriter writer = new IndexWriter(dir, config); try { PhraseQuery pq = new PhraseQuery(); pq.Add(new Term("content", "bbb")); pq.Add(new Term("content", "ccc")); Document doc = new Document(); FieldType customType = new FieldType(TextField.TYPE_NOT_STORED); customType.OmitNorms = true; doc.Add(NewField("content", "aaa bbb ccc ddd", customType)); // add document and force commit for creating a first segment writer.AddDocument(doc); writer.Commit(); ScoreDoc[] results = this.Search(writer, pq, 5); Assert.AreEqual(1, results.Length); Assert.AreEqual(0, results[0].Doc); // add document and force commit for creating a second segment writer.AddDocument(doc); writer.Commit(); // at this point, there should be at least two segments results = this.Search(writer, pq, 5); Assert.AreEqual(2, results.Length); Assert.AreEqual(0, results[0].Doc); writer.ForceMerge(1); // optimise to merge the segments. results = this.Search(writer, pq, 5); Assert.AreEqual(2, results.Length); Assert.AreEqual(0, results[0].Doc); } finally { writer.Dispose(); dir.Dispose(); } }
public virtual void TestBinary() { Directory dir = NewDirectory(); Document doc = new Document(); BytesRef @ref = new BytesRef(); Field field = new BinaryDocValuesField("bytes", @ref); doc.Add(field); IndexWriterConfig iwc = NewIndexWriterConfig(Random, TEST_VERSION_CURRENT, null); iwc.SetMergePolicy(NewLogMergePolicy()); RandomIndexWriter iw = new RandomIndexWriter(Random, dir, iwc); int numDocs = AtLeast(500); for (int i = 0; i < numDocs; i++) { @ref.CopyChars(TestUtil.RandomUnicodeString(Random)); iw.AddDocument(doc); if (Random.Next(17) == 0) { iw.Commit(); } } DirectoryReader ir = iw.GetReader(); iw.ForceMerge(1); DirectoryReader ir2 = iw.GetReader(); AtomicReader merged = GetOnlySegmentReader(ir2); iw.Dispose(); BinaryDocValues multi = MultiDocValues.GetBinaryValues(ir, "bytes"); BinaryDocValues single = merged.GetBinaryDocValues("bytes"); BytesRef actual = new BytesRef(); BytesRef expected = new BytesRef(); for (int i = 0; i < numDocs; i++) { single.Get(i, expected); multi.Get(i, actual); Assert.AreEqual(expected, actual); } ir.Dispose(); ir2.Dispose(); dir.Dispose(); }
public virtual void TestPartialMerge() { int num = AtLeast(10); for (int iter = 0; iter < num; iter++) { if (VERBOSE) { Console.WriteLine("TEST: iter=" + iter); } Directory dir = NewDirectory(); IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)); conf.SetMergeScheduler(new SerialMergeScheduler()); TieredMergePolicy tmp = NewTieredMergePolicy(); conf.SetMergePolicy(tmp); conf.SetMaxBufferedDocs(2); tmp.MaxMergeAtOnce = 3; tmp.SegmentsPerTier = 6; IndexWriter w = new IndexWriter(dir, conf); int maxCount = 0; int numDocs = TestUtil.NextInt32(Random, 20, 100); for (int i = 0; i < numDocs; i++) { Document doc = new Document(); doc.Add(NewTextField("content", "aaa " + (i % 4), Field.Store.NO)); w.AddDocument(doc); int count = w.SegmentCount; maxCount = Math.Max(count, maxCount); Assert.IsTrue(count >= maxCount - 3, "count=" + count + " maxCount=" + maxCount); } w.Flush(true, true); int segmentCount = w.SegmentCount; int targetCount = TestUtil.NextInt32(Random, 1, segmentCount); if (VERBOSE) { Console.WriteLine("TEST: merge to " + targetCount + " segs (current count=" + segmentCount + ")"); } w.ForceMerge(targetCount); Assert.AreEqual(targetCount, w.SegmentCount); w.Dispose(); dir.Dispose(); } }
public virtual DirectoryInfo CreateIndex(string dirName, bool doCFS, bool fullyMerged) { // we use a real directory name that is not cleaned up, because this method is only used to create backwards indexes: DirectoryInfo indexDir = new DirectoryInfo(Path.Combine("/tmp/4x/", dirName)); TestUtil.Rm(indexDir); Directory dir = NewFSDirectory(indexDir); LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy(); mp.NoCFSRatio = doCFS ? 1.0 : 0.0; mp.MaxCFSSegmentSizeMB = double.PositiveInfinity; // TODO: remove randomness IndexWriterConfig conf = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))).SetMaxBufferedDocs(10).SetMergePolicy(mp).SetUseCompoundFile(doCFS); IndexWriter writer = new IndexWriter(dir, conf); for (int i = 0; i < 35; i++) { AddDoc(writer, i); } Assert.AreEqual(35, writer.MaxDoc, "wrong doc count"); if (fullyMerged) { writer.ForceMerge(1); } writer.Dispose(); if (!fullyMerged) { // open fresh writer so we get no prx file in the added segment mp = new LogByteSizeMergePolicy(); mp.NoCFSRatio = doCFS ? 1.0 : 0.0; // TODO: remove randomness conf = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))).SetMaxBufferedDocs(10).SetMergePolicy(mp).SetUseCompoundFile(doCFS); writer = new IndexWriter(dir, conf); AddNoProxDoc(writer); writer.Dispose(); writer = new IndexWriter(dir, conf.SetMergePolicy(doCFS ? NoMergePolicy.COMPOUND_FILES : NoMergePolicy.NO_COMPOUND_FILES)); Term searchTerm = new Term("id", "7"); writer.DeleteDocuments(searchTerm); writer.Dispose(); } dir.Dispose(); return(indexDir); }
public virtual void TestNumerics() { Directory dir = NewDirectory(); Document doc = new Document(); Field field = new NumericDocValuesField("numbers", 0); doc.Add(field); IndexWriterConfig iwc = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, null); iwc.SetMergePolicy(NewLogMergePolicy()); RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc); int numDocs = AtLeast(500); for (int i = 0; i < numDocs; i++) { field.SetInt64Value(Random().NextLong()); iw.AddDocument(doc); if (Random().Next(17) == 0) { iw.Commit(); } } DirectoryReader ir = iw.Reader; iw.ForceMerge(1); DirectoryReader ir2 = iw.Reader; AtomicReader merged = GetOnlySegmentReader(ir2); iw.Dispose(); NumericDocValues multi = MultiDocValues.GetNumericValues(ir, "numbers"); NumericDocValues single = merged.GetNumericDocValues("numbers"); for (int i = 0; i < numDocs; i++) { Assert.AreEqual(single.Get(i), multi.Get(i)); } ir.Dispose(); ir2.Dispose(); dir.Dispose(); }
public virtual void TestForceMergeDeletes() { Directory dir = NewDirectory(); IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)); TieredMergePolicy tmp = NewTieredMergePolicy(); conf.SetMergePolicy(tmp); conf.SetMaxBufferedDocs(4); tmp.MaxMergeAtOnce = 100; tmp.SegmentsPerTier = 100; tmp.ForceMergeDeletesPctAllowed = 30.0; IndexWriter w = new IndexWriter(dir, conf); for (int i = 0; i < 80; i++) { Document doc = new Document(); doc.Add(NewTextField("content", "aaa " + (i % 4), Field.Store.NO)); w.AddDocument(doc); } Assert.AreEqual(80, w.MaxDoc); Assert.AreEqual(80, w.NumDocs); if (VERBOSE) { Console.WriteLine("\nTEST: delete docs"); } w.DeleteDocuments(new Term("content", "0")); w.ForceMergeDeletes(); Assert.AreEqual(80, w.MaxDoc); Assert.AreEqual(60, w.NumDocs); if (VERBOSE) { Console.WriteLine("\nTEST: forceMergeDeletes2"); } ((TieredMergePolicy)w.Config.MergePolicy).ForceMergeDeletesPctAllowed = 10.0; w.ForceMergeDeletes(); Assert.AreEqual(60, w.NumDocs); Assert.AreEqual(60, w.MaxDoc); w.Dispose(); dir.Dispose(); }
public virtual void TestMaxMergeCount() { Directory dir = NewDirectory(); IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); int maxMergeCount = TestUtil.NextInt(Random(), 1, 5); int maxMergeThreads = TestUtil.NextInt(Random(), 1, maxMergeCount); CountdownEvent enoughMergesWaiting = new CountdownEvent(maxMergeCount); AtomicInteger runningMergeCount = new AtomicInteger(0); AtomicBoolean failed = new AtomicBoolean(); if (VERBOSE) { Console.WriteLine("TEST: maxMergeCount=" + maxMergeCount + " maxMergeThreads=" + maxMergeThreads); } ConcurrentMergeScheduler cms = new ConcurrentMergeSchedulerAnonymousInnerClassHelper(this, maxMergeCount, enoughMergesWaiting, runningMergeCount, failed); cms.SetMaxMergesAndThreads(maxMergeCount, maxMergeThreads); iwc.SetMergeScheduler(cms); iwc.SetMaxBufferedDocs(2); TieredMergePolicy tmp = new TieredMergePolicy(); iwc.SetMergePolicy(tmp); tmp.MaxMergeAtOnce = 2; tmp.SegmentsPerTier = 2; IndexWriter w = new IndexWriter(dir, iwc); Document doc = new Document(); doc.Add(NewField("field", "field", TextField.TYPE_NOT_STORED)); while (enoughMergesWaiting.CurrentCount != 0 && !failed.Get()) { for (int i = 0; i < 10; i++) { w.AddDocument(doc); } } w.Dispose(false); dir.Dispose(); }
public virtual void TestLiveChangeToCFS() { Directory dir = NewDirectory(); IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); iwc.SetMergePolicy(NewLogMergePolicy(true)); // Start false: iwc.SetUseCompoundFile(false); iwc.MergePolicy.NoCFSRatio = 0.0d; IndexWriter w = new IndexWriter(dir, iwc); // Change to true: w.Config.SetUseCompoundFile(true); Document doc = new Document(); doc.Add(NewStringField("field", "foo", Store.NO)); w.AddDocument(doc); w.Commit(); Assert.IsTrue(w.NewestSegment().Info.UseCompoundFile, "Expected CFS after commit"); doc.Add(NewStringField("field", "foo", Store.NO)); w.AddDocument(doc); w.Commit(); w.ForceMerge(1); w.Commit(); // no compound files after merge Assert.IsFalse(w.NewestSegment().Info.UseCompoundFile, "Expected Non-CFS after merge"); MergePolicy lmp = w.Config.MergePolicy; lmp.NoCFSRatio = 1.0; lmp.MaxCFSSegmentSizeMB = double.PositiveInfinity; w.AddDocument(doc); w.ForceMerge(1); w.Commit(); Assert.IsTrue(w.NewestSegment().Info.UseCompoundFile, "Expected CFS after merge"); w.Dispose(); dir.Dispose(); }
/// <summary> /// Perform the upgrade. </summary> public void Upgrade() { if (!DirectoryReader.IndexExists(Dir)) { throw new IndexNotFoundException(Dir.ToString()); } if (!DeletePriorCommits) { ICollection <IndexCommit> commits = DirectoryReader.ListCommits(Dir); if (commits.Count > 1) { throw new System.ArgumentException("this tool was invoked to not delete prior commit points, but the following commits were found: " + commits); } } IndexWriterConfig c = (IndexWriterConfig)Iwc.Clone(); c.SetMergePolicy(new UpgradeIndexMergePolicy(c.MergePolicy)); c.SetIndexDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy()); IndexWriter w = new IndexWriter(Dir, c); try { InfoStream infoStream = c.InfoStream; if (infoStream.IsEnabled("IndexUpgrader")) { infoStream.Message("IndexUpgrader", "Upgrading all pre-" + Constants.LUCENE_MAIN_VERSION + " segments of index directory '" + Dir + "' to version " + Constants.LUCENE_MAIN_VERSION + "..."); } w.ForceMerge(1); if (infoStream.IsEnabled("IndexUpgrader")) { infoStream.Message("IndexUpgrader", "All segments upgraded to version " + Constants.LUCENE_MAIN_VERSION); } } finally { w.Dispose(); } }
public override void SetUp() { base.SetUp(); dir = NewDirectory(); MockAnalyzer analyzer = new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true); IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); config.SetMergePolicy(NewLogMergePolicy()); config.SetSimilarity(new TestSimilarity()); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, config); Document doc = new Document(); Field foo = NewTextField("foo", "", Field.Store.NO); doc.Add(foo); for (int i = 0; i < 100; i++) { foo.StringValue = AddValue(); writer.AddDocument(doc); } reader = writer.Reader; writer.Dispose(); }
public virtual void TestMixupDocs() { Directory dir = NewDirectory(); IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, null); iwc.SetMergePolicy(NewLogMergePolicy()); RandomIndexWriter writer = new RandomIndexWriter(Random, dir, iwc); Document doc = new Document(); Field field = new TextField("field", "", Field.Store.NO); TokenStream ts = new MockTokenizer(new StringReader("here we go"), MockTokenizer.WHITESPACE, true); Assert.IsFalse(ts.HasAttribute <IPayloadAttribute>()); field.SetTokenStream(ts); doc.Add(field); writer.AddDocument(doc); Token withPayload = new Token("withPayload", 0, 11); withPayload.Payload = new BytesRef("test"); ts = new CannedTokenStream(withPayload); Assert.IsTrue(ts.HasAttribute <IPayloadAttribute>()); field.SetTokenStream(ts); writer.AddDocument(doc); ts = new MockTokenizer(new StringReader("another"), MockTokenizer.WHITESPACE, true); Assert.IsFalse(ts.HasAttribute <IPayloadAttribute>()); field.SetTokenStream(ts); writer.AddDocument(doc); DirectoryReader reader = writer.GetReader(); AtomicReader sr = SlowCompositeReaderWrapper.Wrap(reader); DocsAndPositionsEnum de = sr.GetTermPositionsEnum(new Term("field", "withPayload")); de.NextDoc(); de.NextPosition(); Assert.AreEqual(new BytesRef("test"), de.GetPayload()); writer.Dispose(); reader.Dispose(); dir.Dispose(); }
public virtual void TestInvalidValues() { IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); // Test IndexDeletionPolicy Assert.AreEqual(typeof(KeepOnlyLastCommitDeletionPolicy), conf.DelPolicy.GetType()); conf.SetIndexDeletionPolicy(new SnapshotDeletionPolicy(null)); Assert.AreEqual(typeof(SnapshotDeletionPolicy), conf.DelPolicy.GetType()); try { conf.SetIndexDeletionPolicy(null); Assert.Fail(); } catch (System.ArgumentException e) { // ok } // Test MergeScheduler Assert.AreEqual(typeof(ConcurrentMergeScheduler), conf.MergeScheduler.GetType()); conf.SetMergeScheduler(new SerialMergeScheduler()); Assert.AreEqual(typeof(SerialMergeScheduler), conf.MergeScheduler.GetType()); try { conf.SetMergeScheduler(null); Assert.Fail(); } catch (System.ArgumentException e) { // ok } // Test Similarity: // we shouldnt assert what the default is, just that its not null. Assert.IsTrue(IndexSearcher.DefaultSimilarity == conf.Similarity); conf.SetSimilarity(new MySimilarity()); Assert.AreEqual(typeof(MySimilarity), conf.Similarity.GetType()); try { conf.SetSimilarity(null); Assert.Fail(); } catch (System.ArgumentException e) { // ok } // Test IndexingChain Assert.IsTrue(DocumentsWriterPerThread.DefaultIndexingChain == conf.IndexingChain); conf.SetIndexingChain(new MyIndexingChain()); Assert.AreEqual(typeof(MyIndexingChain), conf.IndexingChain.GetType()); try { conf.SetIndexingChain(null); Assert.Fail(); } catch (System.ArgumentException e) { // ok } try { conf.SetMaxBufferedDeleteTerms(0); Assert.Fail("should not have succeeded to set maxBufferedDeleteTerms to 0"); } catch (System.ArgumentException e) { // this is expected } try { conf.SetMaxBufferedDocs(1); Assert.Fail("should not have succeeded to set maxBufferedDocs to 1"); } catch (System.ArgumentException e) { // this is expected } try { // Disable both MAX_BUF_DOCS and RAM_SIZE_MB conf.SetMaxBufferedDocs(4); conf.SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH); conf.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); Assert.Fail("should not have succeeded to disable maxBufferedDocs when ramBufferSizeMB is disabled as well"); } catch (System.ArgumentException e) { // this is expected } conf.SetRAMBufferSizeMB(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB); conf.SetMaxBufferedDocs(IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS); try { conf.SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH); Assert.Fail("should not have succeeded to disable ramBufferSizeMB when maxBufferedDocs is disabled as well"); } catch (System.ArgumentException e) { // this is expected } // Test setReaderTermsIndexDivisor try { conf.SetReaderTermsIndexDivisor(0); Assert.Fail("should not have succeeded to set termsIndexDivisor to 0"); } catch (System.ArgumentException e) { // this is expected } // Setting to -1 is ok conf.SetReaderTermsIndexDivisor(-1); try { conf.SetReaderTermsIndexDivisor(-2); Assert.Fail("should not have succeeded to set termsIndexDivisor to < -1"); } catch (System.ArgumentException e) { // this is expected } try { conf.SetRAMPerThreadHardLimitMB(2048); Assert.Fail("should not have succeeded to set RAMPerThreadHardLimitMB to >= 2048"); } catch (System.ArgumentException e) { // this is expected } try { conf.SetRAMPerThreadHardLimitMB(0); Assert.Fail("should not have succeeded to set RAMPerThreadHardLimitMB to 0"); } catch (System.ArgumentException e) { // this is expected } // Test MergePolicy Assert.AreEqual(typeof(TieredMergePolicy), conf.MergePolicy.GetType()); conf.SetMergePolicy(new LogDocMergePolicy()); Assert.AreEqual(typeof(LogDocMergePolicy), conf.MergePolicy.GetType()); try { conf.SetMergePolicy(null); Assert.Fail(); } catch (System.ArgumentException e) { // ok } }
public virtual void TestDeletes1() { //IndexWriter.debug2 = System.out; Directory dir = new MockDirectoryWrapper(new Random(Random().Next()), new RAMDirectory()); IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); iwc.SetMergeScheduler(new SerialMergeScheduler()); iwc.SetMaxBufferedDocs(5000); iwc.SetRAMBufferSizeMB(100); RangeMergePolicy fsmp = new RangeMergePolicy(this, false); iwc.SetMergePolicy(fsmp); IndexWriter writer = new IndexWriter(dir, iwc); for (int x = 0; x < 5; x++) { writer.AddDocument(DocHelper.CreateDocument(x, "1", 2)); //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs()); } //System.out.println("commit1"); writer.Commit(); Assert.AreEqual(1, writer.SegmentCount); for (int x = 5; x < 10; x++) { writer.AddDocument(DocHelper.CreateDocument(x, "2", 2)); //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs()); } //System.out.println("commit2"); writer.Commit(); Assert.AreEqual(2, writer.SegmentCount); for (int x = 10; x < 15; x++) { writer.AddDocument(DocHelper.CreateDocument(x, "3", 2)); //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs()); } writer.DeleteDocuments(new Term("id", "1")); writer.DeleteDocuments(new Term("id", "11")); // flushing without applying deletes means // there will still be deletes in the segment infos writer.Flush(false, false); Assert.IsTrue(writer.BufferedUpdatesStreamAny); // get reader flushes pending deletes // so there should not be anymore IndexReader r1 = writer.Reader; Assert.IsFalse(writer.BufferedUpdatesStreamAny); r1.Dispose(); // delete id:2 from the first segment // merge segments 0 and 1 // which should apply the delete id:2 writer.DeleteDocuments(new Term("id", "2")); writer.Flush(false, false); fsmp = (RangeMergePolicy)writer.Config.MergePolicy; fsmp.DoMerge = true; fsmp.Start = 0; fsmp.Length = 2; writer.MaybeMerge(); Assert.AreEqual(2, writer.SegmentCount); // id:2 shouldn't exist anymore because // it's been applied in the merge and now it's gone IndexReader r2 = writer.Reader; int[] id2docs = ToDocsArray(new Term("id", "2"), null, r2); Assert.IsTrue(id2docs == null); r2.Dispose(); /* /// // added docs are in the ram buffer /// for (int x = 15; x < 20; x++) { /// writer.AddDocument(TestIndexWriterReader.CreateDocument(x, "4", 2)); /// System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs()); /// } /// Assert.IsTrue(writer.numRamDocs() > 0); /// // delete from the ram buffer /// writer.DeleteDocuments(new Term("id", Integer.toString(13))); /// /// Term id3 = new Term("id", Integer.toString(3)); /// /// // delete from the 1st segment /// writer.DeleteDocuments(id3); /// /// Assert.IsTrue(writer.numRamDocs() > 0); /// /// //System.out /// // .println("segdels1:" + writer.docWriter.deletesToString()); /// /// //Assert.IsTrue(writer.docWriter.segmentDeletes.Size() > 0); /// /// // we cause a merge to happen /// fsmp.doMerge = true; /// fsmp.start = 0; /// fsmp.Length = 2; /// System.out.println("maybeMerge "+writer.SegmentInfos); /// /// SegmentInfo info0 = writer.SegmentInfos.Info(0); /// SegmentInfo info1 = writer.SegmentInfos.Info(1); /// /// writer.MaybeMerge(); /// System.out.println("maybeMerge after "+writer.SegmentInfos); /// // there should be docs in RAM /// Assert.IsTrue(writer.numRamDocs() > 0); /// /// // assert we've merged the 1 and 2 segments /// // and still have a segment leftover == 2 /// Assert.AreEqual(2, writer.SegmentInfos.Size()); /// Assert.IsFalse(segThere(info0, writer.SegmentInfos)); /// Assert.IsFalse(segThere(info1, writer.SegmentInfos)); /// /// //System.out.println("segdels2:" + writer.docWriter.deletesToString()); /// /// //Assert.IsTrue(writer.docWriter.segmentDeletes.Size() > 0); /// /// IndexReader r = writer.GetReader(); /// IndexReader r1 = r.getSequentialSubReaders()[0]; /// printDelDocs(r1.GetLiveDocs()); /// int[] docs = toDocsArray(id3, null, r); /// System.out.println("id3 docs:"+Arrays.toString(docs)); /// // there shouldn't be any docs for id:3 /// Assert.IsTrue(docs == null); /// r.Dispose(); /// /// part2(writer, fsmp); /// */ // System.out.println("segdels2:"+writer.docWriter.segmentDeletes.toString()); //System.out.println("close"); writer.Dispose(); dir.Dispose(); }
/// <summary> /// create a new index writer config with random defaults using the specified random </summary> public static IndexWriterConfig NewIndexWriterConfig(Random r, LuceneVersion v, Analyzer a) { IndexWriterConfig c = new IndexWriterConfig(v, a); c.SetSimilarity(ClassEnvRule.Similarity); if (VERBOSE) { // Even though TestRuleSetupAndRestoreClassEnv calls // InfoStream.setDefault, we do it again here so that // the PrintStreamInfoStream.messageID increments so // that when there are separate instances of // IndexWriter created we see "IW 0", "IW 1", "IW 2", // ... instead of just always "IW 0": c.InfoStream = new TestRuleSetupAndRestoreClassEnv.ThreadNameFixingPrintStreamInfoStream(Console.Out); } if (r.NextBoolean()) { c.SetMergeScheduler(new SerialMergeScheduler()); } else if (Rarely(r)) { int maxThreadCount = TestUtil.NextInt(Random(), 1, 4); int maxMergeCount = TestUtil.NextInt(Random(), maxThreadCount, maxThreadCount + 4); ConcurrentMergeScheduler cms = new ConcurrentMergeScheduler(); cms.SetMaxMergesAndThreads(maxMergeCount, maxThreadCount); c.SetMergeScheduler(cms); } if (r.NextBoolean()) { if (Rarely(r)) { // crazy value c.SetMaxBufferedDocs(TestUtil.NextInt(r, 2, 15)); } else { // reasonable value c.SetMaxBufferedDocs(TestUtil.NextInt(r, 16, 1000)); } } if (r.NextBoolean()) { if (Rarely(r)) { // crazy value c.SetTermIndexInterval(r.NextBoolean() ? TestUtil.NextInt(r, 1, 31) : TestUtil.NextInt(r, 129, 1000)); } else { // reasonable value c.SetTermIndexInterval(TestUtil.NextInt(r, 32, 128)); } } if (r.NextBoolean()) { int maxNumThreadStates = Rarely(r) ? TestUtil.NextInt(r, 5, 20) : TestUtil.NextInt(r, 1, 4); // reasonable value - crazy value if (Rarely(r)) { // Retrieve the package-private setIndexerThreadPool // method: MethodInfo setIndexerThreadPoolMethod = typeof(IndexWriterConfig).GetMethod("SetIndexerThreadPool", new Type[] { typeof(DocumentsWriterPerThreadPool) }); //setIndexerThreadPoolMethod.setAccessible(true); Type clazz = typeof(RandomDocumentsWriterPerThreadPool); ConstructorInfo ctor = clazz.GetConstructor(new[] { typeof(int), typeof(Random) }); //ctor.Accessible = true; // random thread pool setIndexerThreadPoolMethod.Invoke(c, new[] { ctor.Invoke(new object[] { maxNumThreadStates, r }) }); } else { // random thread pool c.SetMaxThreadStates(maxNumThreadStates); } } c.SetMergePolicy(NewMergePolicy(r)); if (Rarely(r)) { c.SetMergedSegmentWarmer(new SimpleMergedSegmentWarmer(c.InfoStream)); } c.SetUseCompoundFile(r.NextBoolean()); c.SetReaderPooling(r.NextBoolean()); c.SetReaderTermsIndexDivisor(TestUtil.NextInt(r, 1, 4)); c.SetCheckIntegrityAtMerge(r.NextBoolean()); return c; }
public virtual void DoTestNumbers(bool withPayloads) { Directory dir = NewDirectory(); Analyzer analyzer = withPayloads ? (Analyzer)new MockPayloadAnalyzer() : new MockAnalyzer(Random()); Iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); Iwc.SetMergePolicy(NewLogMergePolicy()); // will rely on docids a bit for skipping RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Iwc); FieldType ft = new FieldType(TextField.TYPE_STORED); ft.IndexOptionsValue = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; if (Random().NextBoolean()) { ft.StoreTermVectors = true; ft.StoreTermVectorOffsets = Random().NextBoolean(); ft.StoreTermVectorPositions = Random().NextBoolean(); } int numDocs = AtLeast(500); for (int i = 0; i < numDocs; i++) { Document doc = new Document(); doc.Add(new Field("numbers", English.IntToEnglish(i), ft)); doc.Add(new Field("oddeven", (i % 2) == 0 ? "even" : "odd", ft)); doc.Add(new StringField("id", "" + i, Field.Store.NO)); w.AddDocument(doc); } IndexReader reader = w.Reader; w.Dispose(); string[] terms = new string[] { "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten", "hundred" }; foreach (string term in terms) { DocsAndPositionsEnum dp = MultiFields.GetTermPositionsEnum(reader, null, "numbers", new BytesRef(term)); int doc; while ((doc = dp.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { string storedNumbers = reader.Document(doc).Get("numbers"); int freq = dp.Freq(); for (int i = 0; i < freq; i++) { dp.NextPosition(); int start = dp.StartOffset(); Debug.Assert(start >= 0); int end = dp.EndOffset(); Debug.Assert(end >= 0 && end >= start); // check that the offsets correspond to the term in the src text Assert.IsTrue(storedNumbers.Substring(start, end - start).Equals(term)); if (withPayloads) { // check that we have a payload and it starts with "pos" Assert.IsNotNull(dp.Payload); BytesRef payload = dp.Payload; Assert.IsTrue(payload.Utf8ToString().StartsWith("pos:")); } // note: withPayloads=false doesnt necessarily mean we dont have them from MockAnalyzer! } } } // check we can skip correctly int numSkippingTests = AtLeast(50); for (int j = 0; j < numSkippingTests; j++) { int num = TestUtil.NextInt(Random(), 100, Math.Min(numDocs - 1, 999)); DocsAndPositionsEnum dp = MultiFields.GetTermPositionsEnum(reader, null, "numbers", new BytesRef("hundred")); int doc = dp.Advance(num); Assert.AreEqual(num, doc); int freq = dp.Freq(); for (int i = 0; i < freq; i++) { string storedNumbers = reader.Document(doc).Get("numbers"); dp.NextPosition(); int start = dp.StartOffset(); Debug.Assert(start >= 0); int end = dp.EndOffset(); Debug.Assert(end >= 0 && end >= start); // check that the offsets correspond to the term in the src text Assert.IsTrue(storedNumbers.Substring(start, end - start).Equals("hundred")); if (withPayloads) { // check that we have a payload and it starts with "pos" Assert.IsNotNull(dp.Payload); BytesRef payload = dp.Payload; Assert.IsTrue(payload.Utf8ToString().StartsWith("pos:")); } // note: withPayloads=false doesnt necessarily mean we dont have them from MockAnalyzer! } } // check that other fields (without offsets) work correctly for (int i = 0; i < numDocs; i++) { DocsEnum dp = MultiFields.GetTermDocsEnum(reader, null, "id", new BytesRef("" + i), 0); Assert.AreEqual(i, dp.NextDoc()); Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dp.NextDoc()); } reader.Dispose(); dir.Dispose(); }
private IndexWriter NewWriter(Directory dir, IndexWriterConfig conf) { conf.SetMergePolicy(new LogDocMergePolicy()); IndexWriter writer = new IndexWriter(dir, conf); return writer; }
/// <summary> /// Override this to customize index settings, e.g. which /// codec to use. /// </summary> protected internal virtual IndexWriterConfig GetIndexWriterConfig(LuceneVersion matchVersion, Analyzer indexAnalyzer, IndexWriterConfig.OpenMode_e openMode) { IndexWriterConfig iwc = new IndexWriterConfig(matchVersion, indexAnalyzer); iwc.SetCodec(new Lucene46Codec()); iwc.SetOpenMode(openMode); // This way all merged segments will be sorted at // merge time, allow for per-segment early termination // when those segments are searched: iwc.SetMergePolicy(new SortingMergePolicy(iwc.MergePolicy, SORT)); return iwc; }
private static IndexWriter GetWriter(Directory directory) { MergePolicy policy = new LogByteSizeMergePolicy(); IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); conf.SetMergePolicy(policy); conf.SetOpenMode(OpenMode_e.CREATE_OR_APPEND); IndexWriter writer = new IndexWriter(directory, conf); return writer; }