/// <summary>
/// Creates an FS-backed directory and a random index writer whose postings
/// always use the Lucene41 format. Random force-merging is disabled because
/// the tests in this fixture trigger merges explicitly.
/// </summary>
public override void SetUp()
{
    base.SetUp();
    Dir = NewFSDirectory(CreateTempDir("testDFBlockSize"));

    // Build the config first, then publish it through the Iwc property.
    var config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    config.SetCodec(TestUtil.AlwaysPostingsFormat(new Lucene41PostingsFormat()));
    Iwc = config;

    Iw = new RandomIndexWriter(Random(), Dir, (IndexWriterConfig)Iwc.Clone());
    Iw.RandomForceMerge = false; // the tests force-merge themselves
}
/// <summary>
/// Indexes many documents with id/title/body fields (all carrying term
/// vectors) using the Lucene40 codec, applies random deletions, and relies
/// on the framework's check-index-on-close of the FS directory to verify
/// the resulting postings.
/// </summary>
public virtual void TestPostings()
{
    Directory dir = NewFSDirectory(CreateTempDir("postings"));

    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
    iwc.SetCodec(Codec.ForName("Lucene40"));
    RandomIndexWriter iw = new RandomIndexWriter(Random, dir, iwc);

    Document doc = new Document();

    // "id": indexed keyword field with plain term vectors.
    FieldType idType = new FieldType(StringField.TYPE_NOT_STORED)
    {
        StoreTermVectors = true
    };
    Field idField = new Field("id", "", idType);
    doc.Add(idField);

    // "title": short text field — vectors with positions and offsets.
    FieldType titleType = new FieldType(TextField.TYPE_NOT_STORED)
    {
        StoreTermVectors = true,
        StoreTermVectorPositions = true,
        StoreTermVectorOffsets = true
    };
    titleType.IndexOptions = IndexOptions();
    Field titleField = new Field("title", "", titleType);
    doc.Add(titleField);

    // "body": longer text field, same vector configuration as title.
    FieldType bodyType = new FieldType(TextField.TYPE_NOT_STORED)
    {
        StoreTermVectors = true,
        StoreTermVectorPositions = true,
        StoreTermVectorOffsets = true
    };
    bodyType.IndexOptions = IndexOptions();
    Field bodyField = new Field("body", "", bodyType);
    doc.Add(bodyField);

    int numDocs = AtLeast(1000);
    for (int i = 0; i < numDocs; i++)
    {
        idField.SetStringValue(Convert.ToString(i));
        titleField.SetStringValue(FieldValue(1));
        bodyField.SetStringValue(FieldValue(3));
        iw.AddDocument(doc);

        // Occasionally delete the document we just added.
        if (Random.Next(20) == 0)
        {
            iw.DeleteDocuments(new Term("id", Convert.ToString(i)));
        }
    }

    if (Random.NextBoolean())
    {
        // delete 1-100% of docs
        iw.DeleteDocuments(new Term("title", terms[Random.Next(terms.Length)]));
    }

    iw.Dispose();
    dir.Dispose(); // checkindex
}
/// <summary>
/// Indexes eight parallel fields — one per index-options / payload flavor —
/// with the Lucene41 postings format, then verifies the postings both
/// before and after a forced merge.
/// </summary>
public virtual void Test()
{
    Directory dir = NewDirectory();

    // Per-field analyzer: fields whose names mention "payloadsFixed" or
    // "payloadsVariable" get a deterministic payload-producing filter.
    Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
    {
        Tokenizer tokenizer = new MockTokenizer(reader);
        if (fieldName.Contains("payloadsFixed"))
        {
            TokenFilter filter = new MockFixedLengthPayloadFilter(new Random(0), tokenizer, 1);
            return new TokenStreamComponents(tokenizer, filter);
        }
        if (fieldName.Contains("payloadsVariable"))
        {
            TokenFilter filter = new MockVariableLengthPayloadFilter(new Random(0), tokenizer);
            return new TokenStreamComponents(tokenizer, filter);
        }
        return new TokenStreamComponents(tokenizer);
    }, reuseStrategy: Analyzer.PER_FIELD_REUSE_STRATEGY);

    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
    iwc.SetCodec(TestUtil.AlwaysPostingsFormat(new Lucene41PostingsFormat()));
    // TODO we could actually add more fields implemented with different PFs
    // or, just put this test into the usual rotation?
    RandomIndexWriter iw = new RandomIndexWriter(Random, dir, (IndexWriterConfig)iwc.Clone());

    // One FieldType per postings flavor under test.
    FieldType docsOnlyType = new FieldType(TextField.TYPE_NOT_STORED);
    // turn this on for a cross-check
    docsOnlyType.StoreTermVectors = true;
    docsOnlyType.IndexOptions = IndexOptions.DOCS_ONLY;

    FieldType docsAndFreqsType = new FieldType(TextField.TYPE_NOT_STORED);
    // turn this on for a cross-check
    docsAndFreqsType.StoreTermVectors = true;
    docsAndFreqsType.IndexOptions = IndexOptions.DOCS_AND_FREQS;

    FieldType positionsType = new FieldType(TextField.TYPE_NOT_STORED);
    // turn these on for a cross-check
    positionsType.StoreTermVectors = true;
    positionsType.StoreTermVectorPositions = true;
    positionsType.StoreTermVectorOffsets = true;
    positionsType.StoreTermVectorPayloads = true;

    FieldType offsetsType = new FieldType(positionsType);
    offsetsType.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;

    // Order matters: fields are added to the document in this order.
    Field[] fields =
    {
        new Field("field1docs", "", docsOnlyType),
        new Field("field2freqs", "", docsAndFreqsType),
        new Field("field3positions", "", positionsType),
        new Field("field4offsets", "", offsetsType),
        new Field("field5payloadsFixed", "", positionsType),
        new Field("field6payloadsVariable", "", positionsType),
        new Field("field7payloadsFixedOffsets", "", offsetsType),
        new Field("field8payloadsVariableOffsets", "", offsetsType)
    };

    Document doc = new Document();
    foreach (Field field in fields)
    {
        doc.Add(field);
    }

    for (int i = 0; i < MAXDOC; i++)
    {
        // Every field of a document shares the same value.
        string stringValue = Convert.ToString(i) + " verycommon " + English.Int32ToEnglish(i).Replace('-', ' ') + " " + TestUtil.RandomSimpleString(Random);
        foreach (Field field in fields)
        {
            field.SetStringValue(stringValue);
        }
        iw.AddDocument(doc);
    }
    iw.Dispose();

    Verify(dir);
    TestUtil.CheckIndex(dir); // for some extra coverage, checkIndex before we forceMerge

    iwc.SetOpenMode(OpenMode.APPEND);
    IndexWriter iw2 = new IndexWriter(dir, (IndexWriterConfig)iwc.Clone());
    iw2.ForceMerge(1);
    iw2.Dispose();

    Verify(dir);
    dir.Dispose();
}
/// <summary>
/// Writes segments with one codec, reopens the index for append with a
/// different per-field codec, keeps adding documents, and finally
/// force-merges — verifying at each step that terms indexed under the
/// previous codec remain searchable.
/// </summary>
public virtual void TestChangeCodecAndMerge()
{
    Directory dir = NewDirectory();
    if (VERBOSE)
    {
        Console.WriteLine("TEST: make new index");
    }
    IndexWriterConfig iwconf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.CREATE).SetCodec(new MockCodec());
    // Disable auto-flush so segment boundaries are controlled by Commit().
    iwconf.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
    IndexWriter writer = NewWriter(dir, iwconf);

    AddDocs(writer, 10);
    writer.Commit();
    AssertQuery(new Term("content", "aaa"), dir, 10);
    if (VERBOSE)
    {
        Console.WriteLine("TEST: addDocs3");
    }
    AddDocs3(writer, 10);
    writer.Commit();
    writer.Dispose();

    AssertQuery(new Term("content", "ccc"), dir, 10);
    AssertQuery(new Term("content", "aaa"), dir, 10);

    // Reopen for append; MockCodec2 replaces the original codec and uses
    // the standard postings format for the "content" field.
    Codec codec = iwconf.Codec;
    iwconf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
        .SetOpenMode(OpenMode.APPEND).SetCodec(codec);
    iwconf.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
    iwconf.SetCodec(new MockCodec2()); // uses standard for field content
    writer = NewWriter(dir, iwconf);
    // swap in new codec for currently written segments
    if (VERBOSE)
    {
        Console.WriteLine("TEST: add docs w/ Standard codec for content field");
    }
    AddDocs2(writer, 10);
    writer.Commit();
    Assert.AreEqual(30, writer.MaxDoc);
    AssertQuery(new Term("content", "bbb"), dir, 10);
    AssertQuery(new Term("content", "ccc"), dir, 10);
    if (VERBOSE)
    {
        Console.WriteLine("TEST: add more docs w/ new codec");
    }
    AddDocs2(writer, 10);
    writer.Commit();
    AssertQuery(new Term("content", "ccc"), dir, 10);
    AssertQuery(new Term("content", "bbb"), dir, 20);
    AssertQuery(new Term("content", "aaa"), dir, 10);
    Assert.AreEqual(40, writer.MaxDoc);

    if (VERBOSE)
    {
        Console.WriteLine("TEST: now optimize");
    }
    // Merge everything down to a single segment under the new codec.
    writer.ForceMerge(1);
    Assert.AreEqual(40, writer.MaxDoc);
    writer.Dispose();

    AssertQuery(new Term("content", "ccc"), dir, 10);
    AssertQuery(new Term("content", "bbb"), dir, 20);
    AssertQuery(new Term("content", "aaa"), dir, 10);
    dir.Dispose();
}