public virtual void TestMixedMerge()
{
    Directory ram = new MockRAMDirectory();
    Analyzer analyzer = new StandardAnalyzer();
    IndexWriter writer = new IndexWriter(ram, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    writer.SetMaxBufferedDocs(3);
    writer.SetMergeFactor(2);
    Document d = new Document();

    // this field will have Tf
    Field f1 = new Field("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED);
    d.Add(f1);

    // this field will NOT have Tf
    Field f2 = new Field("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.ANALYZED);
    f2.SetOmitTermFreqAndPositions(true);
    d.Add(f2);

    for (int i = 0; i < 30; i++)
    {
        writer.AddDocument(d);
    }

    // now we add more documents which have term freqs for field f2 and not for f1,
    // and verify that the SegmentMerger keeps things consistent
    d = new Document();

    // Reverse
    f1.SetOmitTermFreqAndPositions(true);
    d.Add(f1);
    f2.SetOmitTermFreqAndPositions(false);
    d.Add(f2);

    for (int i = 0; i < 30; i++)
    {
        writer.AddDocument(d);
    }

    // force merge
    writer.Optimize();
    // flush
    writer.Close();

    _TestUtil.CheckIndex(ram);

    SegmentReader reader = SegmentReader.GetOnlySegmentReader(ram);
    FieldInfos fi = reader.FieldInfos();
    Assert.IsTrue(fi.FieldInfo("f1").omitTermFreqAndPositions_ForNUnit, "OmitTermFreqAndPositions field bit should be set.");
    Assert.IsTrue(fi.FieldInfo("f2").omitTermFreqAndPositions_ForNUnit, "OmitTermFreqAndPositions field bit should be set.");

    reader.Close();
    ram.Close();
}
public virtual void TestLUCENE_1590()
{
    Document doc = new Document();

    // f1 has no norms
    doc.Add(new Field("f1", "v1", Field.Store.NO, Field.Index.ANALYZED_NO_NORMS));
    doc.Add(new Field("f1", "v2", Field.Store.YES, Field.Index.NO));

    // f2 has no TF
    Field f = new Field("f2", "v1", Field.Store.NO, Field.Index.ANALYZED);
    f.SetOmitTermFreqAndPositions(true);
    doc.Add(f);
    doc.Add(new Field("f2", "v2", Field.Store.YES, Field.Index.NO));

    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    writer.AddDocument(doc);
    writer.Optimize(); // be sure to have a single segment
    writer.Close();

    _TestUtil.CheckIndex(dir);

    SegmentReader reader = SegmentReader.GetOnlySegmentReader(dir);
    FieldInfos fi = reader.FieldInfos();

    // f1
    Assert.IsFalse(reader.HasNorms("f1"), "f1 should have no norms");
    Assert.IsFalse(fi.FieldInfo("f1").omitTermFreqAndPositions_ForNUnit, "omitTermFreqAndPositions field bit should not be set for f1");

    // f2
    Assert.IsTrue(reader.HasNorms("f2"), "f2 should have norms");
    Assert.IsTrue(fi.FieldInfo("f2").omitTermFreqAndPositions_ForNUnit, "omitTermFreqAndPositions field bit should be set for f2");
}
public virtual void TestNoPrxFile()
{
    Directory ram = new MockRAMDirectory();
    Analyzer analyzer = new StandardAnalyzer();
    IndexWriter writer = new IndexWriter(ram, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    writer.SetMaxBufferedDocs(3);
    writer.SetMergeFactor(2);
    writer.SetUseCompoundFile(false);
    Document d = new Document();

    Field f1 = new Field("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED);
    f1.SetOmitTermFreqAndPositions(true);
    d.Add(f1);

    for (int i = 0; i < 30; i++)
    {
        writer.AddDocument(d);
    }

    writer.Commit();
    AssertNoPrx(ram);

    // force merge
    writer.Optimize();
    // flush
    writer.Close();

    AssertNoPrx(ram);
    _TestUtil.CheckIndex(ram);
    ram.Close();
}
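// AssertNoPrx is called above but is not part of this excerpt. A minimal
// sketch of what it plausibly looks like, assuming the Lucene.NET 2.9
// Directory.ListAll() API: since a field that omits term freqs and positions
// writes no proximity data, no ".prx" file should exist in the directory.
private void AssertNoPrx(Directory dir)
{
    System.String[] files = dir.ListAll();
    for (int i = 0; i < files.Length; i++)
    {
        // the proximity (positions) file must never have been created
        Assert.IsFalse(files[i].EndsWith(".prx"));
    }
}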
static DocHelper()
{
    textField1 = new Field(TEXT_FIELD_1_KEY, FIELD_1_TEXT, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO);
    textField2 = new Field(TEXT_FIELD_2_KEY, FIELD_2_TEXT, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
    compressedTextField2 = new Field(COMPRESSED_TEXT_FIELD_2_KEY, FIELD_2_COMPRESSED_TEXT, Field.Store.COMPRESS, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
    textField3 = new Field(TEXT_FIELD_3_KEY, FIELD_3_TEXT, Field.Store.YES, Field.Index.ANALYZED);
    textField3.SetOmitNorms(true);
    keyField = new Field(KEYWORD_FIELD_KEY, KEYWORD_TEXT, Field.Store.YES, Field.Index.NOT_ANALYZED);
    noNormsField = new Field(NO_NORMS_KEY, NO_NORMS_TEXT, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
    noTFField = new Field(NO_TF_KEY, NO_TF_TEXT, Field.Store.YES, Field.Index.ANALYZED);
    noTFField.SetOmitTermFreqAndPositions(true);
    unIndField = new Field(UNINDEXED_FIELD_KEY, UNINDEXED_FIELD_TEXT, Field.Store.YES, Field.Index.NO);
    unStoredField1 = new Field(UNSTORED_FIELD_1_KEY, UNSTORED_1_FIELD_TEXT, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO);
    unStoredField2 = new Field(UNSTORED_FIELD_2_KEY, UNSTORED_2_FIELD_TEXT, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES);
    lazyField = new Field(LAZY_FIELD_KEY, LAZY_FIELD_TEXT, Field.Store.YES, Field.Index.ANALYZED);
    textUtfField1 = new Field(TEXT_FIELD_UTF1_KEY, FIELD_UTF1_TEXT, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO);
    textUtfField2 = new Field(TEXT_FIELD_UTF2_KEY, FIELD_UTF2_TEXT, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);

    // lazyFieldBinary and largeLazyField are still null here; their slots at
    // the end of the array are filled in below once they are constructed.
    fields = new Field[]
    {
        textField1, textField2, textField3, compressedTextField2, keyField, noNormsField, noTFField,
        unIndField, unStoredField1, unStoredField2, textUtfField1, textUtfField2, lazyField,
        lazyFieldBinary, largeLazyField
    };

    // Initialize the large lazy field
    System.Text.StringBuilder buffer = new System.Text.StringBuilder();
    for (int i = 0; i < 10000; i++)
    {
        buffer.Append("Lazily loading lengths of language in lieu of laughing ");
    }

    try
    {
        LAZY_FIELD_BINARY_BYTES = System.Text.Encoding.UTF8.GetBytes("These are some binary field bytes");
    }
    catch (System.IO.IOException)
    {
        // cannot happen for an in-memory encoding; kept for parity with the Java original
    }
    lazyFieldBinary = new Field(LAZY_FIELD_BINARY_KEY, LAZY_FIELD_BINARY_BYTES, Field.Store.YES);
    fields[fields.Length - 2] = lazyFieldBinary;
    LARGE_LAZY_FIELD_TEXT = buffer.ToString();
    largeLazyField = new Field(LARGE_LAZY_FIELD_KEY, LARGE_LAZY_FIELD_TEXT, Field.Store.YES, Field.Index.ANALYZED);
    fields[fields.Length - 1] = largeLazyField;

    // Bucket every field into the category maps used by the tests
    for (int i = 0; i < fields.Length; i++)
    {
        Fieldable f = fields[i];
        Add(all, f);
        if (f.IsIndexed())
            Add(indexed, f);
        else
            Add(unindexed, f);
        if (f.IsTermVectorStored())
            Add(termvector, f);
        if (f.IsIndexed() && !f.IsTermVectorStored())
            Add(notermvector, f);
        if (f.IsStored())
            Add(stored, f);
        else
            Add(unstored, f);
        if (f.GetOmitNorms())
            Add(noNorms, f);
        if (f.GetOmitTf())
            Add(noTf, f);
        if (f.IsLazy())
            Add(lazy, f);
    }

    nameValues = new System.Collections.Hashtable();
    nameValues[TEXT_FIELD_1_KEY] = FIELD_1_TEXT;
    nameValues[TEXT_FIELD_2_KEY] = FIELD_2_TEXT;
    nameValues[COMPRESSED_TEXT_FIELD_2_KEY] = FIELD_2_COMPRESSED_TEXT;
    nameValues[TEXT_FIELD_3_KEY] = FIELD_3_TEXT;
    nameValues[KEYWORD_FIELD_KEY] = KEYWORD_TEXT;
    nameValues[NO_NORMS_KEY] = NO_NORMS_TEXT;
    nameValues[NO_TF_KEY] = NO_TF_TEXT;
    nameValues[UNINDEXED_FIELD_KEY] = UNINDEXED_FIELD_TEXT;
    nameValues[UNSTORED_FIELD_1_KEY] = UNSTORED_1_FIELD_TEXT;
    nameValues[UNSTORED_FIELD_2_KEY] = UNSTORED_2_FIELD_TEXT;
    nameValues[LAZY_FIELD_KEY] = LAZY_FIELD_TEXT;
    nameValues[LAZY_FIELD_BINARY_KEY] = LAZY_FIELD_BINARY_BYTES;
    nameValues[LARGE_LAZY_FIELD_KEY] = LARGE_LAZY_FIELD_TEXT;
    nameValues[TEXT_FIELD_UTF1_KEY] = FIELD_UTF1_TEXT;
    nameValues[TEXT_FIELD_UTF2_KEY] = FIELD_UTF2_TEXT;
}
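// The Add helper used throughout the constructor is not part of this excerpt.
// A minimal sketch of a plausible implementation, assuming the category maps
// (all, indexed, stored, ...) are Hashtables keyed by field name:
private static void Add(System.Collections.IDictionary map, Fieldable field)
{
    // index each Fieldable under its field name so tests can look it up later
    map[field.Name()] = field;
}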
public virtual void TestBasic()
{
    Directory dir = new MockRAMDirectory();
    Analyzer analyzer = new StandardAnalyzer();
    IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    writer.SetMergeFactor(2);
    writer.SetMaxBufferedDocs(2);
    writer.SetSimilarity(new SimpleSimilarity());

    System.Text.StringBuilder sb = new System.Text.StringBuilder(265);
    System.String term = "term";
    for (int i = 0; i < 30; i++)
    {
        Document d = new Document();
        sb.Append(term).Append(" ");
        System.String content = sb.ToString();

        // every odd doc additionally contains the "notf" token in the noTf field
        Field noTf = new Field("noTf", content + (i % 2 == 0 ? "" : " notf"), Field.Store.NO, Field.Index.ANALYZED);
        noTf.SetOmitTermFreqAndPositions(true);
        d.Add(noTf);

        // every even doc additionally contains the "tf" token in the tf field
        Field tf = new Field("tf", content + (i % 2 == 0 ? " tf" : ""), Field.Store.NO, Field.Index.ANALYZED);
        d.Add(tf);

        writer.AddDocument(d);
        //System.out.println(d);
    }

    writer.Optimize();
    // flush
    writer.Close();
    _TestUtil.CheckIndex(dir);

    /*
     * Verify the index
     */
    Searcher searcher = new IndexSearcher(dir);
    searcher.SetSimilarity(new SimpleSimilarity());

    Term a = new Term("noTf", term);
    Term b = new Term("tf", term);
    Term c = new Term("noTf", "notf");
    Term d2 = new Term("tf", "tf");

    TermQuery q1 = new TermQuery(a);
    TermQuery q2 = new TermQuery(b);
    TermQuery q3 = new TermQuery(c);
    TermQuery q4 = new TermQuery(d2);

    searcher.Search(q1, new AnonymousClassCountingHitCollector(this));
    //System.out.println(CountingHitCollector.getCount());

    searcher.Search(q2, new AnonymousClassCountingHitCollector1(this));
    //System.out.println(CountingHitCollector.getCount());

    searcher.Search(q3, new AnonymousClassCountingHitCollector2(this));
    //System.out.println(CountingHitCollector.getCount());

    searcher.Search(q4, new AnonymousClassCountingHitCollector3(this));
    //System.out.println(CountingHitCollector.getCount());

    BooleanQuery bq = new BooleanQuery();
    bq.Add(q1, Occur.MUST);
    bq.Add(q4, Occur.MUST);

    searcher.Search(bq, new AnonymousClassCountingHitCollector4(this));
    Assert.IsTrue(15 == CountingHitCollector.GetCount());

    searcher.Close();
    dir.Close();
}
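// Neither CountingHitCollector nor the AnonymousClassCountingHitCollector*
// subclasses appear in this excerpt. A minimal sketch of the base collector,
// assuming the (deprecated) Lucene.NET 2.9 HitCollector API that the
// Search(Query, HitCollector) calls above would accept; the static count read
// by the final Assert is inferred from that call site:
public class CountingHitCollector : HitCollector
{
    internal static int count = 0;
    internal static int sum = 0;

    public CountingHitCollector()
    {
        // each search starts from a fresh count
        count = 0;
        sum = 0;
    }

    public override void Collect(int doc, float score)
    {
        count++;
        sum += doc; // accumulate doc ids so the hits are observable
    }

    public static int GetCount() { return count; }
    public static int GetSum() { return sum; }
}
// The anonymous subclasses would additionally override Collect to assert on
// the score each hit receives under SimpleSimilarity.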
public virtual void TestMixedRAM()
{
    Directory ram = new MockRAMDirectory();
    Analyzer analyzer = new StandardAnalyzer();
    IndexWriter writer = new IndexWriter(ram, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    writer.SetMaxBufferedDocs(10);
    writer.SetMergeFactor(2);
    Document d = new Document();

    // this field will have Tf
    Field f1 = new Field("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED);
    d.Add(f1);

    // this field will NOT have Tf
    Field f2 = new Field("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.ANALYZED);
    d.Add(f2);

    for (int i = 0; i < 5; i++)
    {
        writer.AddDocument(d);
    }

    f2.SetOmitTermFreqAndPositions(true);

    for (int i = 0; i < 20; i++)
    {
        writer.AddDocument(d);
    }

    // force merge
    writer.Optimize();
    // flush
    writer.Close();

    _TestUtil.CheckIndex(ram);

    SegmentReader reader = SegmentReader.GetOnlySegmentReader(ram);
    FieldInfos fi = reader.FieldInfos();
    Assert.IsFalse(fi.FieldInfo("f1").omitTermFreqAndPositions_ForNUnit, "OmitTermFreqAndPositions field bit should not be set.");
    Assert.IsTrue(fi.FieldInfo("f2").omitTermFreqAndPositions_ForNUnit, "OmitTermFreqAndPositions field bit should be set.");

    reader.Close();
    ram.Close();
}
// Tests whether the DocumentWriter correctly enables the
// omitTermFreqAndPositions bit in the FieldInfo
public virtual void TestOmitTermFreqAndPositions()
{
    Directory ram = new MockRAMDirectory();
    Analyzer analyzer = new StandardAnalyzer();
    IndexWriter writer = new IndexWriter(ram, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    Document d = new Document();

    // this field will have Tf
    Field f1 = new Field("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED);
    d.Add(f1);

    // this field will NOT have Tf
    Field f2 = new Field("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.ANALYZED);
    f2.SetOmitTermFreqAndPositions(true);
    d.Add(f2);

    writer.AddDocument(d);
    writer.Optimize();

    // now we add another document which has term freqs for field f2 and not for f1,
    // and verify that the SegmentMerger keeps things consistent
    d = new Document();

    // Reverse
    f1.SetOmitTermFreqAndPositions(true);
    d.Add(f1);
    f2.SetOmitTermFreqAndPositions(false);
    d.Add(f2);

    writer.AddDocument(d);

    // force merge
    writer.Optimize();
    // flush
    writer.Close();

    _TestUtil.CheckIndex(ram);

    SegmentReader reader = SegmentReader.GetOnlySegmentReader(ram);
    FieldInfos fi = reader.FieldInfos();
    Assert.IsTrue(fi.FieldInfo("f1").omitTermFreqAndPositions_ForNUnit, "OmitTermFreqAndPositions field bit should be set.");
    Assert.IsTrue(fi.FieldInfo("f2").omitTermFreqAndPositions_ForNUnit, "OmitTermFreqAndPositions field bit should be set.");

    reader.Close();
    ram.Close();
}
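// The mixed-segment tests above all rely on the same merge rule: once any
// document has omitted term freqs and positions for a field, the bit stays
// set for that field in the merged FieldInfo, because freq/prox data that was
// never written cannot be reconstructed. A hedged illustration of that rule
// (a hypothetical helper, not the actual SegmentMerger source):
private static bool MergeOmitTf(bool omitTfInSegment1, bool omitTfInSegment2)
{
    // the bit is "sticky": a logical OR across the segments being merged
    return omitTfInSegment1 || omitTfInSegment2;
}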