public virtual void TestOmitNorms_Mem()
{
    Directory dir = NewDirectory();
    Analyzer analyzer = new MockAnalyzer(Random());
    IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));

    // First document: f1 carries norms, f2 omits them.
    FieldType omitType = new FieldType(TextField.TYPE_NOT_STORED) { OmitNorms = true };
    Document doc = new Document();
    doc.Add(NewTextField("f1", "this field has norms", Field.Store.NO));
    doc.Add(NewField("f2", "this field has NO norms in all docs", omitType));
    writer.AddDocument(doc);
    writer.ForceMerge(1);

    // Second document reverses the roles (f1 omits norms, f2 has them) so we can
    // verify the SegmentMerger keeps the omit-norms bit sticky for both fields.
    doc = new Document();
    doc.Add(NewField("f1", "this field has norms", omitType));
    doc.Add(NewTextField("f2", "this field has NO norms in all docs", Field.Store.NO));
    writer.AddDocument(doc);

    // force merge
    writer.ForceMerge(1);
    // flush
    writer.Dispose();

    SegmentReader reader = GetOnlySegmentReader(DirectoryReader.Open(dir));
    FieldInfos fi = reader.FieldInfos;
    Assert.IsTrue(fi.FieldInfo("f1").OmitsNorms(), "OmitNorms field bit should be set.");
    Assert.IsTrue(fi.FieldInfo("f2").OmitsNorms(), "OmitNorms field bit should be set.");

    reader.Dispose();
    dir.Dispose();
}
public static void BeforeClass()
{
    Directory = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(
        Random(),
        Directory,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true))
            .SetMergePolicy(NewLogMergePolicy()));
    //writer.setNoCFSRatio(1.0);
    //writer.infoStream = System.out;

    for (int docId = 0; docId < 1000; docId++)
    {
        Document doc = new Document();
        FieldType vecType = new FieldType(TextField.TYPE_STORED);
        // Every doc stores term vectors; positions are added for even ids and
        // offsets for ids divisible by 3 (equivalent to the original mod2/mod3
        // branch chain, just expressed as two independent flags).
        vecType.StoreTermVectors = true;
        vecType.StoreTermVectorPositions = docId % 2 == 0;
        vecType.StoreTermVectorOffsets = docId % 3 == 0;
        doc.Add(new Field("field", English.IntToEnglish(docId), vecType));
        //test no term vectors too
        doc.Add(new TextField("noTV", English.IntToEnglish(docId), Field.Store.YES));
        writer.AddDocument(doc);
    }
    Reader = writer.Reader;
    writer.Dispose();
}
public virtual void TestOmitNormsCombos()
{
    // indexed with norms
    FieldType indexedType = new FieldType(TextField.TYPE_STORED);
    Field norms = new Field("foo", "a", indexedType);
    // indexed without norms
    FieldType indexedNoNormsType = new FieldType(TextField.TYPE_STORED) { OmitNorms = true };
    Field noNorms = new Field("foo", "a", indexedNoNormsType);
    // not indexed, but stored
    FieldType storedOnlyType = new FieldType() { Stored = true };
    Field noIndex = new Field("foo", "a", storedOnlyType);
    // not indexed but stored, omitNorms is set
    FieldType storedOmitType = new FieldType() { Stored = true, OmitNorms = true };
    Field noNormsNoIndex = new Field("foo", "a", storedOmitType);
    // not indexed nor stored (doesnt exist at all, we index a different field instead)
    Field emptyNorms = new Field("bar", "a", indexedType);

    // Pairwise combinations: norms survive only if every doc containing the
    // field was indexed with norms enabled.
    Assert.IsNotNull(GetNorms("foo", norms, norms));
    Assert.IsNull(GetNorms("foo", norms, noNorms));
    Assert.IsNotNull(GetNorms("foo", norms, noIndex));
    Assert.IsNotNull(GetNorms("foo", norms, noNormsNoIndex));
    Assert.IsNotNull(GetNorms("foo", norms, emptyNorms));
    Assert.IsNull(GetNorms("foo", noNorms, noNorms));
    Assert.IsNull(GetNorms("foo", noNorms, noIndex));
    Assert.IsNull(GetNorms("foo", noNorms, noNormsNoIndex));
    Assert.IsNull(GetNorms("foo", noNorms, emptyNorms));
    Assert.IsNull(GetNorms("foo", noIndex, noIndex));
    Assert.IsNull(GetNorms("foo", noIndex, noNormsNoIndex));
    Assert.IsNull(GetNorms("foo", noIndex, emptyNorms));
    Assert.IsNull(GetNorms("foo", noNormsNoIndex, noNormsNoIndex));
    Assert.IsNull(GetNorms("foo", noNormsNoIndex, emptyNorms));
    Assert.IsNull(GetNorms("foo", emptyNorms, emptyNorms));
}
public virtual void TestNoNrmFile()
{
    Directory dir = NewDirectory();
    Analyzer analyzer = new MockAnalyzer(Random());
    IndexWriter writer = new IndexWriter(
        dir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)
            .SetMaxBufferedDocs(3)
            .SetMergePolicy(NewLogMergePolicy()));
    LogMergePolicy lmp = (LogMergePolicy)writer.Config.MergePolicy;
    lmp.MergeFactor = 2;
    lmp.NoCFSRatio = 0.0; // keep separate files so the missing .nrm file is observable

    // Single field with norms omitted; no norms file should ever be written.
    FieldType omitType = new FieldType(TextField.TYPE_NOT_STORED) { OmitNorms = true };
    Document doc = new Document();
    doc.Add(NewField("f1", "this field has no norms", omitType));

    for (int i = 0; i < 30; i++)
    {
        writer.AddDocument(doc);
    }
    writer.Commit();
    AssertNoNrm(dir);

    // force merge
    writer.ForceMerge(1);
    // flush
    writer.Dispose();

    AssertNoNrm(dir);
    dir.Dispose();
}
public virtual void TestMixedRAM()
{
    Directory dir = NewDirectory();
    Analyzer analyzer = new MockAnalyzer(Random());
    IndexWriter writer = new IndexWriter(
        dir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)
            .SetMaxBufferedDocs(10)
            .SetMergePolicy(NewLogMergePolicy(2)));

    Document doc = new Document();
    // f1 keeps norms in every document
    doc.Add(NewTextField("f1", "this field has norms", Field.Store.NO));
    // f2 omits norms in every document
    FieldType omitType = new FieldType(TextField.TYPE_NOT_STORED) { OmitNorms = true };
    doc.Add(NewField("f2", "this field has NO norms in all docs", omitType));

    for (int i = 0; i < 5; i++)
    {
        writer.AddDocument(doc);
    }
    for (int i = 0; i < 20; i++)
    {
        writer.AddDocument(doc);
    }

    // force merge
    writer.ForceMerge(1);
    // flush
    writer.Dispose();

    SegmentReader reader = GetOnlySegmentReader(DirectoryReader.Open(dir));
    FieldInfos fi = reader.FieldInfos;
    Assert.IsTrue(!fi.FieldInfo("f1").OmitsNorms(), "OmitNorms field bit should not be set.");
    Assert.IsTrue(fi.FieldInfo("f2").OmitsNorms(), "OmitNorms field bit should be set.");

    reader.Dispose();
    dir.Dispose();
}
private void AddNoProxDoc(IndexWriter writer)
{
    Document doc = new Document();

    // indexed + stored text field, docs-only (no freqs/positions)
    FieldType indexedDocsOnly = new FieldType(TextField.TYPE_STORED);
    indexedDocsOnly.IndexOptionsValue = IndexOptions.DOCS_ONLY;
    doc.Add(new Field("content3", "aaa", indexedDocsOnly));

    // stored-only variant, same docs-only index options
    FieldType storedDocsOnly = new FieldType();
    storedDocsOnly.Stored = true;
    storedDocsOnly.IndexOptionsValue = IndexOptions.DOCS_ONLY;
    doc.Add(new Field("content4", "aaa", storedDocsOnly));

    writer.AddDocument(doc);
}
private void AddDoc(IndexWriter writer, int id)
{
    Document doc = new Document();
    doc.Add(new TextField("content", "aaa", Field.Store.NO));
    doc.Add(new StringField("id", Convert.ToString(id), Field.Store.YES));

    // stored text with full term vectors (positions + offsets) for the fields below
    FieldType tvType = new FieldType(TextField.TYPE_STORED);
    tvType.StoreTermVectors = true;
    tvType.StoreTermVectorPositions = true;
    tvType.StoreTermVectorOffsets = true;
    doc.Add(new Field("autf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", tvType));
    doc.Add(new Field("utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", tvType));
    doc.Add(new Field("content2", "here is more content with aaa aaa aaa", tvType));
    doc.Add(new Field("fie\u2C77ld", "field with non-ascii name", tvType));

    // add numeric fields, to test if flex preserves encoding
    doc.Add(new IntField("trieInt", id, Field.Store.NO));
    doc.Add(new LongField("trieLong", (long)id, Field.Store.NO));

    // add docvalues fields
    doc.Add(new NumericDocValuesField("dvByte", (sbyte)id));
    // big-endian byte decomposition of id for the binary/sorted docvalues fields
    sbyte[] idBytes = new sbyte[] { (sbyte)((int)((uint)id >> 24)), (sbyte)((int)((uint)id >> 16)), (sbyte)((int)((uint)id >> 8)), (sbyte)id };
    BytesRef bytesRef = new BytesRef(idBytes);
    doc.Add(new BinaryDocValuesField("dvBytesDerefFixed", bytesRef));
    doc.Add(new BinaryDocValuesField("dvBytesDerefVar", bytesRef));
    doc.Add(new SortedDocValuesField("dvBytesSortedFixed", bytesRef));
    doc.Add(new SortedDocValuesField("dvBytesSortedVar", bytesRef));
    doc.Add(new BinaryDocValuesField("dvBytesStraightFixed", bytesRef));
    doc.Add(new BinaryDocValuesField("dvBytesStraightVar", bytesRef));
    doc.Add(new DoubleDocValuesField("dvDouble", (double)id));
    doc.Add(new FloatDocValuesField("dvFloat", (float)id));
    doc.Add(new NumericDocValuesField("dvInt", id));
    doc.Add(new NumericDocValuesField("dvLong", id));
    doc.Add(new NumericDocValuesField("dvPacked", id));
    doc.Add(new NumericDocValuesField("dvShort", (short)id));

    // a field with both offsets and term vectors for a cross-check
    FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
    offsetsType.StoreTermVectors = true;
    offsetsType.StoreTermVectorPositions = true;
    offsetsType.StoreTermVectorOffsets = true;
    offsetsType.IndexOptionsValue = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
    doc.Add(new Field("content5", "here is more content with aaa aaa aaa", offsetsType));

    // a field that omits only positions
    FieldType freqsOnlyType = new FieldType(TextField.TYPE_STORED);
    freqsOnlyType.StoreTermVectors = true;
    freqsOnlyType.StoreTermVectorPositions = false;
    freqsOnlyType.StoreTermVectorOffsets = true;
    freqsOnlyType.IndexOptionsValue = IndexOptions.DOCS_AND_FREQS;
    doc.Add(new Field("content6", "here is more content with aaa aaa aaa", freqsOnlyType));

    // TODO:
    //   index different norms types via similarity (we use a random one currently?!)
    //   remove any analyzer randomness, explicitly add payloads for certain fields.
    writer.AddDocument(doc);
}
private Document CreateDoc()
{
    // Single "c" field carrying term vectors with both positions and offsets.
    FieldType fullVectors = new FieldType(TextField.TYPE_STORED)
    {
        StoreTermVectors = true,
        StoreTermVectorOffsets = true,
        StoreTermVectorPositions = true
    };
    Document doc = new Document();
    doc.Add(NewField("c", "aaa", fullVectors));
    return doc;
}
public virtual void TestMixedVectrosVectors()
{
    RandomIndexWriter writer = new RandomIndexWriter(
        Random(),
        Directory,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true))
            .SetOpenMode(OpenMode.CREATE));

    // One value indexed five ways into the same field: no vectors, vectors only,
    // vectors+positions, vectors+offsets, vectors+positions+offsets.
    FieldType vectorsOnly = new FieldType(TextField.TYPE_STORED) { StoreTermVectors = true };
    FieldType withPositions = new FieldType(TextField.TYPE_STORED) { StoreTermVectors = true, StoreTermVectorPositions = true };
    FieldType withOffsets = new FieldType(TextField.TYPE_STORED) { StoreTermVectors = true, StoreTermVectorOffsets = true };
    FieldType withBoth = new FieldType(TextField.TYPE_STORED) { StoreTermVectors = true, StoreTermVectorOffsets = true, StoreTermVectorPositions = true };

    Document doc = new Document();
    doc.Add(NewTextField("field", "one", Field.Store.YES));
    doc.Add(NewField("field", "one", vectorsOnly));
    doc.Add(NewField("field", "one", withPositions));
    doc.Add(NewField("field", "one", withOffsets));
    doc.Add(NewField("field", "one", withBoth));
    writer.AddDocument(doc);

    IndexReader reader = writer.Reader;
    writer.Dispose();

    IndexSearcher searcher = NewSearcher(reader);
    Query query = new TermQuery(new Term("field", "one"));
    ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);

    Fields vectors = searcher.IndexReader.GetTermVectors(hits[0].Doc);
    Assert.IsNotNull(vectors);
    Assert.AreEqual(1, vectors.Size());
    Terms vector = vectors.Terms("field");
    Assert.IsNotNull(vector);
    Assert.AreEqual(1, vector.Size());

    TermsEnum termsEnum = vector.Iterator(null);
    Assert.IsNotNull(termsEnum.Next());
    Assert.AreEqual("one", termsEnum.Term().Utf8ToString());
    Assert.AreEqual(5, termsEnum.TotalTermFreq());

    // positions: term "one" occurs at positions 0..4
    DocsAndPositionsEnum dpEnum = termsEnum.DocsAndPositions(null, null);
    Assert.IsNotNull(dpEnum);
    Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    Assert.AreEqual(5, dpEnum.Freq());
    for (int pos = 0; pos < 5; pos++)
    {
        Assert.AreEqual(pos, dpEnum.NextPosition());
    }

    // offsets: occurrences start every 4 chars, each 3 chars long ("one")
    dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
    Assert.IsNotNull(dpEnum);
    Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    Assert.AreEqual(5, dpEnum.Freq());
    for (int pos = 0; pos < 5; pos++)
    {
        dpEnum.NextPosition();
        Assert.AreEqual(4 * pos, dpEnum.StartOffset());
        Assert.AreEqual(4 * pos + 3, dpEnum.EndOffset());
    }

    reader.Dispose();
}