Beispiel #1
0
        /// <summary>
        /// Indexes two documents in which the fields "f1" and "f2" swap their norms
        /// settings, force-merges after each one, and asserts that both fields end up
        /// with the omit-norms flag set in the single remaining segment.
        /// </summary>
        public virtual void TestOmitNorms_Mem()
        {
            Directory dir = NewDirectory();
            IndexWriter indexWriter = new IndexWriter(
                dir,
                NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));

            // Type shared by whichever field omits norms in a given document.
            FieldType noNormsType = new FieldType(TextField.TYPE_NOT_STORED) { OmitNorms = true };

            // First document: f1 keeps norms, f2 omits them.
            Document firstDoc = new Document();
            firstDoc.Add(NewTextField("f1", "this field has norms", Field.Store.NO));
            firstDoc.Add(NewField("f2", "this field has NO norms in all docs", noNormsType));
            indexWriter.AddDocument(firstDoc);
            indexWriter.ForceMerge(1);

            // Second document reverses the settings: f1 omits norms, f2 keeps them.
            // The merge below must reconcile the two conflicting declarations.
            Document secondDoc = new Document();
            secondDoc.Add(NewField("f1", "this field has norms", noNormsType));
            secondDoc.Add(NewTextField("f2", "this field has NO norms in all docs", Field.Store.NO));
            indexWriter.AddDocument(secondDoc);

            // Merge everything into one segment, then flush by disposing the writer.
            indexWriter.ForceMerge(1);
            indexWriter.Dispose();

            SegmentReader segmentReader = GetOnlySegmentReader(DirectoryReader.Open(dir));
            FieldInfos infos = segmentReader.FieldInfos;
            Assert.IsTrue(infos.FieldInfo("f1").OmitsNorms(), "OmitNorms field bit should be set.");
            Assert.IsTrue(infos.FieldInfo("f2").OmitsNorms(), "OmitNorms field bit should be set.");

            segmentReader.Dispose();
            dir.Dispose();
        }
Beispiel #2
0
 /// <summary>
 /// Builds the shared index: 1000 documents, each holding a "field" text field with
 /// term vectors enabled (plus offsets and/or positions, depending on the document
 /// number) and a "noTV" text field. A reader over the result is kept in
 /// <c>Reader</c>.
 /// </summary>
 public static void BeforeClass()
 {
     Directory = NewDirectory();
     RandomIndexWriter indexWriter = new RandomIndexWriter(
         Random(),
         Directory,
         NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true))
             .SetMergePolicy(NewLogMergePolicy()));

     for (int docNum = 0; docNum < 1000; docNum++)
     {
         // Every document stores term vectors; multiples of 3 add offsets and
         // even numbers add positions, so multiples of 6 get both.
         FieldType vectorType = new FieldType(TextField.TYPE_STORED) { StoreTermVectors = true };
         if (docNum % 3 == 0)
         {
             vectorType.StoreTermVectorOffsets = true;
         }
         if (docNum % 2 == 0)
         {
             vectorType.StoreTermVectorPositions = true;
         }

         Document document = new Document();
         document.Add(new Field("field", English.IntToEnglish(docNum), vectorType));
         // Companion field so the no-term-vectors case is covered as well.
         document.Add(new TextField("noTV", English.IntToEnglish(docNum), Field.Store.YES));
         indexWriter.AddDocument(document);
     }

     Reader = indexWriter.Reader;
     indexWriter.Dispose();
 }
Beispiel #3
0
        /// <summary>
        /// Calls <c>GetNorms</c> for "foo" with every pairing of five field
        /// configurations and asserts whether a norms value comes back.
        /// </summary>
        public virtual void TestOmitNormsCombos()
        {
            // Indexed, with norms.
            FieldType withNormsType = new FieldType(TextField.TYPE_STORED);
            Field withNorms = new Field("foo", "a", withNormsType);

            // Indexed, norms omitted.
            FieldType omitNormsType = new FieldType(TextField.TYPE_STORED) { OmitNorms = true };
            Field withoutNorms = new Field("foo", "a", omitNormsType);

            // Stored only (not indexed).
            FieldType storedOnlyType = new FieldType { Stored = true };
            Field storedOnly = new Field("foo", "a", storedOnlyType);

            // Stored only (not indexed), with the omit-norms flag set too.
            FieldType storedOnlyOmitNormsType = new FieldType { Stored = true, OmitNorms = true };
            Field storedOnlyOmitNorms = new Field("foo", "a", storedOnlyOmitNormsType);

            // "foo" is absent altogether: a different field ("bar") is indexed instead.
            Field otherField = new Field("bar", "a", withNormsType);

            // Pairings that start with the norms-carrying field: norms are expected
            // unless the partner is the indexed field that omits norms.
            Assert.IsNotNull(GetNorms("foo", withNorms, withNorms));
            Assert.IsNull(GetNorms("foo", withNorms, withoutNorms));
            Assert.IsNotNull(GetNorms("foo", withNorms, storedOnly));
            Assert.IsNotNull(GetNorms("foo", withNorms, storedOnlyOmitNorms));
            Assert.IsNotNull(GetNorms("foo", withNorms, otherField));

            // Every remaining pairing is expected to yield no norms.
            Assert.IsNull(GetNorms("foo", withoutNorms, withoutNorms));
            Assert.IsNull(GetNorms("foo", withoutNorms, storedOnly));
            Assert.IsNull(GetNorms("foo", withoutNorms, storedOnlyOmitNorms));
            Assert.IsNull(GetNorms("foo", withoutNorms, otherField));
            Assert.IsNull(GetNorms("foo", storedOnly, storedOnly));
            Assert.IsNull(GetNorms("foo", storedOnly, storedOnlyOmitNorms));
            Assert.IsNull(GetNorms("foo", storedOnly, otherField));
            Assert.IsNull(GetNorms("foo", storedOnlyOmitNorms, storedOnlyOmitNorms));
            Assert.IsNull(GetNorms("foo", storedOnlyOmitNorms, otherField));
            Assert.IsNull(GetNorms("foo", otherField, otherField));
        }
Beispiel #4
0
        /// <summary>
        /// Indexes 30 copies of a document whose only field omits norms, using small
        /// flush and merge settings with compound files disabled, and runs
        /// <c>AssertNoNrm</c> on the directory after the commit and again after the
        /// final force-merge.
        /// </summary>
        public virtual void TestNoNrmFile()
        {
            const int DocCount = 30;

            Directory dir = NewDirectory();
            IndexWriter indexWriter = new IndexWriter(
                dir,
                NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
                    .SetMaxBufferedDocs(3)
                    .SetMergePolicy(NewLogMergePolicy()));

            LogMergePolicy mergePolicy = (LogMergePolicy)indexWriter.Config.MergePolicy;
            mergePolicy.MergeFactor = 2;
            mergePolicy.NoCFSRatio = 0.0;

            FieldType noNormsType = new FieldType(TextField.TYPE_NOT_STORED) { OmitNorms = true };
            Document document = new Document();
            document.Add(NewField("f1", "this field has no norms", noNormsType));

            for (int added = 0; added < DocCount; added++)
            {
                indexWriter.AddDocument(document);
            }

            indexWriter.Commit();
            AssertNoNrm(dir);

            // Merge down to a single segment, flush via Dispose, and check again.
            indexWriter.ForceMerge(1);
            indexWriter.Dispose();
            AssertNoNrm(dir);

            dir.Dispose();
        }
Beispiel #5
0
        /// <summary>
        /// Adds the same two-field document 25 times (enough to pass the configured
        /// buffer limit of 10 documents), force-merges to one segment, and asserts
        /// that "f1" keeps its norms while "f2" is flagged as omitting them.
        /// </summary>
        public virtual void TestMixedRAM()
        {
            // The document is added in two consecutive batches of 5 and 20.
            const int TotalDocs = 5 + 20;

            Directory dir = NewDirectory();
            IndexWriter indexWriter = new IndexWriter(
                dir,
                NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
                    .SetMaxBufferedDocs(10)
                    .SetMergePolicy(NewLogMergePolicy(2)));

            FieldType noNormsType = new FieldType(TextField.TYPE_NOT_STORED) { OmitNorms = true };

            Document document = new Document();
            // f1 carries norms ...
            document.Add(NewTextField("f1", "this field has norms", Field.Store.NO));
            // ... while f2 never does.
            document.Add(NewField("f2", "this field has NO norms in all docs", noNormsType));

            for (int added = 0; added < TotalDocs; added++)
            {
                indexWriter.AddDocument(document);
            }

            // Merge to a single segment, then flush by disposing the writer.
            indexWriter.ForceMerge(1);
            indexWriter.Dispose();

            SegmentReader segmentReader = GetOnlySegmentReader(DirectoryReader.Open(dir));
            FieldInfos infos = segmentReader.FieldInfos;
            Assert.IsFalse(infos.FieldInfo("f1").OmitsNorms(), "OmitNorms field bit should not be set.");
            Assert.IsTrue(infos.FieldInfo("f2").OmitsNorms(), "OmitNorms field bit should be set.");

            segmentReader.Dispose();
            dir.Dispose();
        }
 /// <summary>
 /// Adds one document to <paramref name="writer"/> holding two fields indexed with
 /// <c>IndexOptions.DOCS_ONLY</c>: "content3", derived from
 /// <c>TextField.TYPE_STORED</c>, and "content4", built on an empty stored type.
 /// </summary>
 /// <param name="writer">Writer that receives the document.</param>
 private void AddNoProxDoc(IndexWriter writer)
 {
     FieldType storedTextDocsOnly = new FieldType(TextField.TYPE_STORED)
     {
         IndexOptionsValue = IndexOptions.DOCS_ONLY
     };
     FieldType bareStoredDocsOnly = new FieldType
     {
         Stored = true,
         IndexOptionsValue = IndexOptions.DOCS_ONLY
     };

     Document noProxDoc = new Document();
     noProxDoc.Add(new Field("content3", "aaa", storedTextDocsOnly));
     noProxDoc.Add(new Field("content4", "aaa", bareStoredDocsOnly));
     writer.AddDocument(noProxDoc);
 }
 /// <summary>
 /// Adds one document to <paramref name="writer"/> that combines text fields with
 /// term vectors, numeric fields, doc-values fields of several kinds, and fields
 /// with non-default index options.
 /// </summary>
 /// <param name="writer">Writer that receives the document.</param>
 /// <param name="id">
 /// Value written to the "id" field and reused for the numeric and doc-values fields.
 /// </param>
 private void AddDoc(IndexWriter writer, int id)
 {
     Document doc = new Document();
     doc.Add(new TextField("content", "aaa", Field.Store.NO));
     // Format with the invariant culture so the indexed id term does not depend on
     // the locale of the machine running the test.
     doc.Add(new StringField("id", Convert.ToString(id, System.Globalization.CultureInfo.InvariantCulture), Field.Store.YES));

     // Stored text with full term vectors (positions and offsets).
     FieldType customType2 = new FieldType(TextField.TYPE_STORED);
     customType2.StoreTermVectors = true;
     customType2.StoreTermVectorPositions = true;
     customType2.StoreTermVectorOffsets = true;
     // Values containing surrogate pairs, U+0000 and other non-ASCII characters.
     doc.Add(new Field("autf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", customType2));
     doc.Add(new Field("utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", customType2));
     doc.Add(new Field("content2", "here is more content with aaa aaa aaa", customType2));
     doc.Add(new Field("fie\u2C77ld", "field with non-ascii name", customType2));

     // add numeric fields, to test if flex preserves encoding
     doc.Add(new IntField("trieInt", id, Field.Store.NO));
     doc.Add(new LongField("trieLong", (long)id, Field.Store.NO));

     // add docvalues fields
     doc.Add(new NumericDocValuesField("dvByte", (sbyte)id));
     // The four bytes of id, most significant byte first.
     sbyte[] bytes = new sbyte[] { (sbyte)((int)((uint)id >> 24)), (sbyte)((int)((uint)id >> 16)), (sbyte)((int)((uint)id >> 8)), (sbyte)id };
     BytesRef @ref = new BytesRef(bytes);
     doc.Add(new BinaryDocValuesField("dvBytesDerefFixed", @ref));
     doc.Add(new BinaryDocValuesField("dvBytesDerefVar", @ref));
     doc.Add(new SortedDocValuesField("dvBytesSortedFixed", @ref));
     doc.Add(new SortedDocValuesField("dvBytesSortedVar", @ref));
     doc.Add(new BinaryDocValuesField("dvBytesStraightFixed", @ref));
     doc.Add(new BinaryDocValuesField("dvBytesStraightVar", @ref));
     doc.Add(new DoubleDocValuesField("dvDouble", (double)id));
     doc.Add(new FloatDocValuesField("dvFloat", (float)id));
     doc.Add(new NumericDocValuesField("dvInt", id));
     doc.Add(new NumericDocValuesField("dvLong", id));
     doc.Add(new NumericDocValuesField("dvPacked", id));
     doc.Add(new NumericDocValuesField("dvShort", (short)id));

     // a field with both offsets and term vectors for a cross-check
     FieldType customType3 = new FieldType(TextField.TYPE_STORED);
     customType3.StoreTermVectors = true;
     customType3.StoreTermVectorPositions = true;
     customType3.StoreTermVectorOffsets = true;
     customType3.IndexOptionsValue = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
     doc.Add(new Field("content5", "here is more content with aaa aaa aaa", customType3));

     // a field that omits only positions
     FieldType customType4 = new FieldType(TextField.TYPE_STORED);
     customType4.StoreTermVectors = true;
     customType4.StoreTermVectorPositions = false;
     customType4.StoreTermVectorOffsets = true;
     customType4.IndexOptionsValue = IndexOptions.DOCS_AND_FREQS;
     doc.Add(new Field("content6", "here is more content with aaa aaa aaa", customType4));

     // TODO:
     //   index different norms types via similarity (we use a random one currently?!)
     //   remove any analyzer randomness, explicitly add payloads for certain fields.
     writer.AddDocument(doc);
 }
Beispiel #8
0
 /// <summary>
 /// Creates a document with a single field "c" (value "aaa") whose type is based on
 /// <c>TextField.TYPE_STORED</c> with term vectors, offsets and positions enabled.
 /// </summary>
 /// <returns>The newly built document.</returns>
 private Document CreateDoc()
 {
     FieldType fullVectorType = new FieldType(TextField.TYPE_STORED)
     {
         StoreTermVectors = true,
         StoreTermVectorOffsets = true,
         StoreTermVectorPositions = true
     };

     Document result = new Document();
     result.Add(NewField("c", "aaa", fullVectorType));
     return result;
 }
Beispiel #9
0
        /// <summary>
        /// Adds five instances of "field" (value "one") to a single document, each with
        /// a different term-vector configuration, and verifies that the resulting term
        /// vector reports one term with frequency 5, positions 0..4 and offsets spaced
        /// four characters apart.
        /// </summary>
        public virtual void TestMixedVectrosVectors()
        {
            const int Occurrences = 5;
            const int OffsetStride = 4; // gap between consecutive start offsets
            const int TermLength = 3;   // length of "one"

            RandomIndexWriter indexWriter = new RandomIndexWriter(
                Random(),
                Directory,
                NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true))
                    .SetOpenMode(OpenMode.CREATE));

            FieldType vectorsOnly = new FieldType(TextField.TYPE_STORED) { StoreTermVectors = true };
            FieldType vectorsWithPositions = new FieldType(TextField.TYPE_STORED)
            {
                StoreTermVectors = true,
                StoreTermVectorPositions = true
            };
            FieldType vectorsWithOffsets = new FieldType(TextField.TYPE_STORED)
            {
                StoreTermVectors = true,
                StoreTermVectorOffsets = true
            };
            FieldType vectorsWithBoth = new FieldType(TextField.TYPE_STORED)
            {
                StoreTermVectors = true,
                StoreTermVectorOffsets = true,
                StoreTermVectorPositions = true
            };

            // One plain text instance followed by one instance per vector configuration.
            Document document = new Document();
            document.Add(NewTextField("field", "one", Field.Store.YES));
            foreach (FieldType vectorType in new FieldType[] { vectorsOnly, vectorsWithPositions, vectorsWithOffsets, vectorsWithBoth })
            {
                document.Add(NewField("field", "one", vectorType));
            }
            indexWriter.AddDocument(document);
            IndexReader indexReader = indexWriter.Reader;
            indexWriter.Dispose();

            IndexSearcher indexSearcher = NewSearcher(indexReader);

            Query termQuery = new TermQuery(new Term("field", "one"));
            ScoreDoc[] scoreDocs = indexSearcher.Search(termQuery, null, 1000).ScoreDocs;
            Assert.AreEqual(1, scoreDocs.Length);

            Fields docVectors = indexSearcher.IndexReader.GetTermVectors(scoreDocs[0].Doc);
            Assert.IsNotNull(docVectors);
            Assert.AreEqual(1, docVectors.Size());

            Terms fieldVector = docVectors.Terms("field");
            Assert.IsNotNull(fieldVector);
            Assert.AreEqual(1, fieldVector.Size());

            TermsEnum terms = fieldVector.Iterator(null);
            Assert.IsNotNull(terms.Next());
            Assert.AreEqual("one", terms.Term().Utf8ToString());
            Assert.AreEqual(Occurrences, terms.TotalTermFreq());

            // First pass: check the positions.
            DocsAndPositionsEnum postings = terms.DocsAndPositions(null, null);
            Assert.IsNotNull(postings);
            Assert.AreNotEqual(DocIdSetIterator.NO_MORE_DOCS, postings.NextDoc());
            Assert.AreEqual(Occurrences, postings.Freq());
            for (int expectedPosition = 0; expectedPosition < Occurrences; expectedPosition++)
            {
                Assert.AreEqual(expectedPosition, postings.NextPosition());
            }

            // Second pass, reusing the enum: check the offsets.
            postings = terms.DocsAndPositions(null, postings);
            Assert.IsNotNull(postings);
            Assert.AreNotEqual(DocIdSetIterator.NO_MORE_DOCS, postings.NextDoc());
            Assert.AreEqual(Occurrences, postings.Freq());
            for (int occurrence = 0; occurrence < Occurrences; occurrence++)
            {
                postings.NextPosition();
                int expectedStart = OffsetStride * occurrence;
                Assert.AreEqual(expectedStart, postings.StartOffset());
                Assert.AreEqual(expectedStart + TermLength, postings.EndOffset());
            }

            indexReader.Dispose();
        }