public virtual void TestDateCompression()
        {
            Directory dir = new RAMDirectory();
            IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            IndexWriter iwriter = new IndexWriter(dir, iwc);

            const long @base = 13; // prime
            long day = 1000L * 60 * 60 * 24;

            Document doc = new Document();
            NumericDocValuesField dvf = new NumericDocValuesField("dv", 0);
            doc.Add(dvf);
            for (int i = 0; i < 300; ++i)
            {
                dvf.LongValue = @base + Random().Next(1000) * day;
                iwriter.AddDocument(doc);
            }
            iwriter.ForceMerge(1);
            long size1 = DirSize(dir);
            for (int i = 0; i < 50; ++i)
            {
                dvf.LongValue = @base + Random().Next(1000) * day;
                iwriter.AddDocument(doc);
            }
            iwriter.ForceMerge(1);
            long size2 = DirSize(dir);
            // make sure the new longs cost less than if they had only been packed
            Assert.IsTrue(size2 < size1 + (PackedInts.BitsRequired(day) * 50) / 8);
        }
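        // DirSize is not shown in this snippet. A minimal sketch of the helper
        // used by the compression tests, assuming it simply sums the lengths of
        // all files in the Directory:
        internal static long DirSize(Directory d)
        {
            long size = 0;
            foreach (string file in d.ListAll())
            {
                size += d.FileLength(file);
            }
            return size;
        }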
 public virtual void TestForceMergeNotNeeded()
 {
     Directory dir = NewDirectory();
     AtomicBoolean mayMerge = new AtomicBoolean(true);
     MergeScheduler mergeScheduler = new SerialMergeSchedulerAnonymousInnerClassHelper(this, mayMerge);
     IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergeScheduler(mergeScheduler).SetMergePolicy(MergePolicy()));
     writer.Config.MergePolicy.NoCFSRatio = Random().NextBoolean() ? 0 : 1;
     int numSegments = TestUtil.NextInt(Random(), 2, 20);
     for (int i = 0; i < numSegments; ++i)
     {
         int numDocs = TestUtil.NextInt(Random(), 1, 5);
         for (int j = 0; j < numDocs; ++j)
         {
             writer.AddDocument(new Document());
         }
         writer.Reader.Dispose();
     }
     for (int i = 5; i >= 0; --i)
     {
         int segmentCount = writer.SegmentCount;
         int maxNumSegments = i == 0 ? 1 : TestUtil.NextInt(Random(), 1, 10);
         mayMerge.Set(segmentCount > maxNumSegments);
         writer.ForceMerge(maxNumSegments);
     }
     writer.Dispose();
     dir.Dispose();
 }
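 // SerialMergeSchedulerAnonymousInnerClassHelper is not shown in this snippet.
 // A minimal sketch, assuming its only job is to fail the test if a merge is
 // attempted while mayMerge is false:
 private class SerialMergeSchedulerAnonymousInnerClassHelper : SerialMergeScheduler
 {
     private readonly AtomicBoolean mayMerge;

     public SerialMergeSchedulerAnonymousInnerClassHelper(object outerInstance, AtomicBoolean mayMerge)
     {
         // outerInstance is unused in this sketch
         this.mayMerge = mayMerge;
     }

     public override void Merge(IndexWriter writer, MergeTrigger trigger, bool newMergesFound)
     {
         Assert.IsTrue(mayMerge.Get()); // no merge may run while mayMerge is false
         base.Merge(writer, trigger, newMergesFound);
     }
 }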
        public virtual void TestTermEnum()
        {
            IndexWriter writer = null;

            writer = new IndexWriter(Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));

            // add 100 documents with term: aaa
            // add 100 documents with terms: aaa bbb
            // therefore, term 'aaa' has a document frequency of 200 and term 'bbb' of 100
            for (int i = 0; i < 100; i++)
            {
                AddDoc(writer, "aaa");
                AddDoc(writer, "aaa bbb");
            }

            writer.Dispose();

            // verify document frequency of terms in a multi-segment index
            VerifyDocFreq();

            // merge segments
            writer = new IndexWriter(Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode_e.APPEND));
            writer.ForceMerge(1);
            writer.Dispose();

            // verify document frequency of terms in a single-segment index
            VerifyDocFreq();
        }
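        // AddDoc and VerifyDocFreq are not shown in this snippet. A plausible
        // sketch of AddDoc, assuming it indexes the given text into a single
        // "content" field (VerifyDocFreq would then re-open the index and assert
        // the expected document frequencies for 'aaa' and 'bbb'):
        private void AddDoc(IndexWriter writer, string value)
        {
            Document doc = new Document();
            doc.Add(NewTextField("content", value, Field.Store.NO));
            writer.AddDocument(doc);
        }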
        public virtual void TestAllSegmentsLarge()
        {
            Directory dir = new RAMDirectory();

            IndexWriterConfig conf = NewWriterConfig();
            IndexWriter writer = new IndexWriter(dir, conf);

            AddDocs(writer, 3);
            AddDocs(writer, 3);
            AddDocs(writer, 3);

            writer.Dispose();

            conf = NewWriterConfig();
            LogMergePolicy lmp = new LogDocMergePolicy();
            lmp.MaxMergeDocs = 2;
            conf.SetMergePolicy(lmp);

            writer = new IndexWriter(dir, conf);
            writer.ForceMerge(1);
            writer.Dispose();

            SegmentInfos sis = new SegmentInfos();
            sis.Read(dir);
            Assert.AreEqual(3, sis.Size());
        }
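        // NewWriterConfig and AddDocs are not shown in this snippet. Minimal
        // sketches, assuming the config disables auto-flushing and merging so
        // that each AddDocs call (which commits at the end) produces one segment:
        private static IndexWriterConfig NewWriterConfig()
        {
            IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, null);
            conf.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
            conf.SetMergePolicy(NoMergePolicy.COMPOUND_FILES); // prevent merges by default
            return conf;
        }

        private static void AddDocs(IndexWriter writer, int numDocs)
        {
            for (int i = 0; i < numDocs; i++)
            {
                writer.AddDocument(new Document());
            }
            writer.Commit();
        }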
        public override void SetUp()
        {
            base.SetUp();
            store = NewDirectory();
            IndexWriter writer = new IndexWriter(store, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false)));

            Document doc;

            doc = new Document();
            doc.Add(NewTextField("aaa", "foo", Field.Store.YES));
            writer.AddDocument(doc);

            doc = new Document();
            doc.Add(NewTextField("aaa", "foo", Field.Store.YES));
            writer.AddDocument(doc);

            doc = new Document();
            doc.Add(NewTextField("contents", "Tom", Field.Store.YES));
            writer.AddDocument(doc);

            doc = new Document();
            doc.Add(NewTextField("contents", "Jerry", Field.Store.YES));
            writer.AddDocument(doc);

            doc = new Document();
            doc.Add(NewTextField("zzz", "bar", Field.Store.YES));
            writer.AddDocument(doc);

            writer.ForceMerge(1);
            writer.Dispose();
        }
        public static void Main(string[] args)
        {
            if (args.Length < 3)
            {
                Console.Error.WriteLine("Usage: IndexMergeTool <mergedIndex> <index1> <index2> [index3] ...");
                Environment.Exit(1);
            }
            FSDirectory mergedIndex = FSDirectory.Open(new System.IO.DirectoryInfo(args[0]));

#pragma warning disable 612, 618
            using (IndexWriter writer = new IndexWriter(mergedIndex, new IndexWriterConfig(LuceneVersion.LUCENE_CURRENT, null)
               .SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE)))
#pragma warning restore 612, 618
            {

                Directory[] indexes = new Directory[args.Length - 1];
                for (int i = 1; i < args.Length; i++)
                {
                    indexes[i - 1] = FSDirectory.Open(new System.IO.DirectoryInfo(args[i]));
                }

                Console.WriteLine("Merging...");
                writer.AddIndexes(indexes);

                Console.WriteLine("Full merge...");
                writer.ForceMerge(1);
            }
            Console.WriteLine("Done.");
        }
        public virtual void TestLucene()
        {
            int num = 100;

            Directory indexA = NewDirectory();
            Directory indexB = NewDirectory();

            FillIndex(Random(), indexA, 0, num);
            bool fail = VerifyIndex(indexA, 0);
            if (fail)
            {
                Assert.Fail("Index a is invalid");
            }

            FillIndex(Random(), indexB, num, num);
            fail = VerifyIndex(indexB, num);
            if (fail)
            {
                Assert.Fail("Index b is invalid");
            }

            Directory merged = NewDirectory();

            IndexWriter writer = new IndexWriter(merged, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy(2)));
            writer.AddIndexes(indexA, indexB);
            writer.ForceMerge(1);
            writer.Dispose();

            fail = VerifyIndex(merged, 0);

            Assert.IsFalse(fail, "The merged index is invalid");
            indexA.Dispose();
            indexB.Dispose();
            merged.Dispose();
        }
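        // FillIndex and VerifyIndex are not shown in this snippet. Rough sketches,
        // assuming each document stores its sequence number in a "count" field and
        // verification checks that document i holds the value startAt + i:
        private void FillIndex(Random random, Directory dir, int start, int numDocs)
        {
            IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).SetOpenMode(OpenMode_e.CREATE).SetMaxBufferedDocs(2).SetMergePolicy(NewLogMergePolicy(2)));
            for (int i = start; i < start + numDocs; i++)
            {
                Document doc = new Document();
                doc.Add(NewStringField("count", i.ToString(), Field.Store.YES));
                writer.AddDocument(doc);
            }
            writer.Dispose();
        }

        private bool VerifyIndex(Directory directory, int startAt)
        {
            bool fail = false;
            IndexReader reader = DirectoryReader.Open(directory);
            for (int i = 0; i < reader.MaxDoc; i++)
            {
                if (!reader.Document(i).Get("count").Equals((i + startAt).ToString()))
                {
                    fail = true; // document order was scrambled by the merge
                }
            }
            reader.Dispose();
            return fail;
        }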
        public virtual void TestFixedBinary()
        {
            BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BFixedBinary"));
            if (dir is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
            }

            IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
               .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH).SetRAMBufferSizeMB(256.0).SetMergeScheduler(new ConcurrentMergeScheduler()).SetMergePolicy(NewLogMergePolicy(false, 10)).SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE));

            Document doc = new Document();
            var bytes = new byte[4];
            BytesRef data = new BytesRef(bytes);
            BinaryDocValuesField dvField = new BinaryDocValuesField("dv", data);
            doc.Add(dvField);

            for (int i = 0; i < int.MaxValue; i++)
            {
                bytes[0] = (byte)(i >> 24);
                bytes[1] = (byte)(i >> 16);
                bytes[2] = (byte)(i >> 8);
                bytes[3] = (byte)i;
                w.AddDocument(doc);
                if (i % 100000 == 0)
                {
                    Console.WriteLine("indexed: " + i);
                    Console.Out.Flush();
                }
            }

            w.ForceMerge(1);
            w.Dispose();

            Console.WriteLine("verifying...");
            Console.Out.Flush();

            DirectoryReader r = DirectoryReader.Open(dir);
            int expectedValue = 0;
            foreach (AtomicReaderContext context in r.Leaves)
            {
                AtomicReader reader = context.AtomicReader;
                BytesRef scratch = new BytesRef();
                BinaryDocValues dv = reader.GetBinaryDocValues("dv");
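                // note: 'data' wraps the same 'bytes' array created above, so
                // writing the expected value into 'bytes' updates 'data' in
                // place before each comparison below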
                for (int i = 0; i < reader.MaxDoc; i++)
                {
                    bytes[0] = (byte)(expectedValue >> 24);
                    bytes[1] = (byte)(expectedValue >> 16);
                    bytes[2] = (byte)(expectedValue >> 8);
                    bytes[3] = (byte)expectedValue;
                    dv.Get(i, scratch);
                    Assert.AreEqual(data, scratch);
                    expectedValue++;
                }
            }

            r.Dispose();
            dir.Dispose();
        }

        public virtual void TestOmitNorms_Mem()
        {
            Directory ram = NewDirectory();
            Analyzer analyzer = new MockAnalyzer(Random());
            IndexWriter writer = new IndexWriter(ram, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
            Document d = new Document();

            // this field will have norms
            Field f1 = NewTextField("f1", "this field has norms", Field.Store.NO);
            d.Add(f1);

            // this field will NOT have norms
            FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
            customType.OmitNorms = true;
            Field f2 = NewField("f2", "this field has NO norms in all docs", customType);
            d.Add(f2);

            writer.AddDocument(d);
            writer.ForceMerge(1);
            // now we add another document which omits norms for f1 and not for f2,
            // and verify that the SegmentMerger keeps things consistent
            d = new Document();

            // Reverse
            d.Add(NewField("f1", "this field has norms", customType));

            d.Add(NewTextField("f2", "this field has NO norms in all docs", Field.Store.NO));

            writer.AddDocument(d);

            // force merge
            writer.ForceMerge(1);
            // flush
            writer.Dispose();

            SegmentReader reader = GetOnlySegmentReader(DirectoryReader.Open(ram));
            FieldInfos fi = reader.FieldInfos;
            Assert.IsTrue(fi.FieldInfo("f1").OmitsNorms(), "OmitNorms field bit should be set.");
            Assert.IsTrue(fi.FieldInfo("f2").OmitsNorms(), "OmitNorms field bit should be set.");

            reader.Dispose();
            ram.Dispose();
        }
 public static void BeforeClass()
 {
     Dir = NewFSDirectory(CreateTempDir("2Bdocs"));
     IndexWriter iw = new IndexWriter(Dir, new IndexWriterConfig(TEST_VERSION_CURRENT, null));
     Document doc = new Document();
     for (int i = 0; i < 262144; i++)
     {
         iw.AddDocument(doc);
     }
     iw.ForceMerge(1);
     iw.Dispose();
 }
        public virtual void Test()
        {
            Directory dir = NewDirectory();
            IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));

            IList<long?> numbers = new List<long?>();
            IList<BytesRef> binary = new List<BytesRef>();
            IList<BytesRef> sorted = new List<BytesRef>();
            int numDocs = AtLeast(100);
            for (int i = 0; i < numDocs; i++)
            {
                Document d = new Document();
                long number = Random().NextLong();
                d.Add(new NumericDocValuesField("number", number));
                BytesRef bytes = new BytesRef(TestUtil.RandomRealisticUnicodeString(Random()));
                d.Add(new BinaryDocValuesField("bytes", bytes));
                binary.Add(bytes);
                bytes = new BytesRef(TestUtil.RandomRealisticUnicodeString(Random()));
                d.Add(new SortedDocValuesField("sorted", bytes));
                sorted.Add(bytes);
                w.AddDocument(d);
                numbers.Add(number);
            }

            w.ForceMerge(1);
            IndexReader r = w.Reader;
            w.Dispose();

            Assert.AreEqual(1, r.Leaves.Count);
            AtomicReader ar = (AtomicReader)r.Leaves[0].Reader;

            int numThreads = TestUtil.NextInt(Random(), 2, 5);
            IList<ThreadClass> threads = new List<ThreadClass>();
            CountDownLatch startingGun = new CountDownLatch(1);
            for (int t = 0; t < numThreads; t++)
            {
                Random threadRandom = new Random(Random().Next());
                ThreadClass thread = new ThreadAnonymousInnerClassHelper(this, numbers, binary, sorted, numDocs, ar, startingGun, threadRandom);
                thread.Start();
                threads.Add(thread);
            }

            startingGun.countDown();

            foreach (ThreadClass thread in threads)
            {
                thread.Join();
            }

            r.Dispose();
            dir.Dispose();
        }
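        // ThreadAnonymousInnerClassHelper is not shown in this snippet. Its Run
        // method presumably waits on startingGun, then repeatedly picks random
        // doc IDs with threadRandom and asserts that the "number", "bytes" and
        // "sorted" doc values read from ar match the numbers/binary/sorted lists
        // captured during indexing.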
        public virtual void TestUniqueValuesCompression()
        {
            Directory dir = new RAMDirectory();
            IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            IndexWriter iwriter = new IndexWriter(dir, iwc);

            int uniqueValueCount = TestUtil.NextInt(Random(), 1, 256);
            IList<long> values = new List<long>();

            Document doc = new Document();
            NumericDocValuesField dvf = new NumericDocValuesField("dv", 0);
            doc.Add(dvf);
            for (int i = 0; i < 300; ++i)
            {
                long value;
                if (values.Count < uniqueValueCount)
                {
                    value = Random().NextLong();
                    values.Add(value);
                }
                else
                {
                    value = RandomInts.RandomFrom(Random(), values);
                }
                dvf.LongValue = value;
                iwriter.AddDocument(doc);
            }
            iwriter.ForceMerge(1);
            long size1 = DirSize(dir);
            for (int i = 0; i < 20; ++i)
            {
                dvf.LongValue = RandomInts.RandomFrom(Random(), values);
                iwriter.AddDocument(doc);
            }
            iwriter.ForceMerge(1);
            long size2 = DirSize(dir);
            // make sure the new longs did not cost 8 bytes each
            Assert.IsTrue(size2 < size1 + 8 * 20);
        }
        public virtual void TestEmptyIndex()
        {
            Directory rd1 = NewDirectory();
            IndexWriter iw = new IndexWriter(rd1, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
            iw.Dispose();
            // create a copy:
            Directory rd2 = NewDirectory(rd1);

            Directory rdOut = NewDirectory();

            IndexWriter iwOut = new IndexWriter(rdOut, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));

            ParallelAtomicReader apr = new ParallelAtomicReader(SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(rd1)), SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(rd2)));

            // When unpatched, Lucene crashes here with a NoSuchElementException (caused by ParallelTermEnum)
            iwOut.AddIndexes(apr);
            iwOut.ForceMerge(1);

            // 2nd try with a readerless parallel reader
            iwOut.AddIndexes(new ParallelAtomicReader());
            iwOut.ForceMerge(1);

            ParallelCompositeReader cpr = new ParallelCompositeReader(DirectoryReader.Open(rd1), DirectoryReader.Open(rd2));

            // When unpatched, Lucene crashes here with a NoSuchElementException (caused by ParallelTermEnum)
            iwOut.AddIndexes(cpr);
            iwOut.ForceMerge(1);

            // 2nd try with a readerless parallel reader
            iwOut.AddIndexes(new ParallelCompositeReader());
            iwOut.ForceMerge(1);

            iwOut.Dispose();
            rdOut.Dispose();
            rd1.Dispose();
            rd2.Dispose();
        }
        public virtual void TestNumerics([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")]IConcurrentMergeScheduler scheduler)
        {
            BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BNumerics"));
            if (dir is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
            }

            IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
               .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH).SetRAMBufferSizeMB(256.0).SetMergeScheduler(scheduler).SetMergePolicy(NewLogMergePolicy(false, 10)).SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE));

            Document doc = new Document();
            NumericDocValuesField dvField = new NumericDocValuesField("dv", 0);
            doc.Add(dvField);

            for (int i = 0; i < int.MaxValue; i++)
            {
                dvField.LongValue = i;
                w.AddDocument(doc);
                if (i % 100000 == 0)
                {
                    Console.WriteLine("indexed: " + i);
                    Console.Out.Flush();
                }
            }

            w.ForceMerge(1);
            w.Dispose();

            Console.WriteLine("verifying...");
            Console.Out.Flush();

            DirectoryReader r = DirectoryReader.Open(dir);
            long expectedValue = 0;
            foreach (AtomicReaderContext context in r.Leaves)
            {
                AtomicReader reader = context.AtomicReader;
                NumericDocValues dv = reader.GetNumericDocValues("dv");
                for (int i = 0; i < reader.MaxDoc; i++)
                {
                    Assert.AreEqual(expectedValue, dv.Get(i));
                    expectedValue++;
                }
            }

            r.Dispose();
            dir.Dispose();
        }
        public virtual void TestPartialMerge()
        {
            Directory dir = NewDirectory();

            Document doc = new Document();

            doc.Add(NewStringField("content", "aaa", Field.Store.NO));
            int incrMin = TestNightly ? 15 : 40;

            for (int numDocs = 10; numDocs < 500; numDocs += TestUtil.NextInt32(Random, incrMin, 5 * incrMin))
            {
                LogDocMergePolicy ldmp = new LogDocMergePolicy();
                ldmp.MinMergeDocs = 1;
                ldmp.MergeFactor  = 5;
                IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.CREATE).SetMaxBufferedDocs(2).SetMergePolicy(ldmp));
                for (int j = 0; j < numDocs; j++)
                {
                    writer.AddDocument(doc);
                }
                writer.Dispose();

                SegmentInfos sis = new SegmentInfos();
                sis.Read(dir);
                int segCount = sis.Count;

                ldmp             = new LogDocMergePolicy();
                ldmp.MergeFactor = 5;
                writer           = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(ldmp));
                writer.ForceMerge(3);
                writer.Dispose();

                sis = new SegmentInfos();
                sis.Read(dir);
                int optSegCount = sis.Count;

                if (segCount < 3)
                {
                    Assert.AreEqual(segCount, optSegCount);
                }
                else
                {
                    Assert.AreEqual(3, optSegCount);
                }
            }
            dir.Dispose();
        }
        internal virtual void AddDocs(Random random, Directory dir, int ndocs, string field, string val, int maxTF, float percentDocs)
        {
            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(random, val, maxTF, percentDocs);

            Document doc = new Document();

            doc.Add(NewStringField(field, val, Field.Store.NO));
            IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetOpenMode(OpenMode.CREATE).SetMaxBufferedDocs(100).SetMergePolicy(NewLogMergePolicy(100)));

            for (int i = 0; i < ndocs; i++)
            {
                writer.AddDocument(doc);
            }

            writer.ForceMerge(1);
            writer.Dispose();
        }
        public virtual void TestMixedRAM()
        {
            Directory   ram      = NewDirectory();
            Analyzer    analyzer = new MockAnalyzer(Random);
            IndexWriter writer   = new IndexWriter(ram, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetMaxBufferedDocs(10).SetMergePolicy(NewLogMergePolicy(2)));
            Document    d        = new Document();

            // this field will have norms
            Field f1 = NewTextField("f1", "this field has norms", Field.Store.NO);

            d.Add(f1);

            // this field will NOT have norms

            FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);

            customType.OmitNorms = true;
            Field f2 = NewField("f2", "this field has NO norms in all docs", customType);

            d.Add(f2);

            for (int i = 0; i < 5; i++)
            {
                writer.AddDocument(d);
            }

            for (int i = 0; i < 20; i++)
            {
                writer.AddDocument(d);
            }

            // force merge
            writer.ForceMerge(1);

            // flush
            writer.Dispose();

            SegmentReader reader = GetOnlySegmentReader(DirectoryReader.Open(ram));
            FieldInfos    fi     = reader.FieldInfos;

            Assert.IsTrue(!fi.FieldInfo("f1").OmitsNorms, "OmitNorms field bit should not be set.");
            Assert.IsTrue(fi.FieldInfo("f2").OmitsNorms, "OmitNorms field bit should be set.");

            reader.Dispose();
            ram.Dispose();
        }
        public virtual void TestTermVectorCorruption3()
        {
            Directory   dir    = NewDirectory();
            IndexWriter writer = new IndexWriter(dir, ((IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMaxBufferedDocs(2).SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH)).SetMergeScheduler(new SerialMergeScheduler()).SetMergePolicy(new LogDocMergePolicy()));

            Document  document   = new Document();
            FieldType customType = new FieldType();

            customType.IsStored = true;

            Field storedField = NewField("stored", "stored", customType);

            document.Add(storedField);
            FieldType customType2 = new FieldType(StringField.TYPE_NOT_STORED);

            customType2.StoreTermVectors         = true;
            customType2.StoreTermVectorPositions = true;
            customType2.StoreTermVectorOffsets   = true;
            Field termVectorField = NewField("termVector", "termVector", customType2);

            document.Add(termVectorField);
            for (int i = 0; i < 10; i++)
            {
                writer.AddDocument(document);
            }
            writer.Dispose();

            writer = new IndexWriter(dir, ((IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMaxBufferedDocs(2).SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH)).SetMergeScheduler(new SerialMergeScheduler()).SetMergePolicy(new LogDocMergePolicy()));
            for (int i = 0; i < 6; i++)
            {
                writer.AddDocument(document);
            }

            writer.ForceMerge(1);
            writer.Dispose();

            IndexReader reader = DirectoryReader.Open(dir);

            for (int i = 0; i < 10; i++)
            {
                reader.GetTermVectors(i);
                reader.Document(i);
            }
            reader.Dispose();
            dir.Dispose();
        }
        internal static void ModifyIndex(int i, Directory dir)
        {
            switch (i)
            {
            case 0:
            {
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: modify index");
                }
                IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
                w.DeleteDocuments(new Term("field2", "a11"));
                w.DeleteDocuments(new Term("field2", "b30"));
                w.Dispose();
                break;
            }

            case 1:
            {
                IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
                w.ForceMerge(1);
                w.Dispose();
                break;
            }

            case 2:
            {
                IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
                w.AddDocument(CreateDocument(101, 4));
                w.ForceMerge(1);
                w.AddDocument(CreateDocument(102, 4));
                w.AddDocument(CreateDocument(103, 4));
                w.Dispose();
                break;
            }

            case 3:
            {
                IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
                w.AddDocument(CreateDocument(101, 4));
                w.Dispose();
                break;
            }
            }
        }
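        // CreateDocument is not shown in this snippet. A rough sketch, assuming it
        // builds a document whose numbered fields ("field1", "field2", ...) contain
        // tokens derived from the document number (e.g. "a101", "b101"), which is
        // the shape the DeleteDocuments(new Term("field2", "a11")) call above relies on:
        internal static Document CreateDocument(int n, int numFields)
        {
            Document doc = new Document();
            string text = "a" + n + " b" + n;
            for (int i = 1; i <= numFields; i++)
            {
                doc.Add(NewTextField("field" + i, text, Field.Store.YES));
            }
            return doc;
        }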
        public virtual void TestNoPrxFile()
        {
            Directory      ram      = NewDirectory();
            Analyzer       analyzer = new MockAnalyzer(Random);
            IndexWriter    writer   = new IndexWriter(ram, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetMaxBufferedDocs(3).SetMergePolicy(NewLogMergePolicy()));
            LogMergePolicy lmp      = (LogMergePolicy)writer.Config.MergePolicy;

            lmp.MergeFactor = 2;
            lmp.NoCFSRatio  = 0.0;
            Document d = new Document();

            FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);

            ft.IndexOptions = IndexOptions.DOCS_AND_FREQS;
            Field f1 = NewField("f1", "this field has term freqs", ft);

            d.Add(f1);

            for (int i = 0; i < 30; i++)
            {
                writer.AddDocument(d);
            }

            writer.Commit();

            AssertNoPrx(ram);

            // now add some documents with positions, and check there is no prox after optimization
            d  = new Document();
            f1 = NewTextField("f1", "this field has positions", Field.Store.NO);
            d.Add(f1);

            for (int i = 0; i < 30; i++)
            {
                writer.AddDocument(d);
            }

            // force merge
            writer.ForceMerge(1);
            // flush
            writer.Dispose();

            AssertNoPrx(ram);
            ram.Dispose();
        }
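        // AssertNoPrx is not shown in this snippet. A minimal sketch, assuming it
        // verifies that no proximity (position) files exist in the directory:
        private void AssertNoPrx(Directory dir)
        {
            string[] files = dir.ListAll();
            for (int i = 0; i < files.Length; i++)
            {
                Assert.IsFalse(files[i].EndsWith(".prx"));
                Assert.IsFalse(files[i].EndsWith(".pos"));
            }
        }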
        private void CreateIndex(int numHits)
        {
            int numDocs = 500;

            Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader2) =>
            {
                return new TokenStreamComponents(new MockTokenizer(reader2, MockTokenizer.WHITESPACE, true));
            });
            Directory directory = new SeekCountingDirectory(this, new RAMDirectory());
            // note: test explicitly disables payloads
            IndexWriter writer = new IndexWriter(directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetMaxBufferedDocs(10).SetMergePolicy(NewLogMergePolicy(false)));

            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();
                string   content;
                if (i % (numDocs / numHits) == 0)
                {
                    // add a document that matches the query "term1 term2"
                    content = this.term1 + " " + this.term2;
                }
                else if (i % 15 == 0)
                {
                    // add a document that only contains term1
                    content = this.term1 + " " + this.term1;
                }
                else
                {
                    // add a document that contains term2 but not term1
                    content = this.term3 + " " + this.term2;
                }

                doc.Add(NewTextField(this.field, content, Documents.Field.Store.YES));
                writer.AddDocument(doc);
            }

            // make sure the index has only a single segment
            writer.ForceMerge(1);
            writer.Dispose();

            SegmentReader reader = GetOnlySegmentReader(DirectoryReader.Open(directory));

            this.searcher = NewSearcher(reader);
        }
        public virtual void TestSimpleCase()
        {
            string[] keywords = new string[] { "1", "2" };
            string[] unindexed = new string[] { "Netherlands", "Italy" };
            string[] unstored = new string[] { "Amsterdam has lots of bridges", "Venice has lots of canals" };
            string[] text = new string[] { "Amsterdam", "Venice" };

            Directory dir = NewDirectory();
            IndexWriter modifier = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false)).SetMaxBufferedDeleteTerms(1));

            FieldType custom1 = new FieldType();
            custom1.Stored = true;
            for (int i = 0; i < keywords.Length; i++)
            {
                Document doc = new Document();
                doc.Add(NewStringField("id", keywords[i], Field.Store.YES));
                doc.Add(NewField("country", unindexed[i], custom1));
                doc.Add(NewTextField("contents", unstored[i], Field.Store.NO));
                doc.Add(NewTextField("city", text[i], Field.Store.YES));
                modifier.AddDocument(doc);
            }
            modifier.ForceMerge(1);
            modifier.Commit();

            Term term = new Term("city", "Amsterdam");
            int hitCount = GetHitCount(dir, term);
            Assert.AreEqual(1, hitCount);
            if (VERBOSE)
            {
                Console.WriteLine("\nTEST: now delete by term=" + term);
            }
            modifier.DeleteDocuments(term);
            modifier.Commit();

            if (VERBOSE)
            {
                Console.WriteLine("\nTEST: now getHitCount");
            }
            hitCount = GetHitCount(dir, term);
            Assert.AreEqual(0, hitCount);

            modifier.Dispose();
            dir.Dispose();
        }
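        // GetHitCount is not shown in this snippet. A minimal sketch, assuming it
        // counts hits for a simple TermQuery over a freshly opened reader:
        private int GetHitCount(Directory dir, Term term)
        {
            IndexReader reader = DirectoryReader.Open(dir);
            int hitCount = NewSearcher(reader).Search(new TermQuery(term), null, 1000).TotalHits;
            reader.Dispose();
            return hitCount;
        }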
        public virtual void TestPartialMerge()
        {
            Directory dir = NewDirectory();

            Document doc = new Document();
            doc.Add(NewStringField("content", "aaa", Field.Store.NO));
            int incrMin = TEST_NIGHTLY ? 15 : 40;
            for (int numDocs = 10; numDocs < 500; numDocs += TestUtil.NextInt(Random(), incrMin, 5 * incrMin))
            {
                LogDocMergePolicy ldmp = new LogDocMergePolicy();
                ldmp.MinMergeDocs = 1;
                ldmp.MergeFactor = 5;
                IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode_e.CREATE).SetMaxBufferedDocs(2).SetMergePolicy(ldmp));
                for (int j = 0; j < numDocs; j++)
                {
                    writer.AddDocument(doc);
                }
                writer.Dispose();

                SegmentInfos sis = new SegmentInfos();
                sis.Read(dir);
                int segCount = sis.Size();

                ldmp = new LogDocMergePolicy();
                ldmp.MergeFactor = 5;
                writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(ldmp));
                writer.ForceMerge(3);
                writer.Dispose();

                sis = new SegmentInfos();
                sis.Read(dir);
                int optSegCount = sis.Size();

                if (segCount < 3)
                {
                    Assert.AreEqual(segCount, optSegCount);
                }
                else
                {
                    Assert.AreEqual(3, optSegCount);
                }
            }
            dir.Dispose();
        }
        public virtual void Test()
        {
            BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BPostings"));

            if (dir is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
            }

            IndexWriterConfig iwc = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH).SetRAMBufferSizeMB(256.0).SetMergeScheduler(new ConcurrentMergeScheduler()).SetMergePolicy(NewLogMergePolicy(false, 10)).SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE);

            IndexWriter w = new IndexWriter(dir, iwc);

            MergePolicy mp = w.Config.MergePolicy;

            if (mp is LogByteSizeMergePolicy)
            {
                // 1 petabyte:
                ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1024 * 1024 * 1024;
            }

            Document  doc = new Document();
            FieldType ft  = new FieldType(TextField.TYPE_NOT_STORED);

            ft.OmitNorms    = true;
            ft.IndexOptions = FieldInfo.IndexOptions.DOCS_ONLY;
            Field field = new Field("field", new MyTokenStream(), ft);

            doc.Add(field);

            int numDocs = (int.MaxValue / 26) + 1;

            for (int i = 0; i < numDocs; i++)
            {
                w.AddDocument(doc);
                if (VERBOSE && i % 100000 == 0)
                {
                    Console.WriteLine(i + " of " + numDocs + "...");
                }
            }
            w.ForceMerge(1);
            w.Dispose();
            dir.Dispose();
        }
        public void TestCustomMergeScheduler()
        {
            // we don't really need to execute anything, just make sure the custom
            // MergeScheduler compiles. But also ensure that it can actually be used
            // (e.g., no hidden dependencies), so don't use any random APIs!
            Directory         dir  = new RAMDirectory();
            IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, null);

            conf.SetMergeScheduler(new ReportingMergeScheduler());
            IndexWriter writer = new IndexWriter(dir, conf);

            writer.AddDocument(new Document());
            writer.Commit(); // trigger flush
            writer.AddDocument(new Document());
            writer.Commit(); // trigger flush
            writer.ForceMerge(1);
            writer.Dispose();
            dir.Dispose();
        }
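        // ReportingMergeScheduler is not shown in this snippet. A minimal sketch,
        // assuming it simply runs every pending merge on the calling thread (the
        // Merge/Dispose member names are assumptions based on this port's API):
        private class ReportingMergeScheduler : MergeScheduler
        {
            public override void Merge(IndexWriter writer, MergeTrigger trigger, bool newMergesFound)
            {
                MergePolicy.OneMerge merge;
                while ((merge = writer.NextMerge()) != null)
                {
                    writer.Merge(merge);
                }
            }

            public override void Dispose()
            {
            }
        }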
        public virtual void TestMixedRAM()
        {
            Directory   ram      = NewDirectory();
            Analyzer    analyzer = new MockAnalyzer(Random);
            IndexWriter writer   = new IndexWriter(ram, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetMaxBufferedDocs(10).SetMergePolicy(NewLogMergePolicy(2)));
            Document    d        = new Document();

            // this field will have Tf
            Field f1 = NewField("f1", "this field has term freqs", normalType);

            d.Add(f1);

            // this field will NOT have Tf
            Field f2 = NewField("f2", "this field has NO Tf in all docs", omitType);

            d.Add(f2);

            for (int i = 0; i < 5; i++)
            {
                writer.AddDocument(d);
            }

            for (int i = 0; i < 20; i++)
            {
                writer.AddDocument(d);
            }

            // force merge
            writer.ForceMerge(1);

            // flush
            writer.Dispose();

            SegmentReader reader = GetOnlySegmentReader(DirectoryReader.Open(ram));
            FieldInfos    fi     = reader.FieldInfos;

            Assert.AreEqual(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, fi.FieldInfo("f1").IndexOptions, "OmitTermFreqAndPositions field bit should not be set.");
            Assert.AreEqual(IndexOptions.DOCS_ONLY, fi.FieldInfo("f2").IndexOptions, "OmitTermFreqAndPositions field bit should be set.");

            reader.Dispose();
            ram.Dispose();
        }
        public virtual void TestBackgroundForceMerge()
        {
            Directory dir = NewDirectory();

            for (int pass = 0; pass < 2; pass++)
            {
                IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.CREATE).SetMaxBufferedDocs(2).SetMergePolicy(NewLogMergePolicy(51)));
                Document    doc    = new Document();
                doc.Add(NewStringField("field", "aaa", Field.Store.NO));
                for (int i = 0; i < 100; i++)
                {
                    writer.AddDocument(doc);
                }
                writer.ForceMerge(1, false);

                if (0 == pass)
                {
                    writer.Dispose();
                    DirectoryReader reader = DirectoryReader.Open(dir);
                    Assert.AreEqual(1, reader.Leaves.Count);
                    reader.Dispose();
                }
                else
                {
                    // Get another segment to flush so we can verify it is
                    // NOT included in the merging
                    writer.AddDocument(doc);
                    writer.AddDocument(doc);
                    writer.Dispose();

                    DirectoryReader reader = DirectoryReader.Open(dir);
                    Assert.IsTrue(reader.Leaves.Count > 1);
                    reader.Dispose();

                    SegmentInfos infos = new SegmentInfos();
                    infos.Read(dir);
                    Assert.AreEqual(2, infos.Count);
                }
            }

            dir.Dispose();
        }
        public virtual void TestBackgroundForceMerge()
        {
            Directory dir = NewDirectory();
            for (int pass = 0; pass < 2; pass++)
            {
                IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode_e.CREATE).SetMaxBufferedDocs(2).SetMergePolicy(NewLogMergePolicy(51)));
                Document doc = new Document();
                doc.Add(NewStringField("field", "aaa", Field.Store.NO));
                for (int i = 0; i < 100; i++)
                {
                    writer.AddDocument(doc);
                }
                writer.ForceMerge(1, false);

                if (0 == pass)
                {
                    writer.Dispose();
                    DirectoryReader reader = DirectoryReader.Open(dir);
                    Assert.AreEqual(1, reader.Leaves.Count);
                    reader.Dispose();
                }
                else
                {
                    // Get another segment to flush so we can verify it is
                    // NOT included in the merging
                    writer.AddDocument(doc);
                    writer.AddDocument(doc);
                    writer.Dispose();

                    DirectoryReader reader = DirectoryReader.Open(dir);
                    Assert.IsTrue(reader.Leaves.Count > 1);
                    reader.Dispose();

                    SegmentInfos infos = new SegmentInfos();
                    infos.Read(dir);
                    Assert.AreEqual(2, infos.Size());
                }
            }

            dir.Dispose();
        }
        /// <summary>
        /// Perform the upgrade. </summary>
        public void Upgrade()
        {
            if (!DirectoryReader.IndexExists(dir))
            {
                throw new IndexNotFoundException(dir.ToString());
            }

            if (!deletePriorCommits)
            {
                ICollection<IndexCommit> commits = DirectoryReader.ListCommits(dir);
                if (commits.Count > 1)
                {
                    throw new System.ArgumentException("this tool was invoked to not delete prior commit points, but the following commits were found: " + commits);
                }
            }

            IndexWriterConfig c = (IndexWriterConfig)iwc.Clone();

            c.MergePolicy         = new UpgradeIndexMergePolicy(c.MergePolicy);
            c.IndexDeletionPolicy = new KeepOnlyLastCommitDeletionPolicy();

            IndexWriter w = new IndexWriter(dir, c);

            try
            {
                InfoStream infoStream = c.InfoStream;
                if (infoStream.IsEnabled("IndexUpgrader"))
                {
                    infoStream.Message("IndexUpgrader", "Upgrading all pre-" + Constants.LUCENE_MAIN_VERSION + " segments of index directory '" + dir + "' to version " + Constants.LUCENE_MAIN_VERSION + "...");
                }
                w.ForceMerge(1);
                if (infoStream.IsEnabled("IndexUpgrader"))
                {
                    infoStream.Message("IndexUpgrader", "All segments upgraded to version " + Constants.LUCENE_MAIN_VERSION);
                }
            }
            finally
            {
                w.Dispose();
            }
        }
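        // A short usage sketch for the Upgrade method above, assuming the
        // surrounding class is IndexUpgrader with a (Directory, LuceneVersion)
        // constructor as in the Lucene API:
        //
        //   FSDirectory dir = FSDirectory.Open(new System.IO.DirectoryInfo("/path/to/index"));
        //   new IndexUpgrader(dir, LuceneVersion.LUCENE_CURRENT).Upgrade();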
        public virtual void TestLUCENE_1590()
        {
            Document doc = new Document();
            // f1 has no norms
            FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);

            customType.OmitNorms = true;
            FieldType customType2 = new FieldType();

            customType2.IsStored = true;
            doc.Add(NewField("f1", "v1", customType));
            doc.Add(NewField("f1", "v2", customType2));
            // f2 has no TF
            FieldType customType3 = new FieldType(TextField.TYPE_NOT_STORED);

            customType3.IndexOptions = IndexOptions.DOCS_ONLY;
            Field f = NewField("f2", "v1", customType3);

            doc.Add(f);
            doc.Add(NewField("f2", "v2", customType2));

            IndexWriter writer = new IndexWriter(Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));

            writer.AddDocument(doc);
            writer.ForceMerge(1); // be sure to have a single segment
            writer.Dispose();

            TestUtil.CheckIndex(Dir);

            SegmentReader reader = GetOnlySegmentReader(DirectoryReader.Open(Dir));
            FieldInfos    fi     = reader.FieldInfos;

            // f1
            Assert.IsFalse(fi.FieldInfo("f1").HasNorms, "f1 should have no norms");
            Assert.AreEqual(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, fi.FieldInfo("f1").IndexOptions, "omitTermFreqAndPositions field bit should not be set for f1");
            // f2
            Assert.IsTrue(fi.FieldInfo("f2").HasNorms, "f2 should have norms");
            Assert.AreEqual(IndexOptions.DOCS_ONLY, fi.FieldInfo("f2").IndexOptions, "omitTermFreqAndPositions field bit should be set for f2");
            reader.Dispose();
        }
        public virtual void TestKeepNoneOnInitDeletionPolicy()
        {
            for (int pass = 0; pass < 2; pass++)
            {
                bool useCompoundFile = (pass % 2) != 0;

                Directory dir = NewDirectory();

                IndexWriterConfig conf = (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.CREATE).SetIndexDeletionPolicy(new KeepNoneOnInitDeletionPolicy(this)).SetMaxBufferedDocs(10);
                MergePolicy       mp   = conf.MergePolicy;
                mp.NoCFSRatio = useCompoundFile ? 1.0 : 0.0;
                IndexWriter writer = new IndexWriter(dir, conf);
                KeepNoneOnInitDeletionPolicy policy = (KeepNoneOnInitDeletionPolicy)writer.Config.IndexDeletionPolicy;
                for (int i = 0; i < 107; i++)
                {
                    AddDoc(writer);
                }
                writer.Dispose();

                conf          = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.APPEND).SetIndexDeletionPolicy(policy);
                mp            = conf.MergePolicy;
                mp.NoCFSRatio = 1.0;
                writer        = new IndexWriter(dir, conf);
                policy        = (KeepNoneOnInitDeletionPolicy)writer.Config.IndexDeletionPolicy;
                writer.ForceMerge(1);
                writer.Dispose();

                Assert.AreEqual(2, policy.NumOnInit);
                // If we are not auto committing then there should
                // be exactly 2 commits (one per close above):
                Assert.AreEqual(2, policy.NumOnCommit);

                // Simplistic check: just verify the index is in fact
                // readable:
                IndexReader reader = DirectoryReader.Open(dir);
                reader.Dispose();

                dir.Dispose();
            }
        }
        public virtual void Test()
        {
            BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BPostings"));
            if (dir is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
            }

            IndexWriterConfig iwc = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH).SetRAMBufferSizeMB(256.0).SetMergeScheduler(new ConcurrentMergeScheduler()).SetMergePolicy(NewLogMergePolicy(false, 10)).SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE);

            IndexWriter w = new IndexWriter(dir, iwc);

            MergePolicy mp = w.Config.MergePolicy;
            if (mp is LogByteSizeMergePolicy)
            {
                // 1 petabyte:
                ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1024 * 1024 * 1024;
            }

            Document doc = new Document();
            FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
            ft.OmitNorms = true;
            ft.IndexOptions = FieldInfo.IndexOptions.DOCS_ONLY;
            Field field = new Field("field", new MyTokenStream(), ft);
            doc.Add(field);

            int numDocs = (int.MaxValue / 26) + 1;
            for (int i = 0; i < numDocs; i++)
            {
                w.AddDocument(doc);
                if (VERBOSE && i % 100000 == 0)
                {
                    Console.WriteLine(i + " of " + numDocs + "...");
                }
            }
            w.ForceMerge(1);
            w.Dispose();
            dir.Dispose();
        }
        public virtual void TestCommitUserData()
        {
            Directory   dir = NewDirectory();
            IndexWriter w   = new IndexWriter(dir, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(2));

            for (int j = 0; j < 17; j++)
            {
                AddDoc(w);
            }
            w.Dispose();

            DirectoryReader r = DirectoryReader.Open(dir);

            // commit(Map) never called for this index
            Assert.AreEqual(0, r.IndexCommit.UserData.Count);
            r.Dispose();

            w = new IndexWriter(dir, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(2));
            for (int j = 0; j < 17; j++)
            {
                AddDoc(w);
            }
            IDictionary<string, string> data = new Dictionary<string, string>();

            data["label"] = "test1";
            w.SetCommitData(data);
            w.Dispose();

            r = DirectoryReader.Open(dir);
            Assert.AreEqual("test1", r.IndexCommit.UserData["label"]);
            r.Dispose();

            w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
            w.ForceMerge(1);
            w.Dispose();

            dir.Dispose();
        }
 public virtual void TestTermOrd()
 {
     Directory d = NewDirectory();
     IndexWriter w = new IndexWriter(d, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetCodec(TestUtil.AlwaysPostingsFormat(new Lucene41PostingsFormat())));
     Documents.Document doc = new Documents.Document();
     doc.Add(NewTextField("f", "a b c", Field.Store.NO));
     w.AddDocument(doc);
     w.ForceMerge(1);
     DirectoryReader r = w.Reader;
     TermsEnum terms = GetOnlySegmentReader(r).Fields.Terms("f").Iterator(null);
     Assert.IsTrue(terms.Next() != null);
     try
     {
         Assert.AreEqual(0, terms.Ord());
     }
      catch (System.NotSupportedException)
      {
          // ok -- the codec is not required to support Ord()
      }
     r.Dispose();
     w.Dispose();
     d.Dispose();
 }
        public virtual void TestByteSizeLimit()
        {
            // tests that the max merge size constraint is applied during forceMerge.
            Directory dir = new RAMDirectory();

            // Prepare an index w/ several small segments and a large one.
            IndexWriterConfig conf        = NewWriterConfig();
            IndexWriter       writer      = new IndexWriter(dir, conf);
            const int         numSegments = 15;

            for (int i = 0; i < numSegments; i++)
            {
                int numDocs = i == 7 ? 30 : 1;
                AddDocs(writer, numDocs);
            }
            writer.Dispose();

            SegmentInfos sis = new SegmentInfos();

            sis.Read(dir);
            double min = sis.Info(0).SizeInBytes();

            conf = NewWriterConfig();
            LogByteSizeMergePolicy lmp = new LogByteSizeMergePolicy();

            lmp.MaxMergeMBForForcedMerge = (min + 1) / (1 << 20);
            conf.SetMergePolicy(lmp);

            writer = new IndexWriter(dir, conf);
            writer.ForceMerge(1);
            writer.Dispose();

            // Should only be 3 segments in the index, because one of them exceeds the size limit
            sis = new SegmentInfos();
            sis.Read(dir);
            Assert.AreEqual(3, sis.Size());
        }
        public virtual void TestLucene()
        {
            int num = 100;

            Directory indexA = NewDirectory();
            Directory indexB = NewDirectory();

            FillIndex(Random, indexA, 0, num);
            bool fail = VerifyIndex(indexA, 0);

            if (fail)
            {
                Assert.Fail("Index a is invalid");
            }

            FillIndex(Random, indexB, num, num);
            fail = VerifyIndex(indexB, num);
            if (fail)
            {
                Assert.Fail("Index b is invalid");
            }

            Directory merged = NewDirectory();

            IndexWriter writer = new IndexWriter(merged, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NewLogMergePolicy(2)));

            writer.AddIndexes(indexA, indexB);
            writer.ForceMerge(1);
            writer.Dispose();

            fail = VerifyIndex(merged, 0);

            Assert.IsFalse(fail, "The merged index is invalid");
            indexA.Dispose();
            indexB.Dispose();
            merged.Dispose();
        }
        public virtual void TestNonFlex()
        {
            Directory d = NewDirectory();

            const int DOC_COUNT = 177;

            IndexWriter w = new IndexWriter(d, (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))).SetMaxBufferedDocs(7).SetMergePolicy(NewLogMergePolicy()));

            for (int iter = 0; iter < 2; iter++)
            {
                if (iter == 0)
                {
                    Documents.Document doc = new Documents.Document();
                    doc.Add(NewTextField("field1", "this is field1", Field.Store.NO));
                    doc.Add(NewTextField("field2", "this is field2", Field.Store.NO));
                    doc.Add(NewTextField("field3", "aaa", Field.Store.NO));
                    doc.Add(NewTextField("field4", "bbb", Field.Store.NO));
                    for (int i = 0; i < DOC_COUNT; i++)
                    {
                        w.AddDocument(doc);
                    }
                }
                else
                {
                    w.ForceMerge(1);
                }

                IndexReader r = w.GetReader();

                TermsEnum terms = MultiFields.GetTerms(r, "field3").GetEnumerator();
                Assert.AreEqual(TermsEnum.SeekStatus.END, terms.SeekCeil(new BytesRef("abc")));
                r.Dispose();
            }

            w.Dispose();
            d.Dispose();
        }
        public virtual void TestNonFlex()
        {
            Directory d = NewDirectory();

            const int DOC_COUNT = 177;

            IndexWriter w = new IndexWriter(d, (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetMaxBufferedDocs(7).SetMergePolicy(NewLogMergePolicy()));

            for (int iter = 0; iter < 2; iter++)
            {
                if (iter == 0)
                {
                    Documents.Document doc = new Documents.Document();
                    doc.Add(NewTextField("field1", "this is field1", Field.Store.NO));
                    doc.Add(NewTextField("field2", "this is field2", Field.Store.NO));
                    doc.Add(NewTextField("field3", "aaa", Field.Store.NO));
                    doc.Add(NewTextField("field4", "bbb", Field.Store.NO));
                    for (int i = 0; i < DOC_COUNT; i++)
                    {
                        w.AddDocument(doc);
                    }
                }
                else
                {
                    w.ForceMerge(1);
                }

                IndexReader r = w.Reader;

                TermsEnum terms = MultiFields.GetTerms(r, "field3").Iterator(null);
                Assert.AreEqual(TermsEnum.SeekStatus.END, terms.SeekCeil(new BytesRef("abc")));
                r.Dispose();
            }

            w.Dispose();
            d.Dispose();
        }
        public virtual void TestLengthPrefixAcrossTwoPages()
        {
            Directory d = NewDirectory();
            IndexWriter w = new IndexWriter(d, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
            Document doc = new Document();
            var bytes = new byte[32764];
            BytesRef b = new BytesRef();
            b.Bytes = bytes;
            b.Length = bytes.Length;
            doc.Add(new SortedDocValuesField("field", b));
            w.AddDocument(doc);
            bytes[0] = 1;
            w.AddDocument(doc);
            w.ForceMerge(1);
            DirectoryReader r = w.Reader;
            BinaryDocValues s = FieldCache.DEFAULT.GetTerms(GetOnlySegmentReader(r), "field", false);

            BytesRef bytes1 = new BytesRef();
            s.Get(0, bytes1);
            Assert.AreEqual(bytes.Length, bytes1.Length);
            bytes[0] = 0;
            Assert.AreEqual(b, bytes1);

            s.Get(1, bytes1);
            Assert.AreEqual(bytes.Length, bytes1.Length);
            bytes[0] = 1;
            Assert.AreEqual(b, bytes1);
            r.Dispose();
            w.Dispose();
            d.Dispose();
        }
        public virtual void TestPositions()
        {
            Directory   ram      = NewDirectory();
            Analyzer    analyzer = new MockAnalyzer(Random);
            IndexWriter writer   = new IndexWriter(ram, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
            Document    d        = new Document();

            // f1,f2,f3: docs only
            FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);

            ft.IndexOptions = IndexOptions.DOCS_ONLY;

            Field f1 = NewField("f1", "this field has docs only", ft);

            d.Add(f1);

            Field f2 = NewField("f2", "this field has docs only", ft);

            d.Add(f2);

            Field f3 = NewField("f3", "this field has docs only", ft);

            d.Add(f3);

            FieldType ft2 = new FieldType(TextField.TYPE_NOT_STORED);

            ft2.IndexOptions = IndexOptions.DOCS_AND_FREQS;

            // f4,f5,f6 docs and freqs
            Field f4 = NewField("f4", "this field has docs and freqs", ft2);

            d.Add(f4);

            Field f5 = NewField("f5", "this field has docs and freqs", ft2);

            d.Add(f5);

            Field f6 = NewField("f6", "this field has docs and freqs", ft2);

            d.Add(f6);

            FieldType ft3 = new FieldType(TextField.TYPE_NOT_STORED);

            ft3.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;

            // f7,f8,f9 docs/freqs/positions
            Field f7 = NewField("f7", "this field has docs and freqs and positions", ft3);

            d.Add(f7);

            Field f8 = NewField("f8", "this field has docs and freqs and positions", ft3);

            d.Add(f8);

            Field f9 = NewField("f9", "this field has docs and freqs and positions", ft3);

            d.Add(f9);

            writer.AddDocument(d);
            writer.ForceMerge(1);

            // now we add another document which has docs-only for f1, f4, f7, docs/freqs for f2, f5, f8,
            // and docs/freqs/positions for f3, f6, f9
            d = new Document();

            // f1,f4,f7: docs only
            f1 = NewField("f1", "this field has docs only", ft);
            d.Add(f1);

            f4 = NewField("f4", "this field has docs only", ft);
            d.Add(f4);

            f7 = NewField("f7", "this field has docs only", ft);
            d.Add(f7);

            // f2, f5, f8: docs and freqs
            f2 = NewField("f2", "this field has docs and freqs", ft2);
            d.Add(f2);

            f5 = NewField("f5", "this field has docs and freqs", ft2);
            d.Add(f5);

            f8 = NewField("f8", "this field has docs and freqs", ft2);
            d.Add(f8);

            // f3, f6, f9: docs and freqs and positions
            f3 = NewField("f3", "this field has docs and freqs and positions", ft3);
            d.Add(f3);

            f6 = NewField("f6", "this field has docs and freqs and positions", ft3);
            d.Add(f6);

            f9 = NewField("f9", "this field has docs and freqs and positions", ft3);
            d.Add(f9);

            writer.AddDocument(d);

            // force merge
            writer.ForceMerge(1);
            // flush
            writer.Dispose();

            SegmentReader reader = GetOnlySegmentReader(DirectoryReader.Open(ram));
            FieldInfos    fi     = reader.FieldInfos;

            // docs + docs = docs
            Assert.AreEqual(IndexOptions.DOCS_ONLY, fi.FieldInfo("f1").IndexOptions);
            // docs + docs/freqs = docs
            Assert.AreEqual(IndexOptions.DOCS_ONLY, fi.FieldInfo("f2").IndexOptions);
            // docs + docs/freqs/pos = docs
            Assert.AreEqual(IndexOptions.DOCS_ONLY, fi.FieldInfo("f3").IndexOptions);
            // docs/freqs + docs = docs
            Assert.AreEqual(IndexOptions.DOCS_ONLY, fi.FieldInfo("f4").IndexOptions);
            // docs/freqs + docs/freqs = docs/freqs
            Assert.AreEqual(IndexOptions.DOCS_AND_FREQS, fi.FieldInfo("f5").IndexOptions);
            // docs/freqs + docs/freqs/pos = docs/freqs
            Assert.AreEqual(IndexOptions.DOCS_AND_FREQS, fi.FieldInfo("f6").IndexOptions);
            // docs/freqs/pos + docs = docs
            Assert.AreEqual(IndexOptions.DOCS_ONLY, fi.FieldInfo("f7").IndexOptions);
            // docs/freqs/pos + docs/freqs = docs/freqs
            Assert.AreEqual(IndexOptions.DOCS_AND_FREQS, fi.FieldInfo("f8").IndexOptions);
            // docs/freqs/pos + docs/freqs/pos = docs/freqs/pos
            Assert.AreEqual(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, fi.FieldInfo("f9").IndexOptions);

            reader.Dispose();
            ram.Dispose();
        }
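        // The nine assertions above follow a single rule: when one field is indexed with
        // different IndexOptions across documents or segments, the merged FieldInfo keeps
        // the least detailed of the two. A minimal sketch of that rule as a helper --
        // MergeIndexOptions is hypothetical, not part of the Lucene.NET API, and it
        // assumes the enum members are declared in order of increasing detail (as they
        // are in the Lucene 4.x sources these tests target):
        private static IndexOptions MergeIndexOptions(IndexOptions a, IndexOptions b)
        {
            // DOCS_ONLY < DOCS_AND_FREQS < DOCS_AND_FREQS_AND_POSITIONS, so the smaller
            // enum value is the level of detail both postings lists can still support.
            return (IndexOptions)Math.Min((int)a, (int)b);
        }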
        public virtual void TestKeepLastNDeletionPolicy()
        {
            const int N = 5;

            for (int pass = 0; pass < 2; pass++)
            {
                bool useCompoundFile = (pass % 2) != 0;

                Directory dir = NewDirectory();

                KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(this, N);
                for (int j = 0; j < N + 1; j++)
                {
                    IndexWriterConfig conf = (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.CREATE).SetIndexDeletionPolicy(policy).SetMaxBufferedDocs(10);
                    MergePolicy       mp   = conf.MergePolicy;
                    mp.NoCFSRatio = useCompoundFile ? 1.0 : 0.0;
                    IndexWriter writer = new IndexWriter(dir, conf);
                    policy = (KeepLastNDeletionPolicy)writer.Config.IndexDeletionPolicy;
                    for (int i = 0; i < 17; i++)
                    {
                        AddDoc(writer);
                    }
                    writer.ForceMerge(1);
                    writer.Dispose();
                }

                Assert.IsTrue(policy.NumDelete > 0);
                Assert.AreEqual(N + 1, policy.NumOnInit);
                Assert.AreEqual(N + 1, policy.NumOnCommit);

                // Simplistic check: just verify only the past N segments_N's still
                // exist, and, I can open a reader on each:
                dir.DeleteFile(IndexFileNames.SEGMENTS_GEN);
                long gen = SegmentInfos.GetLastCommitGeneration(dir);
                for (int i = 0; i < N + 1; i++)
                {
                    try
                    {
                        IndexReader reader = DirectoryReader.Open(dir);
                        reader.Dispose();
                        if (i == N)
                        {
                            Assert.Fail("should have failed on commits prior to last " + N);
                        }
                    }
                    catch (IOException /*e*/)
                    {
                        if (i != N)
                        {
                            throw; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details)
                        }
                    }
                    if (i < N)
                    {
                        dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));
                    }
                    gen--;
                }

                dir.Dispose();
            }
        }
        internal virtual void AddDocs(Random random, Directory dir, int ndocs, string field, string val, int maxTF, float percentDocs)
        {
            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(random, val, maxTF, percentDocs);

            Document doc = new Document();

            doc.Add(NewStringField(field, val, Field.Store.NO));
            IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetOpenMode(OpenMode_e.CREATE).SetMaxBufferedDocs(100).SetMergePolicy(NewLogMergePolicy(100)));

            for (int i = 0; i < ndocs; i++)
            {
                writer.AddDocument(doc);
            }

            writer.ForceMerge(1);
            writer.Dispose();
        }
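        // Added note: the AnalyzerAnonymousInnerClassHelper used here is defined
        // elsewhere; judging by its parameters it presumably repeats `val` with a random
        // term frequency of up to maxTF in roughly percentDocs of the documents. That is
        // an inference from the parameter names only -- the helper's body is not part of
        // this excerpt.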
        public virtual void Test2BTerms_Mem([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")] IConcurrentMergeScheduler scheduler)
        {
            if ("Lucene3x".Equals(Codec.Default.Name))
            {
                throw new Exception("this test cannot run with PreFlex codec");
            }
            Console.WriteLine("Starting Test2B");
            long TERM_COUNT = ((long)int.MaxValue) + 100000000;

            int TERMS_PER_DOC = TestUtil.NextInt(Random(), 100000, 1000000);

            IList<BytesRef> savedTerms = null;

            BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BTerms"));

            //MockDirectoryWrapper dir = NewFSDirectory(new File("/p/lucene/indices/2bindex"));
            if (dir is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
            }
            dir.CheckIndexOnClose = false; // don't double-checkindex

            if (true)
            {
                IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
                                                .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
                                                .SetRAMBufferSizeMB(256.0)
                                                .SetMergeScheduler(scheduler)
                                                .SetMergePolicy(NewLogMergePolicy(false, 10))
                                                .SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE));

                MergePolicy mp = w.Config.MergePolicy;
                if (mp is LogByteSizeMergePolicy)
                {
                    // 1 petabyte:
                    ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1024 * 1024 * 1024;
                }

                Documents.Document doc = new Documents.Document();
                MyTokenStream      ts  = new MyTokenStream(Random(), TERMS_PER_DOC);

                FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
                customType.IndexOptions = FieldInfo.IndexOptions.DOCS_ONLY;
                customType.OmitNorms    = true;
                Field field = new Field("field", ts, customType);
                doc.Add(field);
                //w.setInfoStream(System.out);
                int numDocs = (int)(TERM_COUNT / TERMS_PER_DOC);

                Console.WriteLine("TERMS_PER_DOC=" + TERMS_PER_DOC);
                Console.WriteLine("numDocs=" + numDocs);

                for (int i = 0; i < numDocs; i++)
                {
                    long t0 = Environment.TickCount;
                    w.AddDocument(doc);
                    Console.WriteLine(i + " of " + numDocs + " " + (Environment.TickCount - t0) + " msec");
                }
                savedTerms = ts.SavedTerms;

                Console.WriteLine("TEST: full merge");
                w.ForceMerge(1);
                Console.WriteLine("TEST: close writer");
                w.Dispose();
            }

            Console.WriteLine("TEST: open reader");
            IndexReader r = DirectoryReader.Open(dir);

            if (savedTerms == null)
            {
                savedTerms = FindTerms(r);
            }
            int numSavedTerms = savedTerms.Count;
            IList<BytesRef> bigOrdTerms = new List<BytesRef>(savedTerms.SubList(numSavedTerms - 10, numSavedTerms));

            Console.WriteLine("TEST: test big ord terms...");
            TestSavedTerms(r, bigOrdTerms);
            Console.WriteLine("TEST: test all saved terms...");
            TestSavedTerms(r, savedTerms);
            r.Dispose();

            Console.WriteLine("TEST: now CheckIndex...");
            CheckIndex.Status status = TestUtil.CheckIndex(dir);
            long tc = status.SegmentInfos[0].TermIndexStatus.TermCount;

            Assert.IsTrue(tc > int.MaxValue, "count " + tc + " is not > " + int.MaxValue);

            dir.Dispose();
            Console.WriteLine("TEST: done!");
        }
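        // Added note: TERM_COUNT is int.MaxValue + 100M, deliberately past the 32-bit
        // range. Everything up to the final CheckIndex call is plumbing to get there in
        // reasonable time (no auto-flush, a 256 MB RAM buffer, throttling disabled); the
        // proof is the last assertion, which requires the verified term count to exceed
        // int.MaxValue.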
        public virtual void Test([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")]IConcurrentMergeScheduler scheduler)
        {
            BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BPostingsBytes1"));
            if (dir is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
            }

            var config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
                            .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
                            .SetRAMBufferSizeMB(256.0)
                            .SetMergeScheduler(scheduler)
                            .SetMergePolicy(NewLogMergePolicy(false, 10))
                            .SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE);
            IndexWriter w = new IndexWriter(dir, config);

            MergePolicy mp = w.Config.MergePolicy;
            if (mp is LogByteSizeMergePolicy)
            {
                // 1 petabyte:
                ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1024 * 1024 * 1024;
            }

            Document doc = new Document();
            FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
            ft.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS;
            ft.OmitNorms = true;
            MyTokenStream tokenStream = new MyTokenStream();
            Field field = new Field("field", tokenStream, ft);
            doc.Add(field);

            const int numDocs = 1000;
            for (int i = 0; i < numDocs; i++)
            {
                if (i % 2 == 1) // trick blockPF's little optimization
                {
                    tokenStream.n = 65536;
                }
                else
                {
                    tokenStream.n = 65537;
                }
                w.AddDocument(doc);
            }
            w.ForceMerge(1);
            w.Dispose();

            DirectoryReader oneThousand = DirectoryReader.Open(dir);
            IndexReader[] subReaders = new IndexReader[1000];
            Arrays.Fill(subReaders, oneThousand);
            MultiReader mr = new MultiReader(subReaders);
            BaseDirectoryWrapper dir2 = NewFSDirectory(CreateTempDir("2BPostingsBytes2"));
            if (dir2 is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir2).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
            }
            IndexWriter w2 = new IndexWriter(dir2, new IndexWriterConfig(TEST_VERSION_CURRENT, null));
            w2.AddIndexes(mr);
            w2.ForceMerge(1);
            w2.Dispose();
            oneThousand.Dispose();

            DirectoryReader oneMillion = DirectoryReader.Open(dir2);
            subReaders = new IndexReader[2000];
            Arrays.Fill(subReaders, oneMillion);
            mr = new MultiReader(subReaders);
            BaseDirectoryWrapper dir3 = NewFSDirectory(CreateTempDir("2BPostingsBytes3"));
            if (dir3 is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir3).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
            }
            IndexWriter w3 = new IndexWriter(dir3, new IndexWriterConfig(TEST_VERSION_CURRENT, null));
            w3.AddIndexes(mr);
            w3.ForceMerge(1);
            w3.Dispose();
            oneMillion.Dispose();

            dir.Dispose();
            dir2.Dispose();
            dir3.Dispose();
        }
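        // Added note: the MultiReader aliasing above is how the test reaches
        // multi-billion postings bytes without indexing billions of documents one by
        // one. dir holds 1000 docs in a single segment; AddIndexes over 1000 copies of
        // that reader writes 1,000,000 docs to dir2, and 2000 copies of dir2's reader
        // write 2,000,000,000 docs to dir3 -- each doc's token stream emitting 65536 or
        // 65537 tokens (the n toggled in the loop), presumably of a single term.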
        public virtual void TestForceMergeTempSpaceUsage()
        {
            MockDirectoryWrapper dir = NewMockDirectory();
            IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(10).SetMergePolicy(NewLogMergePolicy()));
            if (VERBOSE)
            {
                Console.WriteLine("TEST: config1=" + writer.Config);
            }

            for (int j = 0; j < 500; j++)
            {
                TestIndexWriter.AddDocWithIndex(writer, j);
            }
            int termIndexInterval = writer.Config.TermIndexInterval;
            // force one extra segment w/ different doc store so
            // we see the doc stores get merged
            writer.Commit();
            TestIndexWriter.AddDocWithIndex(writer, 500);
            writer.Dispose();

            if (VERBOSE)
            {
                Console.WriteLine("TEST: start disk usage");
            }
            long startDiskUsage = 0;
            string[] files = dir.ListAll();
            for (int i = 0; i < files.Length; i++)
            {
                startDiskUsage += dir.FileLength(files[i]);
                if (VERBOSE)
                {
                    Console.WriteLine(files[i] + ": " + dir.FileLength(files[i]));
                }
            }

            dir.ResetMaxUsedSizeInBytes();
            dir.TrackDiskUsage = true;

            // Important to use the same term index interval, else a
            // smaller one here could increase the disk usage and
            // cause a false failure:
            writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode_e.APPEND).SetTermIndexInterval(termIndexInterval).SetMergePolicy(NewLogMergePolicy()));
            writer.ForceMerge(1);
            writer.Dispose();
            long maxDiskUsage = dir.MaxUsedSizeInBytes;
            Assert.IsTrue(maxDiskUsage <= 4 * startDiskUsage, "forceMerge used too much temporary space: starting usage was " + startDiskUsage + " bytes; max temp usage was " + maxDiskUsage + " but should have been " + (4 * startDiskUsage) + " (= 4X starting usage)");
            dir.Dispose();
        }
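        // Added note: the 4X ceiling is deliberately loose. A full merge needs the old
        // segments and the merged copy on disk at once -- the IndexWriter documentation
        // puts the transient cost at up to roughly 3X when compound files are involved --
        // so blowing past 4X of the starting usage signals that intermediate files are
        // not being deleted, which is exactly what this test guards against.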
        public virtual void TestFixedSorted([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")] IConcurrentMergeScheduler scheduler)
        {
            BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BFixedSorted"));

            if (dir is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
            }

            IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
                                            .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
                                            .SetRAMBufferSizeMB(256.0)
                                            .SetMergeScheduler(scheduler)
                                            .SetMergePolicy(NewLogMergePolicy(false, 10))
                                            .SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE));

            Document             doc     = new Document();
            var                  bytes   = new byte[2];
            BytesRef             data    = new BytesRef(bytes);
            SortedDocValuesField dvField = new SortedDocValuesField("dv", data);

            doc.Add(dvField);

            for (int i = 0; i < int.MaxValue; i++)
            {
                bytes[0] = (byte)(i >> 8);
                bytes[1] = (byte)i;
                w.AddDocument(doc);
                if (i % 100000 == 0)
                {
                    Console.WriteLine("indexed: " + i);
                    Console.Out.Flush();
                }
            }

            w.ForceMerge(1);
            w.Dispose();

            Console.WriteLine("verifying...");
            Console.Out.Flush();

            DirectoryReader r             = DirectoryReader.Open(dir);
            int             expectedValue = 0;

            foreach (AtomicReaderContext context in r.Leaves)
            {
                AtomicReader    reader  = context.AtomicReader;
                BytesRef        scratch = new BytesRef();
                BinaryDocValues dv      = reader.GetSortedDocValues("dv");
                for (int i = 0; i < reader.MaxDoc; i++)
                {
                    bytes[0] = (byte)(expectedValue >> 8);
                    bytes[1] = (byte)expectedValue;
                    dv.Get(i, scratch);
                    Assert.AreEqual(data, scratch);
                    expectedValue++;
                }
            }

            r.Dispose();
            dir.Dispose();
        }
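        // Added note: the sorted value is a fixed 2-byte payload, so it repeats every
        // 65536 documents across the int.MaxValue docs indexed above. The verification
        // loop is immune to that wraparound because it recomputes the expected bytes
        // from expectedValue with the same (byte) truncation used at index time, and
        // `data` wraps the very byte[] being mutated.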
        public virtual void TestSingleBigValueCompression()
        {
            Directory dir = new RAMDirectory();
            IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            IndexWriter iwriter = new IndexWriter(dir, iwc);

            Document doc = new Document();
            NumericDocValuesField dvf = new NumericDocValuesField("dv", 0);
            doc.Add(dvf);
            for (int i = 0; i < 20000; ++i)
            {
                dvf.LongValue = i & 1023;
                iwriter.AddDocument(doc);
            }
            iwriter.ForceMerge(1);
            long size1 = DirSize(dir);
            dvf.LongValue = long.MaxValue;
            iwriter.AddDocument(doc);
            iwriter.ForceMerge(1);
            long size2 = DirSize(dir);
            // make sure the new value did not grow the bpv for every other value
            Assert.IsTrue(size2 < size1 + (20000 * (63 - 10)) / 8);
        }
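        // The compression assertion above leans on two things worth spelling out.
        // First, the arithmetic: values 0..1023 need 10 bits each and long.MaxValue
        // needs 63, so (20000 * (63 - 10)) / 8 is the extra cost in bytes if the one big
        // value forced every small value up to 63 bits per value. Second, the DirSize
        // helper is not shown in this excerpt; a minimal sketch of what it presumably
        // does (an assumption, not the verbatim helper):
        private static long DirSize(Directory d)
        {
            long size = 0;
            foreach (string file in d.ListAll())
            {
                size += d.FileLength(file); // length in bytes of each index file
            }
            return size;
        }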
        public virtual void ChangeIndexNoAdds(Random random, Directory dir)
        {
            // make sure searching sees right # hits
            DirectoryReader reader = DirectoryReader.Open(dir);
            IndexSearcher searcher = NewSearcher(reader);
            ScoreDoc[] hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;
            Assert.AreEqual(34, hits.Length, "wrong number of hits");
            Document d = searcher.Doc(hits[0].Doc);
            assertEquals("wrong first document", "0", d.Get("id"));
            reader.Dispose();

            // fully merge
            IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).SetOpenMode(OpenMode_e.APPEND));
            writer.ForceMerge(1);
            writer.Dispose();

            reader = DirectoryReader.Open(dir);
            searcher = NewSearcher(reader);
            hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;
            Assert.AreEqual(34, hits.Length, "wrong number of hits");
            DoTestHits(hits, 34, searcher.IndexReader);
            reader.Dispose();
        }
        public virtual void TestFullyMergeOldIndex()
        {
            foreach (string name in OldNames)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: index=" + name);
                }
                Directory dir = NewDirectory(OldIndexDirs[name]);
                IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
                w.ForceMerge(1);
                w.Dispose();

                dir.Dispose();
            }
        }
        public virtual void TestCommitOnCloseDiskUsage()
        {
            // MemoryCodec, since it uses FST, is not necessarily
            // "additive", ie if you add up N small FSTs, then merge
            // them, the merged result can easily be larger than the
            // sum because the merged FST may use array encoding for
            // some arcs (which uses more space):

            string idFormat      = TestUtil.GetPostingsFormat("id");
            string contentFormat = TestUtil.GetPostingsFormat("content");

            AssumeFalse("this test cannot run with Memory codec", idFormat.Equals("Memory", StringComparison.Ordinal) || contentFormat.Equals("Memory", StringComparison.Ordinal));
            MockDirectoryWrapper dir = NewMockDirectory();
            Analyzer             analyzer;

            if (Random().NextBoolean())
            {
                // no payloads
                analyzer = new AnalyzerAnonymousInnerClassHelper(this);
            }
            else
            {
                // fixed length payloads
                int length = Random().Next(200);
                analyzer = new AnalyzerAnonymousInnerClassHelper2(this, length);
            }

            IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetMaxBufferedDocs(10).SetReaderPooling(false).SetMergePolicy(NewLogMergePolicy(10)));

            for (int j = 0; j < 30; j++)
            {
                AddDocWithIndex(writer, j);
            }
            writer.Dispose();
            dir.ResetMaxUsedSizeInBytes();

            dir.TrackDiskUsage = true;
            long startDiskUsage = dir.MaxUsedSizeInBytes;

            writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetOpenMode(OpenMode.APPEND).SetMaxBufferedDocs(10).SetMergeScheduler(new SerialMergeScheduler()).SetReaderPooling(false).SetMergePolicy(NewLogMergePolicy(10)));
            for (int j = 0; j < 1470; j++)
            {
                AddDocWithIndex(writer, j);
            }
            long midDiskUsage = dir.MaxUsedSizeInBytes;

            dir.ResetMaxUsedSizeInBytes();
            writer.ForceMerge(1);
            writer.Dispose();

            DirectoryReader.Open(dir).Dispose();

            long endDiskUsage = dir.MaxUsedSizeInBytes;

            // Ending index is 50X as large as starting index; due
            // to 3X disk usage normally we allow 150X max
            // transient usage.  If something is wrong w/ deleter
            // and it doesn't delete intermediate segments then it
            // will exceed this 150X:
            // System.out.println("start " + startDiskUsage + "; mid " + midDiskUsage + ";end " + endDiskUsage);
            Assert.IsTrue(midDiskUsage < 150 * startDiskUsage, "writer used too much space while adding documents: mid=" + midDiskUsage + " start=" + startDiskUsage + " end=" + endDiskUsage + " max=" + (startDiskUsage * 150));
            Assert.IsTrue(endDiskUsage < 150 * startDiskUsage, "writer used too much space after close: endDiskUsage=" + endDiskUsage + " startDiskUsage=" + startDiskUsage + " max=" + (startDiskUsage * 150));
            dir.Dispose();
        }
        public virtual void TestCommitOnCloseForceMerge()
        {
            Directory dir = NewDirectory();

            // Must disable throwing exc on double-write: this
            // test uses IW.rollback which easily results in
            // writing to same file more than once
            if (dir is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir).PreventDoubleWrite = false;
            }
            IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(10).SetMergePolicy(NewLogMergePolicy(10)));

            for (int j = 0; j < 17; j++)
            {
                AddDocWithIndex(writer, j);
            }
            writer.Dispose();

            writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode.APPEND));
            writer.ForceMerge(1);

            // Open a reader before closing (committing) the writer:
            DirectoryReader reader = DirectoryReader.Open(dir);

            // Reader should see index as multi-seg at this
            // point:
            Assert.IsTrue(reader.Leaves.Count > 1, "Reader incorrectly sees one segment");
            reader.Dispose();

            // Abort the writer:
            writer.Rollback();
            TestIndexWriter.AssertNoUnreferencedFiles(dir, "aborted writer after forceMerge");

            // Open a reader after aborting writer:
            reader = DirectoryReader.Open(dir);

            // Reader should still see index as multi-segment
            Assert.IsTrue(reader.Leaves.Count > 1, "Reader incorrectly sees one segment");
            reader.Dispose();

            if (VERBOSE)
            {
                Console.WriteLine("TEST: do real full merge");
            }
            writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode.APPEND));
            writer.ForceMerge(1);
            writer.Dispose();

            if (VERBOSE)
            {
                Console.WriteLine("TEST: writer closed");
            }
            TestIndexWriter.AssertNoUnreferencedFiles(dir, "aborted writer after forceMerge");

            // Open a reader after aborting writer:
            reader = DirectoryReader.Open(dir);

            // Reader should see index as one segment
            Assert.AreEqual(1, reader.Leaves.Count, "Reader incorrectly sees more than one segment");
            reader.Dispose();
            dir.Dispose();
        }
        public virtual void TestLiveChangeToCFS()
        {
            Directory dir = NewDirectory();
            IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            iwc.SetMergePolicy(NewLogMergePolicy(true));
            // Start false:
            iwc.SetUseCompoundFile(false);
            iwc.MergePolicy.NoCFSRatio = 0.0d;
            IndexWriter w = new IndexWriter(dir, iwc);
            // Change to true:
            w.Config.SetUseCompoundFile(true);

            Document doc = new Document();
            doc.Add(NewStringField("field", "foo", Store.NO));
            w.AddDocument(doc);
            w.Commit();
            Assert.IsTrue(w.NewestSegment().Info.UseCompoundFile, "Expected CFS after commit");

            doc.Add(NewStringField("field", "foo", Store.NO));
            w.AddDocument(doc);
            w.Commit();
            w.ForceMerge(1);
            w.Commit();

            // no compound files after merge
            Assert.IsFalse(w.NewestSegment().Info.UseCompoundFile, "Expected Non-CFS after merge");

            MergePolicy lmp = w.Config.MergePolicy;
            lmp.NoCFSRatio = 1.0;
            lmp.MaxCFSSegmentSizeMB = double.PositiveInfinity;

            w.AddDocument(doc);
            w.ForceMerge(1);
            w.Commit();
            Assert.IsTrue(w.NewestSegment().Info.UseCompoundFile, "Expected CFS after merge");
            w.Dispose();
            dir.Dispose();
        }
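        // Added note: NoCFSRatio is the fraction of the total index size up to which a
        // merged segment may still be written as a compound file. At 0.0 the merge above
        // has to produce a non-CFS segment even though the live config was switched to
        // compound files (hence the "Expected Non-CFS after merge" assertion); at 1.0,
        // with MaxCFSSegmentSizeMB unbounded, the next merge has to come out as CFS.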
        public virtual void TestAddIndexOnDiskFull()
        {
            // MemoryCodec, since it uses FST, is not necessarily
            // "additive", ie if you add up N small FSTs, then merge
            // them, the merged result can easily be larger than the
            // sum because the merged FST may use array encoding for
            // some arcs (which uses more space):

            string idFormat      = TestUtil.GetPostingsFormat("id");
            string contentFormat = TestUtil.GetPostingsFormat("content");

            AssumeFalse("this test cannot run with Memory codec", idFormat.Equals("Memory") || contentFormat.Equals("Memory"));

            int START_COUNT = 57;
            int NUM_DIR     = TEST_NIGHTLY ? 50 : 5;
            int END_COUNT   = START_COUNT + NUM_DIR * (TEST_NIGHTLY ? 25 : 5);

            // Build up a bunch of dirs that have indexes which we
            // will then merge together by calling addIndexes(*):
            Directory[] dirs           = new Directory[NUM_DIR];
            long        inputDiskUsage = 0;

            for (int i = 0; i < NUM_DIR; i++)
            {
                dirs[i] = NewDirectory();
                IndexWriter writer = new IndexWriter(dirs[i], NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
                for (int j = 0; j < 25; j++)
                {
                    AddDocWithIndex(writer, 25 * i + j);
                }
                writer.Dispose();
                string[] files = dirs[i].ListAll();
                for (int j = 0; j < files.Length; j++)
                {
                    inputDiskUsage += dirs[i].FileLength(files[j]);
                }
            }

            // Now, build a starting index that has START_COUNT docs.  We
            // will then try to addIndexes into a copy of this:
            MockDirectoryWrapper startDir  = NewMockDirectory();
            IndexWriter          indWriter = new IndexWriter(startDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));

            for (int j = 0; j < START_COUNT; j++)
            {
                AddDocWithIndex(indWriter, j);
            }
            indWriter.Dispose();

            // Make sure starting index seems to be working properly:
            Term        searchTerm = new Term("content", "aaa");
            IndexReader reader     = DirectoryReader.Open(startDir);

            Assert.AreEqual(57, reader.DocFreq(searchTerm), "first docFreq");

            IndexSearcher searcher = NewSearcher(reader);

            ScoreDoc[] hits = searcher.Search(new TermQuery(searchTerm), null, 1000).ScoreDocs;
            Assert.AreEqual(57, hits.Length, "first number of hits");
            reader.Dispose();

            // Iterate with larger and larger amounts of free
            // disk space.  With little free disk space,
            // addIndexes will certainly run out of space &
            // fail.  Verify that when this happens, index is
            // not corrupt and index in fact has added no
            // documents.  Then, we increase disk space by 2000
            // bytes each iteration.  At some point there is
            // enough free disk space and addIndexes should
            // succeed and index should show all documents were
            // added.

            // String[] files = startDir.ListAll();
            long diskUsage = startDir.GetSizeInBytes();

            long startDiskUsage = 0;

            string[] files_ = startDir.ListAll();
            for (int i = 0; i < files_.Length; i++)
            {
                startDiskUsage += startDir.FileLength(files_[i]);
            }

            for (int iter = 0; iter < 3; iter++)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: iter=" + iter);
                }

                // Start with 50-200 bytes more than we are currently using:
                long diskFree = diskUsage + TestUtil.NextInt(Random(), 50, 200);

                int method = iter;

                bool success = false;
                bool done    = false;

                string methodName;
                if (0 == method)
                {
                    methodName = "addIndexes(Directory[]) + forceMerge(1)";
                }
                else if (1 == method)
                {
                    methodName = "addIndexes(IndexReader[])";
                }
                else
                {
                    methodName = "addIndexes(Directory[])";
                }

                while (!done)
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: cycle...");
                    }

                    // Make a new dir that will enforce disk usage:
                    MockDirectoryWrapper dir = new MockDirectoryWrapper(Random(), new RAMDirectory(startDir, NewIOContext(Random())));
                    indWriter = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode.APPEND).SetMergePolicy(NewLogMergePolicy(false)));
                    IOException err = null;

                    IMergeScheduler ms = indWriter.Config.MergeScheduler;
                    for (int x = 0; x < 2; x++)
                    {
                        if (ms is IConcurrentMergeScheduler)
                        {
                            // this test intentionally produces exceptions
                            // in the threads that CMS launches; we don't
                            // want to pollute test output with these.
                            if (0 == x)
                            {
                                ((IConcurrentMergeScheduler)ms).SetSuppressExceptions();
                            }
                            else
                            {
                                ((IConcurrentMergeScheduler)ms).ClearSuppressExceptions();
                            }
                        }

                        // Two loops: first time, limit disk space &
                        // throw random IOExceptions; second time, no
                        // disk space limit:

                        double rate      = 0.05;
                        double diskRatio = ((double)diskFree) / diskUsage;
                        long   thisDiskFree;

                        string testName = null;

                        if (0 == x)
                        {
                            dir.RandomIOExceptionRateOnOpen = Random().NextDouble() * 0.01;
                            thisDiskFree = diskFree;
                            if (diskRatio >= 2.0)
                            {
                                rate /= 2;
                            }
                            if (diskRatio >= 4.0)
                            {
                                rate /= 2;
                            }
                            if (diskRatio >= 6.0)
                            {
                                rate = 0.0;
                            }
                            if (VERBOSE)
                            {
                                testName = "disk full test " + methodName + " with disk full at " + diskFree + " bytes";
                            }
                        }
                        else
                        {
                            dir.RandomIOExceptionRateOnOpen = 0.0;
                            thisDiskFree = 0;
                            rate         = 0.0;
                            if (VERBOSE)
                            {
                                testName = "disk full test " + methodName + " with unlimited disk space";
                            }
                        }

                        if (VERBOSE)
                        {
                            Console.WriteLine("\ncycle: " + testName);
                        }

                        dir.TrackDiskUsage        = true;
                        dir.MaxSizeInBytes        = thisDiskFree;
                        dir.RandomIOExceptionRate = rate;

                        try
                        {
                            if (0 == method)
                            {
                                if (VERBOSE)
                                {
                                    Console.WriteLine("TEST: now addIndexes count=" + dirs.Length);
                                }
                                indWriter.AddIndexes(dirs);
                                if (VERBOSE)
                                {
                                    Console.WriteLine("TEST: now forceMerge");
                                }
                                indWriter.ForceMerge(1);
                            }
                            else if (1 == method)
                            {
                                IndexReader[] readers = new IndexReader[dirs.Length];
                                for (int i = 0; i < dirs.Length; i++)
                                {
                                    readers[i] = DirectoryReader.Open(dirs[i]);
                                }
                                try
                                {
                                    indWriter.AddIndexes(readers);
                                }
                                finally
                                {
                                    for (int i = 0; i < dirs.Length; i++)
                                    {
                                        readers[i].Dispose();
                                    }
                                }
                            }
                            else
                            {
                                indWriter.AddIndexes(dirs);
                            }

                            success = true;
                            if (VERBOSE)
                            {
                                Console.WriteLine("  success!");
                            }

                            if (0 == x)
                            {
                                done = true;
                            }
                        }
                        catch (IOException e)
                        {
                            success = false;
                            err     = e;
                            if (VERBOSE)
                            {
                                Console.WriteLine("  hit IOException: " + e);
                                Console.WriteLine(e.StackTrace);
                            }

                            if (1 == x)
                            {
                                Console.WriteLine(e.StackTrace);
                                Assert.Fail(methodName + " hit IOException after disk space was freed up");
                            }
                        }

                        // Make sure all threads from
                        // ConcurrentMergeScheduler are done
                        TestUtil.SyncConcurrentMerges(indWriter);

                        if (VERBOSE)
                        {
                            Console.WriteLine("  now test readers");
                        }

                        // Finally, verify index is not corrupt, and, if
                        // we succeeded, we see all docs added, and if we
                        // failed, we see either all docs or no docs added
                        // (transactional semantics):
                        dir.RandomIOExceptionRateOnOpen = 0.0;
                        try
                        {
                            reader = DirectoryReader.Open(dir);
                        }
                        catch (IOException e)
                        {
                            Console.WriteLine(e.StackTrace);
                            Assert.Fail(testName + ": exception when creating IndexReader: " + e);
                        }
                        int result = reader.DocFreq(searchTerm);
                        if (success)
                        {
                            if (result != START_COUNT)
                            {
                                Assert.Fail(testName + ": method did not throw exception but docFreq('aaa') is " + result + " instead of expected " + START_COUNT);
                            }
                        }
                        else
                        {
                            // On hitting exception we still may have added
                            // all docs:
                            if (result != START_COUNT && result != END_COUNT)
                            {
                                Console.WriteLine(err.StackTrace);
                                Assert.Fail(testName + ": method did throw exception but docFreq('aaa') is " + result + " instead of expected " + START_COUNT + " or " + END_COUNT);
                            }
                        }

                        searcher = NewSearcher(reader);
                        try
                        {
                            hits = searcher.Search(new TermQuery(searchTerm), null, END_COUNT).ScoreDocs;
                        }
                        catch (IOException e)
                        {
                            Console.WriteLine(e.StackTrace);
                            Assert.Fail(testName + ": exception when searching: " + e);
                        }
                        int result2 = hits.Length;
                        if (success)
                        {
                            if (result2 != result)
                            {
                                Assert.Fail(testName + ": method did not throw exception but hits.Length for search on term 'aaa' is " + result2 + " instead of expected " + result);
                            }
                        }
                        else
                        {
                            // On hitting exception we still may have added
                            // all docs:
                            if (result2 != result)
                            {
                                Console.WriteLine(err.StackTrace);
                                Assert.Fail(testName + ": method did throw exception but hits.Length for search on term 'aaa' is " + result2 + " instead of expected " + result);
                            }
                        }

                        reader.Dispose();
                        if (VERBOSE)
                        {
                            Console.WriteLine("  count is " + result);
                        }

                        if (done || result == END_COUNT)
                        {
                            break;
                        }
                    }

                    if (VERBOSE)
                    {
                        Console.WriteLine("  start disk = " + startDiskUsage + "; input disk = " + inputDiskUsage + "; max used = " + dir.MaxUsedSizeInBytes);
                    }

                    if (done)
                    {
                        // Javadocs state that temp free Directory space
                        // required is at most 2X total input size of
                        // indices so let's make sure:
                        Assert.IsTrue((dir.MaxUsedSizeInBytes - startDiskUsage) < 2 * (startDiskUsage + inputDiskUsage), "max free Directory space required exceeded 2X the total input index sizes during " + methodName + ": max temp usage = " + (dir.MaxUsedSizeInBytes - startDiskUsage) + " bytes vs limit=" + (2 * (startDiskUsage + inputDiskUsage)) + "; starting disk usage = " + startDiskUsage + " bytes; " + "input index disk usage = " + inputDiskUsage + " bytes");
                    }

                    // Make sure we don't hit disk full during close below:
                    dir.MaxSizeInBytes              = 0;
                    dir.RandomIOExceptionRate       = 0.0;
                    dir.RandomIOExceptionRateOnOpen = 0.0;

                    indWriter.Dispose();

                    // Wait for all BG threads to finish else
                    // dir.Dispose() will throw IOException because
                    // there are still open files
                    TestUtil.SyncConcurrentMerges(ms);

                    dir.Dispose();

                    // Try again with more free space:
                    diskFree += TEST_NIGHTLY ? TestUtil.NextInt(Random(), 4000, 8000) : TestUtil.NextInt(Random(), 40000, 80000);
                }
            }

            startDir.Dispose();
            foreach (Directory dir in dirs)
            {
                dir.Dispose();
            }
        }
        public virtual void ChangeIndexWithAdds(Random random, Directory dir, string origOldName)
        {
            // open writer
            IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).SetOpenMode(OpenMode_e.APPEND).SetMergePolicy(NewLogMergePolicy()));
            // add 10 docs
            for (int i = 0; i < 10; i++)
            {
                AddDoc(writer, 35 + i);
            }

            // make sure writer sees right total -- writer seems not to know about deletes in .del?
            int expected;
            if (Compare(origOldName, "24") < 0)
            {
                expected = 44;
            }
            else
            {
                expected = 45;
            }
            Assert.AreEqual(expected, writer.NumDocs(), "wrong doc count");
            writer.Dispose();

            // make sure searching sees right # hits
            IndexReader reader = DirectoryReader.Open(dir);
            IndexSearcher searcher = NewSearcher(reader);
            ScoreDoc[] hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;
            Document d = searcher.IndexReader.Document(hits[0].Doc);
            assertEquals("wrong first document", "0", d.Get("id"));
            DoTestHits(hits, 44, searcher.IndexReader);
            reader.Dispose();

            // fully merge
            writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).SetOpenMode(OpenMode_e.APPEND).SetMergePolicy(NewLogMergePolicy()));
            writer.ForceMerge(1);
            writer.Dispose();

            reader = DirectoryReader.Open(dir);
            searcher = NewSearcher(reader);
            hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;
            Assert.AreEqual(44, hits.Length, "wrong number of hits");
            d = searcher.Doc(hits[0].Doc);
            DoTestHits(hits, 44, searcher.IndexReader);
            assertEquals("wrong first document", "0", d.Get("id"));
            reader.Dispose();
        }
        public virtual void TestThreadSafety()
        {
            Directory dir = NewDirectory();
            // NOTE: this also controls the number of threads!
            int         n      = TestUtil.NextInt(Random(), 20, 40);
            IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));

            for (int i = 0; i < n; i++)
            {
                writer.AddDocument(CreateDocument(i, 3));
            }
            writer.ForceMerge(1);
            writer.Dispose();

            TestReopen test = new TestReopenAnonymousInnerClassHelper3(this, dir, n);

            IList<ReaderCouple> readers = new SynchronizedCollection<ReaderCouple>();
            DirectoryReader      firstReader = DirectoryReader.Open(dir);
            DirectoryReader      reader      = firstReader;

            ReaderThread[]         threads        = new ReaderThread[n];
            ISet<DirectoryReader> readersToClose = new ConcurrentHashSet<DirectoryReader>(new HashSet<DirectoryReader>());

            for (int i = 0; i < n; i++)
            {
                if (i % 2 == 0)
                {
                    DirectoryReader refreshed = DirectoryReader.OpenIfChanged(reader);
                    if (refreshed != null)
                    {
                        readersToClose.Add(reader);
                        reader = refreshed;
                    }
                }
                DirectoryReader r = reader;

                int index = i;

                ReaderThreadTask task;

                if (i < 4 || (i >= 10 && i < 14) || i > 18)
                {
                    task = new ReaderThreadTaskAnonymousInnerClassHelper(this, test, readers, readersToClose, r, index);
                }
                else
                {
                    task = new ReaderThreadTaskAnonymousInnerClassHelper2(this, readers);
                }

                threads[i] = new ReaderThread(task);
                threads[i].Start();
            }

            lock (this)
            {
                Monitor.Wait(this, TimeSpan.FromMilliseconds(1000));
            }

            for (int i = 0; i < n; i++)
            {
                if (threads[i] != null)
                {
                    threads[i].StopThread();
                }
            }

            for (int i = 0; i < n; i++)
            {
                if (threads[i] != null)
                {
                    threads[i].Join();
                    if (threads[i].Error != null)
                    {
                        string msg = "Error occurred in thread " + threads[i].Name + ":\n" + threads[i].Error.Message;
                        Assert.Fail(msg);
                    }
                }
            }

            foreach (DirectoryReader readerToClose in readersToClose)
            {
                readerToClose.Dispose();
            }

            firstReader.Dispose();
            reader.Dispose();

            foreach (DirectoryReader readerToClose in readersToClose)
            {
                AssertReaderClosed(readerToClose, true);
            }

            AssertReaderClosed(reader, true);
            AssertReaderClosed(firstReader, true);

            dir.Dispose();
        }
        public virtual DirectoryInfo CreateIndex(string dirName, bool doCFS, bool fullyMerged)
        {
            // we use a real directory name that is not cleaned up, because this method is only used to create backwards indexes:
            DirectoryInfo indexDir = new DirectoryInfo(Path.Combine("/tmp/idx/", dirName));
            TestUtil.Rm(indexDir);
            Directory dir = NewFSDirectory(indexDir);
            LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
            mp.NoCFSRatio = doCFS ? 1.0 : 0.0;
            mp.MaxCFSSegmentSizeMB = double.PositiveInfinity;
            // TODO: remove randomness
            IndexWriterConfig conf = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetUseCompoundFile(doCFS).SetMaxBufferedDocs(10).SetMergePolicy(mp);
            IndexWriter writer = new IndexWriter(dir, conf);

            for (int i = 0; i < 35; i++)
            {
                AddDoc(writer, i);
            }
            Assert.AreEqual(35, writer.MaxDoc, "wrong doc count");
            if (fullyMerged)
            {
                writer.ForceMerge(1);
            }
            writer.Dispose();

            if (!fullyMerged)
            {
                // open fresh writer so we get no prx file in the added segment
                mp = new LogByteSizeMergePolicy();
                mp.NoCFSRatio = doCFS ? 1.0 : 0.0;
                // TODO: remove randomness
                conf = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetUseCompoundFile(doCFS).SetMaxBufferedDocs(10).SetMergePolicy(mp);
                writer = new IndexWriter(dir, conf);
                AddNoProxDoc(writer);
                writer.Dispose();

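                // Delete one doc so we get a .del file: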
                conf = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetUseCompoundFile(doCFS).SetMaxBufferedDocs(10).SetMergePolicy(doCFS ? NoMergePolicy.COMPOUND_FILES : NoMergePolicy.NO_COMPOUND_FILES);
                writer = new IndexWriter(dir, conf);
                Term searchTerm = new Term("id", "7");
                writer.DeleteDocuments(searchTerm);
                writer.Dispose();
            }

            dir.Dispose();

            return indexDir;
        }
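        // Opens an IndexWriter on a prior commit (kept alive by KeepAllDeletionPolicy)
        // and verifies the rollback/commit semantics of doing so.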
        public virtual void TestOpenPriorSnapshot()
        {
            Directory dir = NewDirectory();

            IndexWriter           writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetIndexDeletionPolicy(new KeepAllDeletionPolicy(this, dir)).SetMaxBufferedDocs(2).SetMergePolicy(NewLogMergePolicy(10)));
            KeepAllDeletionPolicy policy = (KeepAllDeletionPolicy)writer.Config.IndexDeletionPolicy;
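            // KeepAllDeletionPolicy retains every commit, so earlier commit points remain openable.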

            for (int i = 0; i < 10; i++)
            {
                AddDoc(writer);
                if ((1 + i) % 2 == 0)
                {
                    writer.Commit();
                }
            }
            writer.Dispose();

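            // We committed after every second document, so 10 docs yield 5 commits: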
            ICollection<IndexCommit> commits = DirectoryReader.ListCommits(dir);

            Assert.AreEqual(5, commits.Count);
            IndexCommit lastCommit = null;

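            // Pick the most recent commit (highest generation):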
            foreach (IndexCommit commit in commits)
            {
                if (lastCommit == null || commit.Generation > lastCommit.Generation)
                {
                    lastCommit = commit;
                }
            }
            Assert.IsNotNull(lastCommit);

            // Now add 1 doc and merge
            writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetIndexDeletionPolicy(policy));
            AddDoc(writer);
            Assert.AreEqual(11, writer.NumDocs);
            writer.ForceMerge(1);
            writer.Dispose();

            Assert.AreEqual(6, DirectoryReader.ListCommits(dir).Count);

            // Now open writer on the commit just before merge:
            writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetIndexDeletionPolicy(policy).SetIndexCommit(lastCommit));
            Assert.AreEqual(10, writer.NumDocs);

            // Rolling back undoes our switch to the prior commit, leaving the
            // latest (merged) commit in place:
            writer.Rollback();

            DirectoryReader r = DirectoryReader.Open(dir);

            // Still merged, still 11 docs
            Assert.AreEqual(1, r.Leaves.Count);
            Assert.AreEqual(11, r.NumDocs);
            r.Dispose();

            writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetIndexDeletionPolicy(policy).SetIndexCommit(lastCommit));
            Assert.AreEqual(10, writer.NumDocs);
            // Disposing commits, making the switch back to the prior commit permanent:
            writer.Dispose();

            // Now 7 because disposing the writer above made another commit
            Assert.AreEqual(7, DirectoryReader.ListCommits(dir).Count);

            r = DirectoryReader.Open(dir);
            // Not fully merged because we rolled it back, and now only
            // 10 docs
            Assert.IsTrue(r.Leaves.Count > 1);
            Assert.AreEqual(10, r.NumDocs);
            r.Dispose();

            // Re-merge
            writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetIndexDeletionPolicy(policy));
            writer.ForceMerge(1);
            writer.Dispose();

            r = DirectoryReader.Open(dir);
            Assert.AreEqual(1, r.Leaves.Count);
            Assert.AreEqual(10, r.NumDocs);
            r.Dispose();

            // Now open writer on the commit just before merging,
            // but this time keeping only the last commit:
            writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetIndexCommit(lastCommit));
            Assert.AreEqual(10, writer.NumDocs);

            // Reader still sees fully merged index, because writer
            // opened on the prior commit has not yet committed:
            r = DirectoryReader.Open(dir);
            Assert.AreEqual(1, r.Leaves.Count);
            Assert.AreEqual(10, r.NumDocs);
            r.Dispose();

            writer.Dispose();

            // Now reader sees not-fully-merged index:
            r = DirectoryReader.Open(dir);
            Assert.IsTrue(r.Leaves.Count > 1);
            Assert.AreEqual(10, r.NumDocs);
            r.Dispose();

            dir.Dispose();
        }
        public virtual void TestEmptyIndexWithVectors()
        {
            Directory rd1 = NewDirectory();
            {
                if (Verbose)
                {
                    Console.WriteLine("\nTEST: make 1st writer");
                }
                IndexWriter iw      = new IndexWriter(rd1, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));
                Document    doc     = new Document();
                Field       idField = NewTextField("id", "", Field.Store.NO);
                doc.Add(idField);
                FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
                customType.StoreTermVectors = true;
                doc.Add(NewField("test", "", customType));
                idField.SetStringValue("1");
                iw.AddDocument(doc);
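                // Reuse the same Document: doc "2" gets an extra "test" field without term vectors.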
                doc.Add(NewTextField("test", "", Field.Store.NO));
                idField.SetStringValue("2");
                iw.AddDocument(doc);
                iw.Dispose();

                IndexWriterConfig dontMergeConfig = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))).SetMergePolicy(NoMergePolicy.COMPOUND_FILES);
                if (Verbose)
                {
                    Console.WriteLine("\nTEST: make 2nd writer");
                }
                IndexWriter writer = new IndexWriter(rd1, dontMergeConfig);

                writer.DeleteDocuments(new Term("id", "1"));
                writer.Dispose();
                IndexReader ir = DirectoryReader.Open(rd1);
                Assert.AreEqual(2, ir.MaxDoc);
                Assert.AreEqual(1, ir.NumDocs);
                ir.Dispose();

                iw = new IndexWriter(rd1, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.APPEND));
                iw.ForceMerge(1);
                iw.Dispose();
            }

            Directory rd2 = NewDirectory();
            {
                IndexWriter iw  = new IndexWriter(rd2, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));
                Document    doc = new Document();
                iw.AddDocument(doc);
                iw.Dispose();
            }

            Directory rdOut = NewDirectory();

            IndexWriter          iwOut = new IndexWriter(rdOut, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));
            DirectoryReader      reader1, reader2;
            ParallelAtomicReader pr = new ParallelAtomicReader(SlowCompositeReaderWrapper.Wrap(reader1 = DirectoryReader.Open(rd1)), SlowCompositeReaderWrapper.Wrap(reader2 = DirectoryReader.Open(rd2)));

            // When unpatched, Lucene crashes here with an ArrayIndexOutOfBoundsException (caused by TermVectorsWriter)
            iwOut.AddIndexes(pr);

            // ParallelReader closes any IndexReader you added to it:
            pr.Dispose();

            // assert subreaders were closed
            Assert.AreEqual(0, reader1.RefCount);
            Assert.AreEqual(0, reader2.RefCount);

            rd1.Dispose();
            rd2.Dispose();

            iwOut.ForceMerge(1);
            iwOut.Dispose();

            rdOut.Dispose();
        }
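        // omitType and normalType are class-level FieldTypes; omitType presumably indexes
        // documents only (no term frequencies or positions), which this test exercises.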
        public virtual void TestBasic()
        {
            Directory   dir      = NewDirectory();
            Analyzer    analyzer = new MockAnalyzer(Random);
            IndexWriter writer   = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetMaxBufferedDocs(2).SetSimilarity(new SimpleSimilarity()).SetMergePolicy(NewLogMergePolicy(2)));

            StringBuilder sb   = new StringBuilder(265);
            string        term = "term";

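            // Document i contains "term" (i + 1) times; odd docs append "notf" to the
            // noTf field, even docs append "tf" to the tf field.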
            for (int i = 0; i < 30; i++)
            {
                Document doc = new Document();
                sb.Append(term).Append(" ");
                string content = sb.ToString();
                Field  noTf    = NewField("noTf", content + (i % 2 == 0 ? "" : " notf"), omitType);
                doc.Add(noTf);

                Field tf = NewField("tf", content + (i % 2 == 0 ? " tf" : ""), normalType);
                doc.Add(tf);

                writer.AddDocument(doc);
                //Console.WriteLine(doc);
            }

            writer.ForceMerge(1);
            // flush
            writer.Dispose();

            /*
             * Verify the index
             */
            IndexReader   reader   = DirectoryReader.Open(dir);
            IndexSearcher searcher = NewSearcher(reader);

            searcher.Similarity = new SimpleSimilarity();

            Term      a  = new Term("noTf", term);
            Term      b  = new Term("tf", term);
            Term      c  = new Term("noTf", "notf");
            Term      d  = new Term("tf", "tf");
            TermQuery q1 = new TermQuery(a);
            TermQuery q2 = new TermQuery(b);
            TermQuery q3 = new TermQuery(c);
            TermQuery q4 = new TermQuery(d);

            PhraseQuery pq = new PhraseQuery();

            pq.Add(a);
            pq.Add(c);
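            // A PhraseQuery on the noTf field must fail: positions were not indexed.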
            try
            {
                searcher.Search(pq, 10);
                Assert.Fail("did not hit expected exception");
            }
            catch (Exception e)
            {
                Exception cause = e;
                // If the searcher uses an executor service, the InvalidOperationException is wrapped in other exceptions
                while (cause.InnerException != null)
                {
                    cause = cause.InnerException;
                }
                if (!(cause is InvalidOperationException))
                {
                    throw new InvalidOperationException("Expected an InvalidOperationException", e);
                } // else OK because positions are not indexed
            }

            searcher.Search(q1, new CountingHitCollectorAnonymousClass(this));
            //Console.WriteLine(CountingHitCollector.Count);

            searcher.Search(q2, new CountingHitCollectorAnonymousClass2(this));
            //Console.WriteLine(CountingHitCollector.Count);

            searcher.Search(q3, new CountingHitCollectorAnonymousClass3(this));
            //Console.WriteLine(CountingHitCollector.Count);

            searcher.Search(q4, new CountingHitCollectorAnonymousClass4(this));
            //Console.WriteLine(CountingHitCollector.Count);

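            // q1 matches all 30 docs; q4 matches only the 15 even-numbered docs,
            // so the conjunction must collect exactly 15 hits.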
            BooleanQuery bq = new BooleanQuery();

            bq.Add(q1, Occur.MUST);
            bq.Add(q4, Occur.MUST);

            searcher.Search(bq, new CountingHitCollectorAnonymousClass5(this));
            Assert.AreEqual(15, CountingHitCollector.Count);

            reader.Dispose();
            dir.Dispose();
        }