Holds all the configuration that is used to create an IndexWriter. Once IndexWriter has been created with this object, changes to this object will not affect the IndexWriter instance. For that, use LiveIndexWriterConfig that is returned from IndexWriter#getConfig().

All setter methods return IndexWriterConfig to allow chaining settings conveniently, for example:

 IndexWriterConfig conf = new IndexWriterConfig(analyzer); conf.setter1().setter2(); 
Inheritance: LiveIndexWriterConfig, ICloneable
        public virtual void TestClone()
        {
            IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            IndexWriterConfig clone = (IndexWriterConfig)conf.Clone();

            // Make sure parameters that can't be reused are cloned
            IndexDeletionPolicy delPolicy = conf.DelPolicy;
            IndexDeletionPolicy delPolicyClone = clone.DelPolicy;
            Assert.IsTrue(delPolicy.GetType() == delPolicyClone.GetType() && (delPolicy != delPolicyClone || delPolicy.Clone() == delPolicyClone.Clone()));

            FlushPolicy flushPolicy = conf.FlushPolicy;
            FlushPolicy flushPolicyClone = clone.FlushPolicy;
            Assert.IsTrue(flushPolicy.GetType() == flushPolicyClone.GetType() && (flushPolicy != flushPolicyClone || flushPolicy.Clone() == flushPolicyClone.Clone()));

            DocumentsWriterPerThreadPool pool = conf.IndexerThreadPool;
            DocumentsWriterPerThreadPool poolClone = clone.IndexerThreadPool;
            Assert.IsTrue(pool.GetType() == poolClone.GetType() && (pool != poolClone || pool.Clone() == poolClone.Clone()));

            MergePolicy mergePolicy = conf.MergePolicy;
            MergePolicy mergePolicyClone = clone.MergePolicy;
            Assert.IsTrue(mergePolicy.GetType() == mergePolicyClone.GetType() && (mergePolicy != mergePolicyClone || mergePolicy.Clone() == mergePolicyClone.Clone()));

            MergeScheduler mergeSched = conf.MergeScheduler;
            MergeScheduler mergeSchedClone = clone.MergeScheduler;
            Assert.IsTrue(mergeSched.GetType() == mergeSchedClone.GetType() && (mergeSched != mergeSchedClone || mergeSched.Clone() == mergeSchedClone.Clone()));

            conf.SetMergeScheduler(new SerialMergeScheduler());
            Assert.AreEqual(typeof(ConcurrentMergeScheduler), clone.MergeScheduler.GetType());
        }
Exemple #2
0
        public async Task <ActionResult <DndSpell> > updateIndex()
        {
            var AppLuceneVersion = Lucene.Net.Util.LuceneVersion.LUCENE_48;

            var indexLocation = @"C:\Index";
            var dir           = FSDirectory.Open(indexLocation);

            var analyzer = new StandardAnalyzer(AppLuceneVersion);

            var indexconfig = new Lucene.Net.Index.IndexWriterConfig(AppLuceneVersion, analyzer);
            var write       = new IndexWriter(dir, indexconfig);

            var data = _context.DndSpell.ToList();

            foreach (var spell in data)
            {
                Document doc = new Document
                {
                    new StringField("name", spell.Name, Field.Store.YES),
                    new StringField("search_name", spell.Name.ToLower(), Field.Store.YES),
                    new StringField("id", spell.Id.ToString(), Field.Store.YES),
                    //new StringField("desc", spell.Description.ToLower(),Field.Store.YES),
                    new StringField("slug", spell.Slug.ToLower(), Field.Store.YES),
                };

                Console.WriteLine(string.Format(" writing {0}", spell.Name));
                write.AddDocument(doc);
                write.Flush(triggerMerge: false, applyAllDeletes: false);
                write.Commit();
            }

            return(NotFound());
        }
Exemple #3
0
        public void TestEmptyChildFilter()
        {
            Directory dir = NewDirectory();
            IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            config.SetMergePolicy(NoMergePolicy.NO_COMPOUND_FILES);
            // we don't want to merge - since we rely on certain segment setup
            IndexWriter w = new IndexWriter(dir, config);

            IList<Document> docs = new List<Document>();

            docs.Add(MakeJob("java", 2007));
            docs.Add(MakeJob("python", 2010));
            docs.Add(MakeResume("Lisa", "United Kingdom"));
            w.AddDocuments(docs);

            docs.Clear();
            docs.Add(MakeJob("ruby", 2005));
            docs.Add(MakeJob("java", 2006));
            docs.Add(MakeResume("Frank", "United States"));
            w.AddDocuments(docs);
            w.Commit();
            int num = AtLeast(10); // produce a segment that doesn't have a value in the docType field
            for (int i = 0; i < num; i++)
            {
                docs.Clear();
                docs.Add(MakeJob("java", 2007));
                w.AddDocuments(docs);
            }

            IndexReader r = DirectoryReader.Open(w, Random().NextBoolean());
            w.Dispose();
            assertTrue(r.Leaves.size() > 1);
            IndexSearcher s = new IndexSearcher(r);
            Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));

            BooleanQuery childQuery = new BooleanQuery();
            childQuery.Add(new BooleanClause(new TermQuery(new Term("skill", "java")), BooleanClause.Occur.MUST));
            childQuery.Add(new BooleanClause(NumericRangeQuery.NewIntRange("year", 2006, 2011, true, true), BooleanClause.Occur.MUST));

            ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);

            BooleanQuery fullQuery = new BooleanQuery();
            fullQuery.Add(new BooleanClause(childJoinQuery, BooleanClause.Occur.MUST));
            fullQuery.Add(new BooleanClause(new MatchAllDocsQuery(), BooleanClause.Occur.MUST));
            ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(Sort.RELEVANCE, 1, true, true);
            s.Search(fullQuery, c);
            TopGroups<int> results = c.GetTopGroups(childJoinQuery, null, 0, 10, 0, true);
            assertFalse(float.IsNaN(results.MaxScore));
            assertEquals(1, results.TotalGroupedHitCount);
            assertEquals(1, results.Groups.Length);
            IGroupDocs<int> group = results.Groups[0];
            Document childDoc = s.Doc(group.ScoreDocs[0].Doc);
            assertEquals("java", childDoc.Get("skill"));
            assertNotNull(group.GroupValue);
            Document parentDoc = s.Doc(group.GroupValue);
            assertEquals("Lisa", parentDoc.Get("name"));

            r.Dispose();
            dir.Dispose();
        }
        public virtual void TestDateCompression()
        {
            Directory dir = new RAMDirectory();
            IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            IndexWriter iwriter = new IndexWriter(dir, iwc);

            const long @base = 13; // prime
            long day = 1000L * 60 * 60 * 24;

            Document doc = new Document();
            NumericDocValuesField dvf = new NumericDocValuesField("dv", 0);
            doc.Add(dvf);
            for (int i = 0; i < 300; ++i)
            {
                dvf.LongValue = @base + Random().Next(1000) * day;
                iwriter.AddDocument(doc);
            }
            iwriter.ForceMerge(1);
            long size1 = DirSize(dir);
            for (int i = 0; i < 50; ++i)
            {
                dvf.LongValue = @base + Random().Next(1000) * day;
                iwriter.AddDocument(doc);
            }
            iwriter.ForceMerge(1);
            long size2 = DirSize(dir);
            // make sure the new longs costed less than if they had only been packed
            Assert.IsTrue(size2 < size1 + (PackedInts.BitsRequired(day) * 50) / 8);
        }
        public virtual void TestInfoStreamGetsFieldName()
        {
            Directory dir = NewDirectory();
            IndexWriter writer;
            IndexWriterConfig c = new IndexWriterConfig(TEST_VERSION_CURRENT, new ThrowingAnalyzer());
            ByteArrayOutputStream infoBytes = new ByteArrayOutputStream();
            StreamWriter infoPrintStream = new StreamWriter(infoBytes, Encoding.UTF8);
            PrintStreamInfoStream printStreamInfoStream = new PrintStreamInfoStream(infoPrintStream);
            c.InfoStream = printStreamInfoStream;
            writer = new IndexWriter(dir, c);
            Document doc = new Document();
            doc.Add(NewField("distinctiveFieldName", "aaa ", StoredTextType));
            try
            {
                writer.AddDocument(doc);
                Assert.Fail("Failed to fail.");
            }
            catch (BadNews badNews)
            {
                infoPrintStream.Flush();
                string infoStream = Encoding.UTF8.GetString(infoBytes.GetBuffer());
                Assert.IsTrue(infoStream.Contains("distinctiveFieldName"));
            }

            writer.Dispose();
            dir.Dispose();
        }
 public PKIndexSplitter(Directory input, Directory dir1, Directory dir2, Filter docsInFirstIndex, IndexWriterConfig config1, IndexWriterConfig config2)
 {
     this.input = input;
     this.dir1 = dir1;
     this.dir2 = dir2;
     this.docsInFirstIndex = docsInFirstIndex;
     this.config1 = config1;
     this.config2 = config2;
 }
 public override void SetUp()
 {
     Directory = NewDirectory();
     IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
     IndexWriter indexWriter = new IndexWriter(Directory, config);
     for (int i = 0; i < AMOUNT_OF_SEGMENTS; i++)
     {
         IList<Document> segmentDocs = CreateDocsForSegment(i);
         indexWriter.AddDocuments(segmentDocs);
         indexWriter.Commit();
     }
     IndexReader = DirectoryReader.Open(indexWriter, Random().NextBoolean());
     indexWriter.Dispose();
     IndexSearcher = new IndexSearcher(IndexReader);
     ParentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new WildcardQuery(new Term("parent", "*"))));
 }
        public virtual void Test([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")]IConcurrentMergeScheduler scheduler)
        {
            BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BPostings"));
            if (dir is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
            }

            var config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
                            .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
                            .SetRAMBufferSizeMB(256.0)
                            .SetMergeScheduler(scheduler)
                            .SetMergePolicy(NewLogMergePolicy(false, 10))
                            .SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE);

            IndexWriter w = new IndexWriter(dir, config);

            MergePolicy mp = w.Config.MergePolicy;
            if (mp is LogByteSizeMergePolicy)
            {
                // 1 petabyte:
                ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1024 * 1024 * 1024;
            }

            Document doc = new Document();
            FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
            ft.OmitNorms = true;
            ft.IndexOptions = FieldInfo.IndexOptions.DOCS_ONLY;
            Field field = new Field("field", new MyTokenStream(), ft);
            doc.Add(field);

            int numDocs = (int.MaxValue / 26) + 1;
            for (int i = 0; i < numDocs; i++)
            {
                w.AddDocument(doc);
                if (VERBOSE && i % 100000 == 0)
                {
                    Console.WriteLine(i + " of " + numDocs + "...");
                }
            }
            w.ForceMerge(1);
            w.Dispose();
            dir.Dispose();
        }
        public virtual void TestUniqueValuesCompression()
        {
            Directory dir = new RAMDirectory();
            IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            IndexWriter iwriter = new IndexWriter(dir, iwc);

            int uniqueValueCount = TestUtil.NextInt(Random(), 1, 256);
            IList<long> values = new List<long>();

            Document doc = new Document();
            NumericDocValuesField dvf = new NumericDocValuesField("dv", 0);
            doc.Add(dvf);
            for (int i = 0; i < 300; ++i)
            {
                long value;
                if (values.Count < uniqueValueCount)
                {
                    value = Random().NextLong();
                    values.Add(value);
                }
                else
                {
                    value = RandomInts.RandomFrom(Random(), values);
                }
                dvf.LongValue = value;
                iwriter.AddDocument(doc);
            }
            iwriter.ForceMerge(1);
            long size1 = DirSize(dir);
            for (int i = 0; i < 20; ++i)
            {
                dvf.LongValue = RandomInts.RandomFrom(Random(), values);
                iwriter.AddDocument(doc);
            }
            iwriter.ForceMerge(1);
            long size2 = DirSize(dir);
            // make sure the new longs did not cost 8 bytes each
            Assert.IsTrue(size2 < size1 + 8 * 20);
        }
        public virtual void TestSingleBigValueCompression()
        {
            Directory dir = new RAMDirectory();
            IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            IndexWriter iwriter = new IndexWriter(dir, iwc);

            Document doc = new Document();
            NumericDocValuesField dvf = new NumericDocValuesField("dv", 0);
            doc.Add(dvf);
            for (int i = 0; i < 20000; ++i)
            {
                dvf.LongValue = i & 1023;
                iwriter.AddDocument(doc);
            }
            iwriter.ForceMerge(1);
            long size1 = DirSize(dir);
            dvf.LongValue = long.MaxValue;
            iwriter.AddDocument(doc);
            iwriter.ForceMerge(1);
            long size2 = DirSize(dir);
            // make sure the new value did not grow the bpv for every other value
            Assert.IsTrue(size2 < size1 + (20000 * (63 - 10)) / 8);
        }
        public virtual void DoTestNumbers(bool withPayloads)
        {
            Directory dir = NewDirectory();
            Analyzer analyzer = withPayloads ? (Analyzer)new MockPayloadAnalyzer() : new MockAnalyzer(Random());
            Iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
            Iwc.SetMergePolicy(NewLogMergePolicy()); // will rely on docids a bit for skipping
            RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Iwc);

            FieldType ft = new FieldType(TextField.TYPE_STORED);
            ft.IndexOptionsValue = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
            if (Random().NextBoolean())
            {
                ft.StoreTermVectors = true;
                ft.StoreTermVectorOffsets = Random().NextBoolean();
                ft.StoreTermVectorPositions = Random().NextBoolean();
            }

            int numDocs = AtLeast(500);
            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();
                doc.Add(new Field("numbers", English.IntToEnglish(i), ft));
                doc.Add(new Field("oddeven", (i % 2) == 0 ? "even" : "odd", ft));
                doc.Add(new StringField("id", "" + i, Field.Store.NO));
                w.AddDocument(doc);
            }

            IndexReader reader = w.Reader;
            w.Dispose();

            string[] terms = new string[] { "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten", "hundred" };

            foreach (string term in terms)
            {
                DocsAndPositionsEnum dp = MultiFields.GetTermPositionsEnum(reader, null, "numbers", new BytesRef(term));
                int doc;
                while ((doc = dp.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                {
                    string storedNumbers = reader.Document(doc).Get("numbers");
                    int freq = dp.Freq();
                    for (int i = 0; i < freq; i++)
                    {
                        dp.NextPosition();
                        int start = dp.StartOffset();
                        Debug.Assert(start >= 0);
                        int end = dp.EndOffset();
                        Debug.Assert(end >= 0 && end >= start);
                        // check that the offsets correspond to the term in the src text
                        Assert.IsTrue(storedNumbers.Substring(start, end - start).Equals(term));
                        if (withPayloads)
                        {
                            // check that we have a payload and it starts with "pos"
                            Assert.IsNotNull(dp.Payload);
                            BytesRef payload = dp.Payload;
                            Assert.IsTrue(payload.Utf8ToString().StartsWith("pos:"));
                        } // note: withPayloads=false doesnt necessarily mean we dont have them from MockAnalyzer!
                    }
                }
            }

            // check we can skip correctly
            int numSkippingTests = AtLeast(50);

            for (int j = 0; j < numSkippingTests; j++)
            {
                int num = TestUtil.NextInt(Random(), 100, Math.Min(numDocs - 1, 999));
                DocsAndPositionsEnum dp = MultiFields.GetTermPositionsEnum(reader, null, "numbers", new BytesRef("hundred"));
                int doc = dp.Advance(num);
                Assert.AreEqual(num, doc);
                int freq = dp.Freq();
                for (int i = 0; i < freq; i++)
                {
                    string storedNumbers = reader.Document(doc).Get("numbers");
                    dp.NextPosition();
                    int start = dp.StartOffset();
                    Debug.Assert(start >= 0);
                    int end = dp.EndOffset();
                    Debug.Assert(end >= 0 && end >= start);
                    // check that the offsets correspond to the term in the src text
                    Assert.IsTrue(storedNumbers.Substring(start, end - start).Equals("hundred"));
                    if (withPayloads)
                    {
                        // check that we have a payload and it starts with "pos"
                        Assert.IsNotNull(dp.Payload);
                        BytesRef payload = dp.Payload;
                        Assert.IsTrue(payload.Utf8ToString().StartsWith("pos:"));
                    } // note: withPayloads=false doesnt necessarily mean we dont have them from MockAnalyzer!
                }
            }

            // check that other fields (without offsets) work correctly

            for (int i = 0; i < numDocs; i++)
            {
                DocsEnum dp = MultiFields.GetTermDocsEnum(reader, null, "id", new BytesRef("" + i), 0);
                Assert.AreEqual(i, dp.NextDoc());
                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dp.NextDoc());
            }

            reader.Dispose();
            dir.Dispose();
        }
        public virtual void TestDeletes1()
        {
            //IndexWriter.debug2 = System.out;
            Directory dir = new MockDirectoryWrapper(new Random(Random().Next()), new RAMDirectory());
            IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            iwc.SetMergeScheduler(new SerialMergeScheduler());
            iwc.SetMaxBufferedDocs(5000);
            iwc.SetRAMBufferSizeMB(100);
            RangeMergePolicy fsmp = new RangeMergePolicy(this, false);
            iwc.SetMergePolicy(fsmp);
            IndexWriter writer = new IndexWriter(dir, iwc);
            for (int x = 0; x < 5; x++)
            {
                writer.AddDocument(DocHelper.CreateDocument(x, "1", 2));
                //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
            }
            //System.out.println("commit1");
            writer.Commit();
            Assert.AreEqual(1, writer.SegmentCount);
            for (int x = 5; x < 10; x++)
            {
                writer.AddDocument(DocHelper.CreateDocument(x, "2", 2));
                //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
            }
            //System.out.println("commit2");
            writer.Commit();
            Assert.AreEqual(2, writer.SegmentCount);

            for (int x = 10; x < 15; x++)
            {
                writer.AddDocument(DocHelper.CreateDocument(x, "3", 2));
                //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
            }

            writer.DeleteDocuments(new Term("id", "1"));

            writer.DeleteDocuments(new Term("id", "11"));

            // flushing without applying deletes means
            // there will still be deletes in the segment infos
            writer.Flush(false, false);
            Assert.IsTrue(writer.BufferedUpdatesStreamAny);

            // get reader flushes pending deletes
            // so there should not be anymore
            IndexReader r1 = writer.Reader;
            Assert.IsFalse(writer.BufferedUpdatesStreamAny);
            r1.Dispose();

            // delete id:2 from the first segment
            // merge segments 0 and 1
            // which should apply the delete id:2
            writer.DeleteDocuments(new Term("id", "2"));
            writer.Flush(false, false);
            fsmp = (RangeMergePolicy)writer.Config.MergePolicy;
            fsmp.DoMerge = true;
            fsmp.Start = 0;
            fsmp.Length = 2;
            writer.MaybeMerge();

            Assert.AreEqual(2, writer.SegmentCount);

            // id:2 shouldn't exist anymore because
            // it's been applied in the merge and now it's gone
            IndexReader r2 = writer.Reader;
            int[] id2docs = ToDocsArray(new Term("id", "2"), null, r2);
            Assert.IsTrue(id2docs == null);
            r2.Dispose();

            /*
            /// // added docs are in the ram buffer
            /// for (int x = 15; x < 20; x++) {
            ///  writer.AddDocument(TestIndexWriterReader.CreateDocument(x, "4", 2));
            ///  System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
            /// }
            /// Assert.IsTrue(writer.numRamDocs() > 0);
            /// // delete from the ram buffer
            /// writer.DeleteDocuments(new Term("id", Integer.toString(13)));
            ///
            /// Term id3 = new Term("id", Integer.toString(3));
            ///
            /// // delete from the 1st segment
            /// writer.DeleteDocuments(id3);
            ///
            /// Assert.IsTrue(writer.numRamDocs() > 0);
            ///
            /// //System.out
            /// //    .println("segdels1:" + writer.docWriter.deletesToString());
            ///
            /// //Assert.IsTrue(writer.docWriter.segmentDeletes.Size() > 0);
            ///
            /// // we cause a merge to happen
            /// fsmp.doMerge = true;
            /// fsmp.start = 0;
            /// fsmp.Length = 2;
            /// System.out.println("maybeMerge "+writer.SegmentInfos);
            ///
            /// SegmentInfo info0 = writer.SegmentInfos.Info(0);
            /// SegmentInfo info1 = writer.SegmentInfos.Info(1);
            ///
            /// writer.MaybeMerge();
            /// System.out.println("maybeMerge after "+writer.SegmentInfos);
            /// // there should be docs in RAM
            /// Assert.IsTrue(writer.numRamDocs() > 0);
            ///
            /// // assert we've merged the 1 and 2 segments
            /// // and still have a segment leftover == 2
            /// Assert.AreEqual(2, writer.SegmentInfos.Size());
            /// Assert.IsFalse(segThere(info0, writer.SegmentInfos));
            /// Assert.IsFalse(segThere(info1, writer.SegmentInfos));
            ///
            /// //System.out.println("segdels2:" + writer.docWriter.deletesToString());
            ///
            /// //Assert.IsTrue(writer.docWriter.segmentDeletes.Size() > 0);
            ///
            /// IndexReader r = writer.GetReader();
            /// IndexReader r1 = r.getSequentialSubReaders()[0];
            /// printDelDocs(r1.GetLiveDocs());
            /// int[] docs = toDocsArray(id3, null, r);
            /// System.out.println("id3 docs:"+Arrays.toString(docs));
            /// // there shouldn't be any docs for id:3
            /// Assert.IsTrue(docs == null);
            /// r.Dispose();
            ///
            /// part2(writer, fsmp);
            ///
            */
            // System.out.println("segdels2:"+writer.docWriter.segmentDeletes.toString());
            //System.out.println("close");
            writer.Dispose();
            dir.Dispose();
        }
        public virtual void TestSortedSetWithDups()
        {
            AssumeTrue("codec does not support SORTED_SET", DefaultCodecSupportsSortedSet);
            Directory dir = NewDirectory();

            IndexWriterConfig iwc = NewIndexWriterConfig(Random, TEST_VERSION_CURRENT, null);

            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter iw = new RandomIndexWriter(Random, dir, iwc);

            int numDocs = AtLeast(500);

            for (int i = 0; i < numDocs; i++)
            {
                Document doc       = new Document();
                int      numValues = Random.Next(5);
                for (int j = 0; j < numValues; j++)
                {
                    doc.Add(new SortedSetDocValuesField("bytes", new BytesRef(TestUtil.RandomSimpleString(Random, 2))));
                }
                iw.AddDocument(doc);
                if (Random.Next(17) == 0)
                {
                    iw.Commit();
                }
            }
            DirectoryReader ir = iw.GetReader();

            iw.ForceMerge(1);
            DirectoryReader ir2    = iw.GetReader();
            AtomicReader    merged = GetOnlySegmentReader(ir2);

            iw.Dispose();

            SortedSetDocValues multi  = MultiDocValues.GetSortedSetValues(ir, "bytes");
            SortedSetDocValues single = merged.GetSortedSetDocValues("bytes");

            if (multi == null)
            {
                Assert.IsNull(single);
            }
            else
            {
                Assert.AreEqual(single.ValueCount, multi.ValueCount);
                BytesRef actual   = new BytesRef();
                BytesRef expected = new BytesRef();
                // check values
                for (long i = 0; i < single.ValueCount; i++)
                {
                    single.LookupOrd(i, expected);
                    multi.LookupOrd(i, actual);
                    Assert.AreEqual(expected, actual);
                }
                // check ord list
                for (int i = 0; i < numDocs; i++)
                {
                    single.SetDocument(i);
                    List <long?> expectedList = new List <long?>();
                    long         ord;
                    while ((ord = single.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
                    {
                        expectedList.Add(ord);
                    }

                    multi.SetDocument(i);
                    int upto = 0;
                    while ((ord = multi.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
                    {
                        Assert.AreEqual((long)expectedList[upto], ord);
                        upto++;
                    }
                    Assert.AreEqual(expectedList.Count, upto);
                }
            }

            ir.Dispose();
            ir2.Dispose();
            dir.Dispose();
        }
Exemple #14
0
        protected internal virtual void RunFlushByRam(int numThreads, double maxRamMB, bool ensureNotStalled)
        {
            int                    numDocumentsToIndex = 10 + AtLeast(30);
            AtomicInt32            numDocs             = new AtomicInt32(numDocumentsToIndex);
            Directory              dir         = NewDirectory();
            MockDefaultFlushPolicy flushPolicy = new MockDefaultFlushPolicy();
            MockAnalyzer           analyzer    = new MockAnalyzer(Random);

            analyzer.MaxTokenLength = TestUtil.NextInt32(Random, 1, IndexWriter.MAX_TERM_LENGTH);

            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetFlushPolicy(flushPolicy);
            int numDWPT           = 1 + AtLeast(2);
            DocumentsWriterPerThreadPool threadPool = new ThreadAffinityDocumentsWriterThreadPool(numDWPT);

            iwc.SetIndexerThreadPool(threadPool);
            iwc.SetRAMBufferSizeMB(maxRamMB);
            iwc.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
            iwc.SetMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH);
            IndexWriter writer = new IndexWriter(dir, iwc);

            flushPolicy = (MockDefaultFlushPolicy)writer.Config.FlushPolicy;
            Assert.IsFalse(flushPolicy.FlushOnDocCount);
            Assert.IsFalse(flushPolicy.FlushOnDeleteTerms);
            Assert.IsTrue(flushPolicy.FlushOnRAM);
            DocumentsWriter docsWriter = writer.DocsWriter;

            Assert.IsNotNull(docsWriter);
            DocumentsWriterFlushControl flushControl = docsWriter.flushControl;

            Assert.AreEqual(0, flushControl.FlushBytes, " bytes must be 0 after init");

            IndexThread[] threads = new IndexThread[numThreads];
            for (int x = 0; x < threads.Length; x++)
            {
                threads[x] = new IndexThread(this, numDocs, numThreads, writer, LineDocFile, false);
                threads[x].Start();
            }

            for (int x = 0; x < threads.Length; x++)
            {
                threads[x].Join();
            }
            long maxRAMBytes = (long)(iwc.RAMBufferSizeMB * 1024.0 * 1024.0);

            Assert.AreEqual(0, flushControl.FlushBytes, " all flushes must be due numThreads=" + numThreads);
            Assert.AreEqual(numDocumentsToIndex, writer.NumDocs);
            Assert.AreEqual(numDocumentsToIndex, writer.MaxDoc);
            Assert.IsTrue(flushPolicy.PeakBytesWithoutFlush <= maxRAMBytes, "peak bytes without flush exceeded watermark");
            AssertActiveBytesAfter(flushControl);
            if (flushPolicy.HasMarkedPending)
            {
                Assert.IsTrue(maxRAMBytes < flushControl.peakActiveBytes);
            }
            if (ensureNotStalled)
            {
                Assert.IsFalse(docsWriter.flushControl.stallControl.WasStalled);
            }
            writer.Dispose();
            Assert.AreEqual(0, flushControl.ActiveBytes);
            dir.Dispose();
        }
        public virtual void TestFixedBinary([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")]IConcurrentMergeScheduler scheduler)
        {
            BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BFixedBinary"));
            if (dir is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
            }
            var config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
                            .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
                            .SetRAMBufferSizeMB(256.0)
                            .SetMergeScheduler(scheduler)
                            .SetMergePolicy(NewLogMergePolicy(false, 10))
                            .SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE);
            IndexWriter w = new IndexWriter(dir, config);

            Document doc = new Document();
            var bytes = new byte[4];
            BytesRef data = new BytesRef(bytes);
            BinaryDocValuesField dvField = new BinaryDocValuesField("dv", data);
            doc.Add(dvField);

            for (int i = 0; i < int.MaxValue; i++)
            {
                bytes[0] = (byte)(i >> 24);
                bytes[1] = (byte)(i >> 16);
                bytes[2] = (byte)(i >> 8);
                bytes[3] = (byte)i;
                w.AddDocument(doc);
                if (i % 100000 == 0)
                {
                    Console.WriteLine("indexed: " + i);
                    Console.Out.Flush();
                }
            }

            w.ForceMerge(1);
            w.Dispose();

            Console.WriteLine("verifying...");
            Console.Out.Flush();

            DirectoryReader r = DirectoryReader.Open(dir);
            int expectedValue = 0;
            foreach (AtomicReaderContext context in r.Leaves)
            {
                AtomicReader reader = context.AtomicReader;
                BytesRef scratch = new BytesRef();
                BinaryDocValues dv = reader.GetBinaryDocValues("dv");
                for (int i = 0; i < reader.MaxDoc; i++)
                {
                    bytes[0] = (byte)(expectedValue >> 24);
                    bytes[1] = (byte)(expectedValue >> 16);
                    bytes[2] = (byte)(expectedValue >> 8);
                    bytes[3] = (byte)expectedValue;
                    dv.Get(i, scratch);
                    Assert.AreEqual(data, scratch);
                    expectedValue++;
                }
            }

            r.Dispose();
            dir.Dispose();
        }
        public virtual void TestTryDeleteDocument()
        {
            Directory d = NewDirectory();

            IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            IndexWriter w = new IndexWriter(d, iwc);
            Document doc = new Document();
            w.AddDocument(doc);
            w.AddDocument(doc);
            w.AddDocument(doc);
            w.Dispose();

            iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            iwc.SetOpenMode(IndexWriterConfig.OpenMode_e.APPEND);
            w = new IndexWriter(d, iwc);
            IndexReader r = DirectoryReader.Open(w, false);
            Assert.IsTrue(w.TryDeleteDocument(r, 1));
            Assert.IsTrue(w.TryDeleteDocument(r.Leaves[0].Reader, 0));
            r.Dispose();
            w.Dispose();

            r = DirectoryReader.Open(d);
            Assert.AreEqual(2, r.NumDeletedDocs);
            Assert.IsNotNull(MultiFields.GetLiveDocs(r));
            r.Dispose();
            d.Dispose();
        }
        public virtual void TestExistingDeletes()
        {
            Directory[] dirs = new Directory[2];
            for (int i = 0; i < dirs.Length; i++)
            {
                dirs[i] = NewDirectory();
                IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
                IndexWriter writer = new IndexWriter(dirs[i], conf);
                Document doc = new Document();
                doc.Add(new StringField("id", "myid", Field.Store.NO));
                writer.AddDocument(doc);
                writer.Dispose();
            }

            IndexWriterConfig conf_ = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            IndexWriter writer_ = new IndexWriter(dirs[0], conf_);

            // Now delete the document
            writer_.DeleteDocuments(new Term("id", "myid"));
            IndexReader r = DirectoryReader.Open(dirs[1]);
            try
            {
                writer_.AddIndexes(r);
            }
            finally
            {
                r.Dispose();
            }
            writer_.Commit();
            Assert.AreEqual(1, writer_.NumDocs(), "Documents from the incoming index should not have been deleted");
            writer_.Dispose();

            foreach (Directory dir in dirs)
            {
                dir.Dispose();
            }
        }
Exemple #18
0
        public virtual void TestSortedTermsEnum()
        {
            Directory         directory = NewDirectory();
            Analyzer          analyzer  = new MockAnalyzer(Random());
            IndexWriterConfig iwconfig  = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);

            iwconfig.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter iwriter = new RandomIndexWriter(Random(), directory, iwconfig);

            Document doc = new Document();

            doc.Add(new StringField("field", "hello", Field.Store.NO));
            iwriter.AddDocument(doc);

            doc = new Document();
            doc.Add(new StringField("field", "world", Field.Store.NO));
            iwriter.AddDocument(doc);

            doc = new Document();
            doc.Add(new StringField("field", "beer", Field.Store.NO));
            iwriter.AddDocument(doc);
            iwriter.ForceMerge(1);

            DirectoryReader ireader = iwriter.Reader;

            iwriter.Dispose();

            AtomicReader       ar = GetOnlySegmentReader(ireader);
            SortedSetDocValues dv = FieldCache.DEFAULT.GetDocTermOrds(ar, "field");

            Assert.AreEqual(3, dv.ValueCount);

            TermsEnum termsEnum = dv.GetTermsEnum();

            // next()
            Assert.AreEqual("beer", termsEnum.Next().Utf8ToString());
            Assert.AreEqual(0, termsEnum.Ord);
            Assert.AreEqual("hello", termsEnum.Next().Utf8ToString());
            Assert.AreEqual(1, termsEnum.Ord);
            Assert.AreEqual("world", termsEnum.Next().Utf8ToString());
            Assert.AreEqual(2, termsEnum.Ord);

            // seekCeil()
            Assert.AreEqual(SeekStatus.NOT_FOUND, termsEnum.SeekCeil(new BytesRef("ha!")));
            Assert.AreEqual("hello", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(1, termsEnum.Ord);
            Assert.AreEqual(SeekStatus.FOUND, termsEnum.SeekCeil(new BytesRef("beer")));
            Assert.AreEqual("beer", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(0, termsEnum.Ord);
            Assert.AreEqual(SeekStatus.END, termsEnum.SeekCeil(new BytesRef("zzz")));

            // seekExact()
            Assert.IsTrue(termsEnum.SeekExact(new BytesRef("beer")));
            Assert.AreEqual("beer", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(0, termsEnum.Ord);
            Assert.IsTrue(termsEnum.SeekExact(new BytesRef("hello")));
            Assert.AreEqual("hello", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(1, termsEnum.Ord);
            Assert.IsTrue(termsEnum.SeekExact(new BytesRef("world")));
            Assert.AreEqual("world", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(2, termsEnum.Ord);
            Assert.IsFalse(termsEnum.SeekExact(new BytesRef("bogus")));

            // seek(ord)
            termsEnum.SeekExact(0);
            Assert.AreEqual("beer", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(0, termsEnum.Ord);
            termsEnum.SeekExact(1);
            Assert.AreEqual("hello", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(1, termsEnum.Ord);
            termsEnum.SeekExact(2);
            Assert.AreEqual("world", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(2, termsEnum.Ord);
            ireader.Dispose();
            directory.Dispose();
        }
Exemple #19
0
        public virtual void TestRandomWithPrefix()
        {
            Directory dir = NewDirectory();

            HashSet <string> prefixes = new HashSet <string>();
            int numPrefix             = TestUtil.NextInt(Random(), 2, 7);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: use " + numPrefix + " prefixes");
            }
            while (prefixes.Count < numPrefix)
            {
                prefixes.Add(TestUtil.RandomRealisticUnicodeString(Random()));
                //prefixes.Add(TestUtil.RandomSimpleString(random));
            }
            string[] prefixesArray = prefixes.ToArray(/*new string[prefixes.Count]*/);

            int NUM_TERMS            = AtLeast(20);
            HashSet <BytesRef> terms = new HashSet <BytesRef>();

            while (terms.Count < NUM_TERMS)
            {
                string s = prefixesArray[Random().Next(prefixesArray.Length)] + TestUtil.RandomRealisticUnicodeString(Random());
                //final String s = prefixesArray[random.nextInt(prefixesArray.Length)] + TestUtil.RandomSimpleString(random);
                if (s.Length > 0)
                {
                    terms.Add(new BytesRef(s));
                }
            }
            BytesRef[] termsArray = terms.ToArray();
            Array.Sort(termsArray);

            int NUM_DOCS = AtLeast(100);

            IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            // Sometimes swap in codec that impls ord():
            if (Random().Next(10) == 7)
            {
                Codec codec = TestUtil.AlwaysPostingsFormat(PostingsFormat.ForName("Lucene41WithOrds"));
                conf.SetCodec(codec);
            }

            RandomIndexWriter w = new RandomIndexWriter(Random(), dir, conf);

            int[][]        idToOrds      = new int[NUM_DOCS][];
            HashSet <int?> ordsForDocSet = new HashSet <int?>();

            for (int id = 0; id < NUM_DOCS; id++)
            {
                Document doc = new Document();

                doc.Add(new Int32Field("id", id, Field.Store.NO));

                int termCount = TestUtil.NextInt(Random(), 0, 20 * RANDOM_MULTIPLIER);
                while (ordsForDocSet.Count < termCount)
                {
                    ordsForDocSet.Add(Random().Next(termsArray.Length));
                }
                int[] ordsForDoc = new int[termCount];
                int   upto       = 0;
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: doc id=" + id);
                }
                foreach (int ord in ordsForDocSet)
                {
                    ordsForDoc[upto++] = ord;
                    Field field = NewStringField("field", termsArray[ord].Utf8ToString(), Field.Store.NO);
                    if (VERBOSE)
                    {
                        Console.WriteLine("  f=" + termsArray[ord].Utf8ToString());
                    }
                    doc.Add(field);
                }
                ordsForDocSet.Clear();
                Array.Sort(ordsForDoc);
                idToOrds[id] = ordsForDoc;
                w.AddDocument(doc);
            }

            DirectoryReader r = w.Reader;

            w.Dispose();

            if (VERBOSE)
            {
                Console.WriteLine("TEST: reader=" + r);
            }

            AtomicReader slowR = SlowCompositeReaderWrapper.Wrap(r);

            foreach (string prefix in prefixesArray)
            {
                BytesRef prefixRef = prefix == null ? null : new BytesRef(prefix);

                int[][] idToOrdsPrefix = new int[NUM_DOCS][];
                for (int id = 0; id < NUM_DOCS; id++)
                {
                    int[]        docOrds = idToOrds[id];
                    IList <int?> newOrds = new List <int?>();
                    foreach (int ord in idToOrds[id])
                    {
                        if (StringHelper.StartsWith(termsArray[ord], prefixRef))
                        {
                            newOrds.Add(ord);
                        }
                    }
                    int[] newOrdsArray = new int[newOrds.Count];
                    int   upto         = 0;
                    foreach (int ord in newOrds)
                    {
                        newOrdsArray[upto++] = ord;
                    }
                    idToOrdsPrefix[id] = newOrdsArray;
                }

                foreach (AtomicReaderContext ctx in r.Leaves)
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("\nTEST: sub=" + ctx.Reader);
                    }
                    Verify((AtomicReader)ctx.Reader, idToOrdsPrefix, termsArray, prefixRef);
                }

                // Also test top-level reader: its enum does not support
                // ord, so this forces the OrdWrapper to run:
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: top reader");
                }
                Verify(slowR, idToOrdsPrefix, termsArray, prefixRef);
            }

            FieldCache.DEFAULT.PurgeByCacheKey(slowR.CoreCacheKey);

            r.Dispose();
            dir.Dispose();
        }
Exemple #20
0
        // Collections.synchronizedMap(new WeakHashMap<SegmentCoreReaders, bool?>());

        public virtual void RunTest(string testName)
        {
            Failed.Set(false);
            AddCount.Set(0);
            DelCount.Set(0);
            PackCount.Set(0);

            long t0 = DateTime.Now.Millisecond;

            Random        random  = new Random(Random().Next());
            LineFileDocs  docs    = new LineFileDocs(random, DefaultCodecSupportsDocValues());
            DirectoryInfo tempDir = CreateTempDir(testName);

            Dir = GetDirectory(NewMockFSDirectory(tempDir)); // some subclasses rely on this being MDW
            if (Dir is BaseDirectoryWrapper)
            {
                ((BaseDirectoryWrapper)Dir).CheckIndexOnClose = false; // don't double-checkIndex, we do it ourselves.
            }
            MockAnalyzer analyzer = new MockAnalyzer(Random());

            analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);
            IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetInfoStream(new FailOnNonBulkMergesInfoStream());

            if (LuceneTestCase.TEST_NIGHTLY)
            {
                // newIWConfig makes smallish max seg size, which
                // results in tons and tons of segments for this test
                // when run nightly:
                MergePolicy mp = conf.MergePolicy;
                if (mp is TieredMergePolicy)
                {
                    ((TieredMergePolicy)mp).MaxMergedSegmentMB = 5000.0;
                }
                else if (mp is LogByteSizeMergePolicy)
                {
                    ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1000.0;
                }
                else if (mp is LogMergePolicy)
                {
                    ((LogMergePolicy)mp).MaxMergeDocs = 100000;
                }
            }

            conf.SetMergedSegmentWarmer(new IndexReaderWarmerAnonymousInnerClassHelper(this));

            if (VERBOSE)
            {
                conf.InfoStream = new PrintStreamInfoStreamAnonymousInnerClassHelper(this, Console.Out);
            }
            Writer = new IndexWriter(Dir, conf);
            TestUtil.ReduceOpenFiles(Writer);

            //TaskScheduler es = Random().NextBoolean() ? null : Executors.newCachedThreadPool(new NamedThreadFactory(testName));
            TaskScheduler es = null;

            DoAfterWriter(es);

            int NUM_INDEX_THREADS = TestUtil.NextInt(Random(), 2, 4);

            int RUN_TIME_SEC = LuceneTestCase.TEST_NIGHTLY ? 300 : RANDOM_MULTIPLIER;

            ISet <string>   delIDs     = new ConcurrentHashSet <string>(new HashSet <string>());
            ISet <string>   delPackIDs = new ConcurrentHashSet <string>(new HashSet <string>());
            IList <SubDocs> allSubDocs = new ConcurrentList <SubDocs>(new List <SubDocs>());

            long stopTime = DateTime.Now.Millisecond + RUN_TIME_SEC * 1000;

            ThreadClass[] indexThreads = LaunchIndexingThreads(docs, NUM_INDEX_THREADS, stopTime, delIDs, delPackIDs, allSubDocs);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: DONE start " + NUM_INDEX_THREADS + " indexing threads [" + (DateTime.Now.Millisecond - t0) + " ms]");
            }

            // Let index build up a bit
            Thread.Sleep(100);

            DoSearching(es, stopTime);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: all searching done [" + (DateTime.Now.Millisecond - t0) + " ms]");
            }

            for (int thread = 0; thread < indexThreads.Length; thread++)
            {
                indexThreads[thread].Join();
            }

            if (VERBOSE)
            {
                Console.WriteLine("TEST: done join indexing threads [" + (DateTime.Now.Millisecond - t0) + " ms]; addCount=" + AddCount + " delCount=" + DelCount);
            }

            IndexSearcher s = FinalSearcher;

            if (VERBOSE)
            {
                Console.WriteLine("TEST: finalSearcher=" + s);
            }

            Assert.IsFalse(Failed.Get());

            bool doFail = false;

            // Verify: make sure delIDs are in fact deleted:
            foreach (string id in delIDs)
            {
                TopDocs hits = s.Search(new TermQuery(new Term("docid", id)), 1);
                if (hits.TotalHits != 0)
                {
                    Console.WriteLine("doc id=" + id + " is supposed to be deleted, but got " + hits.TotalHits + " hits; first docID=" + hits.ScoreDocs[0].Doc);
                    doFail = true;
                }
            }

            // Verify: make sure delPackIDs are in fact deleted:
            foreach (string id in delPackIDs)
            {
                TopDocs hits = s.Search(new TermQuery(new Term("packID", id)), 1);
                if (hits.TotalHits != 0)
                {
                    Console.WriteLine("packID=" + id + " is supposed to be deleted, but got " + hits.TotalHits + " matches");
                    doFail = true;
                }
            }

            // Verify: make sure each group of sub-docs are still in docID order:
            foreach (SubDocs subDocs in allSubDocs)
            {
                TopDocs hits = s.Search(new TermQuery(new Term("packID", subDocs.PackID)), 20);
                if (!subDocs.Deleted)
                {
                    // We sort by relevance but the scores should be identical so sort falls back to by docID:
                    if (hits.TotalHits != subDocs.SubIDs.Count)
                    {
                        Console.WriteLine("packID=" + subDocs.PackID + ": expected " + subDocs.SubIDs.Count + " hits but got " + hits.TotalHits);
                        doFail = true;
                    }
                    else
                    {
                        int lastDocID  = -1;
                        int startDocID = -1;
                        foreach (ScoreDoc scoreDoc in hits.ScoreDocs)
                        {
                            int docID = scoreDoc.Doc;
                            if (lastDocID != -1)
                            {
                                Assert.AreEqual(1 + lastDocID, docID);
                            }
                            else
                            {
                                startDocID = docID;
                            }
                            lastDocID = docID;
                            Document doc = s.Doc(docID);
                            Assert.AreEqual(subDocs.PackID, doc.Get("packID"));
                        }

                        lastDocID = startDocID - 1;
                        foreach (string subID in subDocs.SubIDs)
                        {
                            hits = s.Search(new TermQuery(new Term("docid", subID)), 1);
                            Assert.AreEqual(1, hits.TotalHits);
                            int docID = hits.ScoreDocs[0].Doc;
                            if (lastDocID != -1)
                            {
                                Assert.AreEqual(1 + lastDocID, docID);
                            }
                            lastDocID = docID;
                        }
                    }
                }
                else
                {
                    // Pack was deleted -- make sure its docs are
                    // deleted.  We can't verify packID is deleted
                    // because we can re-use packID for update:
                    foreach (string subID in subDocs.SubIDs)
                    {
                        Assert.AreEqual(0, s.Search(new TermQuery(new Term("docid", subID)), 1).TotalHits);
                    }
                }
            }

            // Verify: make sure all not-deleted docs are in fact
            // not deleted:
            int endID = Convert.ToInt32(docs.NextDoc().Get("docid"));

            docs.Dispose();

            for (int id = 0; id < endID; id++)
            {
                string stringID = "" + id;
                if (!delIDs.Contains(stringID))
                {
                    TopDocs hits = s.Search(new TermQuery(new Term("docid", stringID)), 1);
                    if (hits.TotalHits != 1)
                    {
                        Console.WriteLine("doc id=" + stringID + " is not supposed to be deleted, but got hitCount=" + hits.TotalHits + "; delIDs=" + delIDs);
                        doFail = true;
                    }
                }
            }
            Assert.IsFalse(doFail);

            Assert.AreEqual(AddCount.Get() - DelCount.Get(), s.IndexReader.NumDocs, "index=" + Writer.SegString() + " addCount=" + AddCount + " delCount=" + DelCount);
            ReleaseSearcher(s);

            Writer.Commit();

            Assert.AreEqual(AddCount.Get() - DelCount.Get(), Writer.NumDocs(), "index=" + Writer.SegString() + " addCount=" + AddCount + " delCount=" + DelCount);

            DoClose();
            Writer.Dispose(false);

            // Cannot shutdown until after writer is closed because
            // writer has merged segment warmer that uses IS to run
            // searches, and that IS may be using this es!

            /*if (es != null)
             * {
             * es.shutdown();
             * es.awaitTermination(1, TimeUnit.SECONDS);
             * }*/

            TestUtil.CheckIndex(Dir);
            Dir.Dispose();
            System.IO.Directory.Delete(tempDir.FullName, true);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: done [" + (DateTime.Now.Millisecond - t0) + " ms]");
            }
        }
        public virtual void Test([ValueSource(typeof(ConcurrentMergeSchedulerFactories), "Values")] Func <IConcurrentMergeScheduler> newScheduler)
        {
            BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BPostingsBytes1"));

            if (dir is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir).Throttling = Throttling.NEVER;
            }

            var config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
                         .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
                         .SetRAMBufferSizeMB(256.0)
                         .SetMergeScheduler(newScheduler())
                         .SetMergePolicy(NewLogMergePolicy(false, 10))
                         .SetOpenMode(OpenMode.CREATE);
            IndexWriter w = new IndexWriter(dir, config);

            MergePolicy mp = w.Config.MergePolicy;

            if (mp is LogByteSizeMergePolicy)
            {
                // 1 petabyte:
                ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1024 * 1024 * 1024;
            }

            Document  doc = new Document();
            FieldType ft  = new FieldType(TextField.TYPE_NOT_STORED);

            ft.IndexOptions = IndexOptions.DOCS_AND_FREQS;
            ft.OmitNorms    = true;
            MyTokenStream tokenStream = new MyTokenStream();
            Field         field       = new Field("field", tokenStream, ft);

            doc.Add(field);

            const int numDocs = 1000;

            for (int i = 0; i < numDocs; i++)
            {
                if (i % 2 == 1) // trick blockPF's little optimization
                {
                    tokenStream.n = 65536;
                }
                else
                {
                    tokenStream.n = 65537;
                }
                w.AddDocument(doc);
            }
            w.ForceMerge(1);
            w.Dispose();

            DirectoryReader oneThousand = DirectoryReader.Open(dir);

            IndexReader[] subReaders = new IndexReader[1000];
            Arrays.Fill(subReaders, oneThousand);
            MultiReader          mr   = new MultiReader(subReaders);
            BaseDirectoryWrapper dir2 = NewFSDirectory(CreateTempDir("2BPostingsBytes2"));

            if (dir2 is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir2).Throttling = Throttling.NEVER;
            }
            IndexWriter w2 = new IndexWriter(dir2, new IndexWriterConfig(TEST_VERSION_CURRENT, null));

            w2.AddIndexes(mr);
            w2.ForceMerge(1);
            w2.Dispose();
            oneThousand.Dispose();

            DirectoryReader oneMillion = DirectoryReader.Open(dir2);

            subReaders = new IndexReader[2000];
            Arrays.Fill(subReaders, oneMillion);
            mr = new MultiReader(subReaders);
            BaseDirectoryWrapper dir3 = NewFSDirectory(CreateTempDir("2BPostingsBytes3"));

            if (dir3 is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir3).Throttling = Throttling.NEVER;
            }
            IndexWriter w3 = new IndexWriter(dir3, new IndexWriterConfig(TEST_VERSION_CURRENT, null));

            w3.AddIndexes(mr);
            w3.ForceMerge(1);
            w3.Dispose();
            oneMillion.Dispose();

            dir.Dispose();
            dir2.Dispose();
            dir3.Dispose();
        }
        public virtual void TestDeletes1()
        {
            //IndexWriter.debug2 = System.out;
            Directory         dir = new MockDirectoryWrapper(new Random(Random.Next()), new RAMDirectory());
            IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));

            iwc.SetMergeScheduler(new SerialMergeScheduler());
            iwc.SetMaxBufferedDocs(5000);
            iwc.SetRAMBufferSizeMB(100);
            RangeMergePolicy fsmp = new RangeMergePolicy(this, false);

            iwc.SetMergePolicy(fsmp);
            IndexWriter writer = new IndexWriter(dir, iwc);

            for (int x = 0; x < 5; x++)
            {
                writer.AddDocument(DocHelper.CreateDocument(x, "1", 2));
                //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
            }
            //System.out.println("commit1");
            writer.Commit();
            Assert.AreEqual(1, writer.SegmentCount);
            for (int x = 5; x < 10; x++)
            {
                writer.AddDocument(DocHelper.CreateDocument(x, "2", 2));
                //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
            }
            //System.out.println("commit2");
            writer.Commit();
            Assert.AreEqual(2, writer.SegmentCount);

            for (int x = 10; x < 15; x++)
            {
                writer.AddDocument(DocHelper.CreateDocument(x, "3", 2));
                //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
            }

            writer.DeleteDocuments(new Term("id", "1"));

            writer.DeleteDocuments(new Term("id", "11"));

            // flushing without applying deletes means
            // there will still be deletes in the segment infos
            writer.Flush(false, false);
            Assert.IsTrue(writer.bufferedUpdatesStream.Any());

            // get reader flushes pending deletes
            // so there should not be anymore
            IndexReader r1 = writer.GetReader();

            Assert.IsFalse(writer.bufferedUpdatesStream.Any());
            r1.Dispose();

            // delete id:2 from the first segment
            // merge segments 0 and 1
            // which should apply the delete id:2
            writer.DeleteDocuments(new Term("id", "2"));
            writer.Flush(false, false);
            fsmp         = (RangeMergePolicy)writer.Config.MergePolicy;
            fsmp.DoMerge = true;
            fsmp.Start   = 0;
            fsmp.Length  = 2;
            writer.MaybeMerge();

            Assert.AreEqual(2, writer.SegmentCount);

            // id:2 shouldn't exist anymore because
            // it's been applied in the merge and now it's gone
            IndexReader r2 = writer.GetReader();

            int[] id2docs = ToDocsArray(new Term("id", "2"), null, r2);
            Assert.IsTrue(id2docs == null);
            r2.Dispose();

            /*
             * /// // added docs are in the ram buffer
             * /// for (int x = 15; x < 20; x++) {
             * ///  writer.AddDocument(TestIndexWriterReader.CreateDocument(x, "4", 2));
             * ///  System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
             * /// }
             * /// Assert.IsTrue(writer.numRamDocs() > 0);
             * /// // delete from the ram buffer
             * /// writer.DeleteDocuments(new Term("id", Integer.toString(13)));
             * ///
             * /// Term id3 = new Term("id", Integer.toString(3));
             * ///
             * /// // delete from the 1st segment
             * /// writer.DeleteDocuments(id3);
             * ///
             * /// Assert.IsTrue(writer.numRamDocs() > 0);
             * ///
             * /// //System.out
             * /// //    .println("segdels1:" + writer.docWriter.deletesToString());
             * ///
             * /// //Assert.IsTrue(writer.docWriter.segmentDeletes.Size() > 0);
             * ///
             * /// // we cause a merge to happen
             * /// fsmp.doMerge = true;
             * /// fsmp.start = 0;
             * /// fsmp.Length = 2;
             * /// System.out.println("maybeMerge "+writer.SegmentInfos);
             * ///
             * /// SegmentInfo info0 = writer.SegmentInfos.Info(0);
             * /// SegmentInfo info1 = writer.SegmentInfos.Info(1);
             * ///
             * /// writer.MaybeMerge();
             * /// System.out.println("maybeMerge after "+writer.SegmentInfos);
             * /// // there should be docs in RAM
             * /// Assert.IsTrue(writer.numRamDocs() > 0);
             * ///
             * /// // assert we've merged the 1 and 2 segments
             * /// // and still have a segment leftover == 2
             * /// Assert.AreEqual(2, writer.SegmentInfos.Size());
             * /// Assert.IsFalse(segThere(info0, writer.SegmentInfos));
             * /// Assert.IsFalse(segThere(info1, writer.SegmentInfos));
             * ///
             * /// //System.out.println("segdels2:" + writer.docWriter.deletesToString());
             * ///
             * /// //Assert.IsTrue(writer.docWriter.segmentDeletes.Size() > 0);
             * ///
             * /// IndexReader r = writer.GetReader();
             * /// IndexReader r1 = r.getSequentialSubReaders()[0];
             * /// printDelDocs(r1.GetLiveDocs());
             * /// int[] docs = toDocsArray(id3, null, r);
             * /// System.out.println("id3 docs:"+Arrays.toString(docs));
             * /// // there shouldn't be any docs for id:3
             * /// Assert.IsTrue(docs == null);
             * /// r.Dispose();
             * ///
             * /// part2(writer, fsmp);
             * ///
             */
            // System.out.println("segdels2:"+writer.docWriter.segmentDeletes.toString());
            //System.out.println("close");
            writer.Dispose();
            dir.Dispose();
        }
        public virtual void TestIntersectStartTerm()
        {
            Directory         dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));

            iwc.SetMergePolicy(new LogDocMergePolicy());

            RandomIndexWriter w   = new RandomIndexWriter(Random, dir, iwc);
            Document          doc = new Document();

            doc.Add(NewStringField("field", "abc", Field.Store.NO));
            w.AddDocument(doc);

            doc = new Document();
            doc.Add(NewStringField("field", "abd", Field.Store.NO));
            w.AddDocument(doc);

            doc = new Document();
            doc.Add(NewStringField("field", "acd", Field.Store.NO));
            w.AddDocument(doc);

            doc = new Document();
            doc.Add(NewStringField("field", "bcd", Field.Store.NO));
            w.AddDocument(doc);

            w.ForceMerge(1);
            DirectoryReader r = w.GetReader();

            w.Dispose();
            AtomicReader sub   = GetOnlySegmentReader(r);
            Terms        terms = sub.Fields.GetTerms("field");

            Automaton         automaton = (new RegExp(".*d", RegExpSyntax.NONE)).ToAutomaton();
            CompiledAutomaton ca        = new CompiledAutomaton(automaton, false, false);
            TermsEnum         te;

            // should seek to startTerm
            te = terms.Intersect(ca, new BytesRef("aad"));
            Assert.AreEqual("abd", te.Next().Utf8ToString());
            Assert.AreEqual(1, te.Docs(null, null, DocsFlags.NONE).NextDoc());
            Assert.AreEqual("acd", te.Next().Utf8ToString());
            Assert.AreEqual(2, te.Docs(null, null, DocsFlags.NONE).NextDoc());
            Assert.AreEqual("bcd", te.Next().Utf8ToString());
            Assert.AreEqual(3, te.Docs(null, null, DocsFlags.NONE).NextDoc());
            Assert.IsNull(te.Next());

            // should fail to find ceil label on second arc, rewind
            te = terms.Intersect(ca, new BytesRef("add"));
            Assert.AreEqual("bcd", te.Next().Utf8ToString());
            Assert.AreEqual(3, te.Docs(null, null, DocsFlags.NONE).NextDoc());
            Assert.IsNull(te.Next());

            // should reach end
            te = terms.Intersect(ca, new BytesRef("bcd"));
            Assert.IsNull(te.Next());
            te = terms.Intersect(ca, new BytesRef("ddd"));
            Assert.IsNull(te.Next());

            r.Dispose();
            dir.Dispose();
        }
Exemple #24
0
        public virtual void Test([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")] IConcurrentMergeScheduler scheduler)
        {
            MockDirectoryWrapper dir = new MockDirectoryWrapper(Random(), new MMapDirectory(CreateTempDir("4GBStoredFields")));

            dir.Throttling = MockDirectoryWrapper.Throttling_e.NEVER;

            var config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
                         .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
                         .SetRAMBufferSizeMB(256.0)
                         .SetMergeScheduler(scheduler)
                         .SetMergePolicy(NewLogMergePolicy(false, 10))
                         .SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE);
            IndexWriter w = new IndexWriter(dir, config);

            MergePolicy mp = w.Config.MergePolicy;

            if (mp is LogByteSizeMergePolicy)
            {
                // 1 petabyte:
                ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1024 * 1024 * 1024;
            }

            Document  doc = new Document();
            FieldType ft  = new FieldType();

            ft.Indexed = false;
            ft.Stored  = true;
            ft.Freeze();
            int valueLength = RandomInts.NextIntBetween(Random(), 1 << 13, 1 << 20);
            var value       = new byte[valueLength];

            for (int i = 0; i < valueLength; ++i)
            {
                // random so that even compressing codecs can't compress it
                value[i] = (byte)Random().Next(256);
            }
            Field f = new Field("fld", value, ft);

            doc.Add(f);

            int numDocs = (int)((1L << 32) / valueLength + 100);

            for (int i = 0; i < numDocs; ++i)
            {
                w.AddDocument(doc);
                if (VERBOSE && i % (numDocs / 10) == 0)
                {
                    Console.WriteLine(i + " of " + numDocs + "...");
                }
            }
            w.ForceMerge(1);
            w.Dispose();
            if (VERBOSE)
            {
                bool found = false;
                foreach (string file in dir.ListAll())
                {
                    if (file.EndsWith(".fdt"))
                    {
                        long fileLength = dir.FileLength(file);
                        if (fileLength >= 1L << 32)
                        {
                            found = true;
                        }
                        Console.WriteLine("File length of " + file + " : " + fileLength);
                    }
                }
                if (!found)
                {
                    Console.WriteLine("No .fdt file larger than 4GB, test bug?");
                }
            }

            DirectoryReader rd = DirectoryReader.Open(dir);
            Document        sd = rd.Document(numDocs - 1);

            Assert.IsNotNull(sd);
            Assert.AreEqual(1, sd.Fields.Count);
            BytesRef valueRef = sd.GetBinaryValue("fld");

            Assert.IsNotNull(valueRef);
            Assert.AreEqual(new BytesRef(value), valueRef);
            rd.Dispose();

            dir.Dispose();
        }
        public virtual void TestBooleanSpanQuery()
        {
            int hits = 0;
            Directory directory = NewDirectory();
            Analyzer indexerAnalyzer = new MockAnalyzer(Random());

            IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, indexerAnalyzer);
            IndexWriter writer = new IndexWriter(directory, config);
            string FIELD = "content";
            Document d = new Document();
            d.Add(new TextField(FIELD, "clockwork orange", Field.Store.YES));
            writer.AddDocument(d);
            writer.Dispose();

            IndexReader indexReader = DirectoryReader.Open(directory);
            IndexSearcher searcher = NewSearcher(indexReader);

            DisjunctionMaxQuery query = new DisjunctionMaxQuery(1.0f);
            SpanQuery sq1 = new SpanTermQuery(new Term(FIELD, "clockwork"));
            SpanQuery sq2 = new SpanTermQuery(new Term(FIELD, "clckwork"));
            query.Add(sq1);
            query.Add(sq2);
            TopScoreDocCollector collector = TopScoreDocCollector.Create(1000, true);
            searcher.Search(query, collector);
            hits = collector.TopDocs().ScoreDocs.Length;
            foreach (ScoreDoc scoreDoc in collector.TopDocs().ScoreDocs)
            {
                Console.WriteLine(scoreDoc.Doc);
            }
            indexReader.Dispose();
            Assert.AreEqual(hits, 1);
            directory.Dispose();
        }
Exemple #26
0
        public virtual void Test()
        {
            MockDirectoryWrapper dir = NewMockFSDirectory(CreateTempDir("TestIndexWriterOutOfFileDescriptors"));

            dir.PreventDoubleWrite = false;
            double rate = Random().NextDouble() * 0.01;

            //System.out.println("rate=" + rate);
            dir.RandomIOExceptionRateOnOpen = rate;
            int                  iters       = AtLeast(20);
            LineFileDocs         docs        = new LineFileDocs(Random(), DefaultCodecSupportsDocValues());
            IndexReader          r           = null;
            DirectoryReader      r2          = null;
            bool                 any         = false;
            MockDirectoryWrapper dirCopy     = null;
            int                  lastNumDocs = 0;

            for (int iter = 0; iter < iters; iter++)
            {
                IndexWriter w = null;
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: iter=" + iter);
                }
                try
                {
                    MockAnalyzer analyzer = new MockAnalyzer(Random());
                    analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);
                    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);

                    if (VERBOSE)
                    {
                        // Do this ourselves instead of relying on LTC so
                        // we see incrementing messageID:
                        iwc.SetInfoStream(new PrintStreamInfoStream(Console.Out));
                    }
                    var ms = iwc.MergeScheduler;
                    if (ms is IConcurrentMergeScheduler)
                    {
                        ((IConcurrentMergeScheduler)ms).SetSuppressExceptions();
                    }
                    w = new IndexWriter(dir, iwc);
                    if (r != null && Random().Next(5) == 3)
                    {
                        if (Random().NextBoolean())
                        {
                            if (VERBOSE)
                            {
                                Console.WriteLine("TEST: addIndexes IR[]");
                            }
                            w.AddIndexes(new IndexReader[] { r });
                        }
                        else
                        {
                            if (VERBOSE)
                            {
                                Console.WriteLine("TEST: addIndexes Directory[]");
                            }
                            w.AddIndexes(new Directory[] { dirCopy });
                        }
                    }
                    else
                    {
                        if (VERBOSE)
                        {
                            Console.WriteLine("TEST: addDocument");
                        }
                        w.AddDocument(docs.NextDoc());
                    }
                    dir.RandomIOExceptionRateOnOpen = 0.0;
                    w.Dispose();
                    w = null;

                    // NOTE: this is O(N^2)!  Only enable for temporary debugging:
                    //dir.setRandomIOExceptionRateOnOpen(0.0);
                    //TestUtil.CheckIndex(dir);
                    //dir.setRandomIOExceptionRateOnOpen(rate);

                    // Verify numDocs only increases, to catch IndexWriter
                    // accidentally deleting the index:
                    dir.RandomIOExceptionRateOnOpen = 0.0;
                    Assert.IsTrue(DirectoryReader.IndexExists(dir));
                    if (r2 == null)
                    {
                        r2 = DirectoryReader.Open(dir);
                    }
                    else
                    {
                        DirectoryReader r3 = DirectoryReader.OpenIfChanged(r2);
                        if (r3 != null)
                        {
                            r2.Dispose();
                            r2 = r3;
                        }
                    }
                    Assert.IsTrue(r2.NumDocs >= lastNumDocs, "before=" + lastNumDocs + " after=" + r2.NumDocs);
                    lastNumDocs = r2.NumDocs;
                    //System.out.println("numDocs=" + lastNumDocs);
                    dir.RandomIOExceptionRateOnOpen = rate;

                    any = true;
                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: iter=" + iter + ": success");
                    }
                }
                catch (IOException ioe)
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: iter=" + iter + ": exception");
                        Console.WriteLine(ioe.ToString());
                        Console.Write(ioe.StackTrace);
                    }
                    if (w != null)
                    {
                        // NOTE: leave random IO exceptions enabled here,
                        // to verify that rollback does not try to write
                        // anything:
                        w.Rollback();
                    }
                }

                if (any && r == null && Random().NextBoolean())
                {
                    // Make a copy of a non-empty index so we can use
                    // it to addIndexes later:
                    dir.RandomIOExceptionRateOnOpen = 0.0;
                    r       = DirectoryReader.Open(dir);
                    dirCopy = NewMockFSDirectory(CreateTempDir("TestIndexWriterOutOfFileDescriptors.copy"));
                    HashSet <string> files = new HashSet <string>();
                    foreach (string file in dir.ListAll())
                    {
                        dir.Copy(dirCopy, file, file, IOContext.DEFAULT);
                        files.Add(file);
                    }
                    dirCopy.Sync(files);
                    // Have IW kiss the dir so we remove any leftover
                    // files ... we can easily have leftover files at
                    // the time we take a copy because we are holding
                    // open a reader:
                    (new IndexWriter(dirCopy, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())))).Dispose();
                    dirCopy.RandomIOExceptionRate   = rate;
                    dir.RandomIOExceptionRateOnOpen = rate;
                }
            }

            if (r2 != null)
            {
                r2.Dispose();
            }
            if (r != null)
            {
                r.Dispose();
                dirCopy.Dispose();
            }
            dir.Dispose();
        }
 public IndexWriterAnonymousInnerClassHelper(TestIndexWriterDelete outerInstance, Directory dir, IndexWriterConfig setReaderPooling, AtomicInteger docsInSegment, AtomicBoolean closing, AtomicBoolean sawAfterFlush)
     : base(dir, setReaderPooling)
 {
     this.OuterInstance = outerInstance;
     this.DocsInSegment = docsInSegment;
     this.Closing = closing;
     this.SawAfterFlush = sawAfterFlush;
 }
Exemple #28
0
        public virtual void TestStressMultiThreading()
        {
            Directory         dir    = NewDirectory();
            IndexWriterConfig conf   = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
            IndexWriter       writer = new IndexWriter(dir, conf);

            // create index
            int numThreads = TestUtil.NextInt32(Random, 3, 6);
            int numDocs    = AtLeast(2000);

            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();
                doc.Add(new StringField("id", "doc" + i, Store.NO));
                double group = Random.NextDouble();
                string g;
                if (group < 0.1)
                {
                    g = "g0";
                }
                else if (group < 0.5)
                {
                    g = "g1";
                }
                else if (group < 0.8)
                {
                    g = "g2";
                }
                else
                {
                    g = "g3";
                }
                doc.Add(new StringField("updKey", g, Store.NO));
                for (int j = 0; j < numThreads; j++)
                {
                    long value = Random.Next();
                    doc.Add(new BinaryDocValuesField("f" + j, TestBinaryDocValuesUpdates.ToBytes(value)));
                    doc.Add(new NumericDocValuesField("cf" + j, value * 2)); // control, always updated to f * 2
                }
                writer.AddDocument(doc);
            }

            CountdownEvent done       = new CountdownEvent(numThreads);
            AtomicInt32    numUpdates = new AtomicInt32(AtLeast(100));

            // same thread updates a field as well as reopens
            ThreadClass[] threads = new ThreadClass[numThreads];
            for (int i = 0; i < threads.Length; i++)
            {
                string f  = "f" + i;
                string cf = "cf" + i;
                threads[i] = new ThreadAnonymousInnerClassHelper(this, "UpdateThread-" + i, writer, numDocs, done, numUpdates, f, cf);
            }

            foreach (ThreadClass t in threads)
            {
                t.Start();
            }
            done.Wait();
            writer.Dispose();

            DirectoryReader reader  = DirectoryReader.Open(dir);
            BytesRef        scratch = new BytesRef();

            foreach (AtomicReaderContext context in reader.Leaves)
            {
                AtomicReader r = context.AtomicReader;
                for (int i = 0; i < numThreads; i++)
                {
                    BinaryDocValues  bdv             = r.GetBinaryDocValues("f" + i);
                    NumericDocValues control         = r.GetNumericDocValues("cf" + i);
                    IBits            docsWithBdv     = r.GetDocsWithField("f" + i);
                    IBits            docsWithControl = r.GetDocsWithField("cf" + i);
                    IBits            liveDocs        = r.LiveDocs;
                    for (int j = 0; j < r.MaxDoc; j++)
                    {
                        if (liveDocs == null || liveDocs.Get(j))
                        {
                            Assert.AreEqual(docsWithBdv.Get(j), docsWithControl.Get(j));
                            if (docsWithBdv.Get(j))
                            {
                                long ctrlValue = control.Get(j);
                                long bdvValue  = TestBinaryDocValuesUpdates.GetValue(bdv, j, scratch) * 2;
                                //              if (ctrlValue != bdvValue) {
                                //                System.out.println("seg=" + r + ", f=f" + i + ", doc=" + j + ", group=" + r.Document(j).Get("updKey") + ", ctrlValue=" + ctrlValue + ", bdvBytes=" + scratch);
                                //              }
                                Assert.AreEqual(ctrlValue, bdvValue);
                            }
                        }
                    }
                }
            }
            reader.Dispose();

            dir.Dispose();
        }
Exemple #29
0
 /// <summary>
 /// Creates index upgrader on the given directory, using an <seealso cref="IndexWriter"/> using the given
 /// config. You have the possibility to upgrade indexes with multiple commit points by removing
 /// all older ones.
 /// </summary>
 public IndexUpgrader(Directory dir, IndexWriterConfig iwc, bool deletePriorCommits)
 {
     this.Dir = dir;
     this.Iwc = iwc;
     this.DeletePriorCommits = deletePriorCommits;
 }
Exemple #30
0
        public virtual void TestManyReopensAndFields()
        {
            Directory         dir    = NewDirectory();
            Random            random = Random;
            IndexWriterConfig conf   = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
            LogMergePolicy    lmp    = NewLogMergePolicy();

            lmp.MergeFactor = 3; // merge often
            conf.SetMergePolicy(lmp);
            IndexWriter writer = new IndexWriter(dir, conf);

            bool            isNRT = random.NextBoolean();
            DirectoryReader reader;

            if (isNRT)
            {
                reader = DirectoryReader.Open(writer, true);
            }
            else
            {
                writer.Commit();
                reader = DirectoryReader.Open(dir);
            }

            int numFields    = random.Next(4) + 3;             // 3-7
            int numNDVFields = random.Next(numFields / 2) + 1; // 1-3

            long[] fieldValues   = new long[numFields];
            bool[] fieldHasValue = new bool[numFields];
            Arrays.Fill(fieldHasValue, true);
            for (int i = 0; i < fieldValues.Length; i++)
            {
                fieldValues[i] = 1;
            }

            int numRounds = AtLeast(15);
            int docID     = 0;

            for (int i = 0; i < numRounds; i++)
            {
                int numDocs = AtLeast(5);
                //      System.out.println("[" + Thread.currentThread().getName() + "]: round=" + i + ", numDocs=" + numDocs);
                for (int j = 0; j < numDocs; j++)
                {
                    Document doc = new Document();
                    doc.Add(new StringField("id", "doc-" + docID, Store.NO));
                    doc.Add(new StringField("key", "all", Store.NO)); // update key
                    // add all fields with their current value
                    for (int f = 0; f < fieldValues.Length; f++)
                    {
                        if (f < numNDVFields)
                        {
                            doc.Add(new NumericDocValuesField("f" + f, fieldValues[f]));
                        }
                        else
                        {
                            doc.Add(new BinaryDocValuesField("f" + f, TestBinaryDocValuesUpdates.ToBytes(fieldValues[f])));
                        }
                    }
                    writer.AddDocument(doc);
                    ++docID;
                }

                // if field's value was unset before, unset it from all new added documents too
                for (int field = 0; field < fieldHasValue.Length; field++)
                {
                    if (!fieldHasValue[field])
                    {
                        if (field < numNDVFields)
                        {
                            writer.UpdateNumericDocValue(new Term("key", "all"), "f" + field, null);
                        }
                        else
                        {
                            writer.UpdateBinaryDocValue(new Term("key", "all"), "f" + field, null);
                        }
                    }
                }

                int    fieldIdx    = random.Next(fieldValues.Length);
                string updateField = "f" + fieldIdx;
                if (random.NextBoolean())
                {
                    //        System.out.println("[" + Thread.currentThread().getName() + "]: unset field '" + updateField + "'");
                    fieldHasValue[fieldIdx] = false;
                    if (fieldIdx < numNDVFields)
                    {
                        writer.UpdateNumericDocValue(new Term("key", "all"), updateField, null);
                    }
                    else
                    {
                        writer.UpdateBinaryDocValue(new Term("key", "all"), updateField, null);
                    }
                }
                else
                {
                    fieldHasValue[fieldIdx] = true;
                    if (fieldIdx < numNDVFields)
                    {
                        writer.UpdateNumericDocValue(new Term("key", "all"), updateField, ++fieldValues[fieldIdx]);
                    }
                    else
                    {
                        writer.UpdateBinaryDocValue(new Term("key", "all"), updateField, TestBinaryDocValuesUpdates.ToBytes(++fieldValues[fieldIdx]));
                    }
                    //        System.out.println("[" + Thread.currentThread().getName() + "]: updated field '" + updateField + "' to value " + fieldValues[fieldIdx]);
                }

                if (random.NextDouble() < 0.2)
                {
                    int deleteDoc = random.Next(docID); // might also delete an already deleted document, ok!
                    writer.DeleteDocuments(new Term("id", "doc-" + deleteDoc));
                    //        System.out.println("[" + Thread.currentThread().getName() + "]: deleted document: doc-" + deleteDoc);
                }

                // verify reader
                if (!isNRT)
                {
                    writer.Commit();
                }

                //      System.out.println("[" + Thread.currentThread().getName() + "]: reopen reader: " + reader);
                DirectoryReader newReader = DirectoryReader.OpenIfChanged(reader);
                Assert.IsNotNull(newReader);
                reader.Dispose();
                reader = newReader;
                //      System.out.println("[" + Thread.currentThread().getName() + "]: reopened reader: " + reader);
                Assert.IsTrue(reader.NumDocs > 0); // we delete at most one document per round
                BytesRef scratch = new BytesRef();
                foreach (AtomicReaderContext context in reader.Leaves)
                {
                    AtomicReader r = context.AtomicReader;
                    //        System.out.println(((SegmentReader) r).getSegmentName());
                    IBits liveDocs = r.LiveDocs;
                    for (int field = 0; field < fieldValues.Length; field++)
                    {
                        string           f             = "f" + field;
                        BinaryDocValues  bdv           = r.GetBinaryDocValues(f);
                        NumericDocValues ndv           = r.GetNumericDocValues(f);
                        IBits            docsWithField = r.GetDocsWithField(f);
                        if (field < numNDVFields)
                        {
                            Assert.IsNotNull(ndv);
                            Assert.IsNull(bdv);
                        }
                        else
                        {
                            Assert.IsNull(ndv);
                            Assert.IsNotNull(bdv);
                        }
                        int maxDoc = r.MaxDoc;
                        for (int doc = 0; doc < maxDoc; doc++)
                        {
                            if (liveDocs == null || liveDocs.Get(doc))
                            {
                                //              System.out.println("doc=" + (doc + context.DocBase) + " f='" + f + "' vslue=" + getValue(bdv, doc, scratch));
                                if (fieldHasValue[field])
                                {
                                    Assert.IsTrue(docsWithField.Get(doc));
                                    if (field < numNDVFields)
                                    {
                                        Assert.AreEqual(fieldValues[field], ndv.Get(doc), "invalid value for doc=" + doc + ", field=" + f + ", reader=" + r);
                                    }
                                    else
                                    {
                                        Assert.AreEqual(fieldValues[field], TestBinaryDocValuesUpdates.GetValue(bdv, doc, scratch), "invalid value for doc=" + doc + ", field=" + f + ", reader=" + r);
                                    }
                                }
                                else
                                {
                                    Assert.IsFalse(docsWithField.Get(doc));
                                }
                            }
                        }
                    }
                }
                //      System.out.println();
            }

            IOUtils.Dispose(writer, reader, dir);
        }
Exemple #31
0
 public static IndexWriter MockIndexWriter(Directory dir, IndexWriterConfig conf, ITestPoint testPoint)
 {
     conf.SetInfoStream(new TestPointInfoStream(conf.InfoStream, testPoint));
     return(new IndexWriter(dir, conf));
 }
Exemple #32
0
        public virtual void TestTonsOfUpdates()
        {
            // LUCENE-5248: make sure that when there are many updates, we don't use too much RAM
            Directory         dir    = NewDirectory();
            Random            random = Random;
            IndexWriterConfig conf   = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));

            conf.SetRAMBufferSizeMB(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB);
            conf.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); // don't flush by doc
            IndexWriter writer = new IndexWriter(dir, conf);

            // test data: lots of documents (few 10Ks) and lots of update terms (few hundreds)
            int numDocs                  = AtLeast(20000);
            int numBinaryFields          = AtLeast(5);
            int numTerms                 = TestUtil.NextInt32(random, 10, 100); // terms should affect many docs
            HashSet <string> updateTerms = new HashSet <string>();

            while (updateTerms.Count < numTerms)
            {
                updateTerms.Add(TestUtil.RandomSimpleString(random));
            }

            //    System.out.println("numDocs=" + numDocs + " numBinaryFields=" + numBinaryFields + " numTerms=" + numTerms);

            // build a large index with many BDV fields and update terms
            for (int i = 0; i < numDocs; i++)
            {
                Document doc            = new Document();
                int      numUpdateTerms = TestUtil.NextInt32(random, 1, numTerms / 10);
                for (int j = 0; j < numUpdateTerms; j++)
                {
                    doc.Add(new StringField("upd", RandomPicks.RandomFrom(random, updateTerms), Store.NO));
                }
                for (int j = 0; j < numBinaryFields; j++)
                {
                    long val = random.Next();
                    doc.Add(new BinaryDocValuesField("f" + j, TestBinaryDocValuesUpdates.ToBytes(val)));
                    doc.Add(new NumericDocValuesField("cf" + j, val * 2));
                }
                writer.AddDocument(doc);
            }

            writer.Commit(); // commit so there's something to apply to

            // set to flush every 2048 bytes (approximately every 12 updates), so we get
            // many flushes during binary updates
            writer.Config.SetRAMBufferSizeMB(2048.0 / 1024 / 1024);
            int numUpdates = AtLeast(100);

            //    System.out.println("numUpdates=" + numUpdates);
            for (int i = 0; i < numUpdates; i++)
            {
                int  field      = random.Next(numBinaryFields);
                Term updateTerm = new Term("upd", RandomPicks.RandomFrom(random, updateTerms));
                long value      = random.Next();
                writer.UpdateBinaryDocValue(updateTerm, "f" + field, TestBinaryDocValuesUpdates.ToBytes(value));
                writer.UpdateNumericDocValue(updateTerm, "cf" + field, value * 2);
            }

            writer.Dispose();

            DirectoryReader reader  = DirectoryReader.Open(dir);
            BytesRef        scratch = new BytesRef();

            foreach (AtomicReaderContext context in reader.Leaves)
            {
                for (int i = 0; i < numBinaryFields; i++)
                {
                    AtomicReader     r  = context.AtomicReader;
                    BinaryDocValues  f  = r.GetBinaryDocValues("f" + i);
                    NumericDocValues cf = r.GetNumericDocValues("cf" + i);
                    for (int j = 0; j < r.MaxDoc; j++)
                    {
                        Assert.AreEqual(cf.Get(j), TestBinaryDocValuesUpdates.GetValue(f, j, scratch) * 2, "reader=" + r + ", field=f" + i + ", doc=" + j);
                    }
                }
            }
            reader.Dispose();

            dir.Dispose();
        }
Exemple #33
0
        public virtual void Test()
        {
            IList <string> postingsList   = new List <string>();
            int            numTerms       = AtLeast(300);
            int            maxTermsPerDoc = TestUtil.NextInt32(Random, 10, 20);
            bool           isSimpleText   = "SimpleText".Equals(TestUtil.GetPostingsFormat("field"), StringComparison.Ordinal);

            IndexWriterConfig iwc = NewIndexWriterConfig(Random, TEST_VERSION_CURRENT, new MockAnalyzer(Random));

            if ((isSimpleText || iwc.MergePolicy is MockRandomMergePolicy) && (TestNightly || RandomMultiplier > 1))
            {
                // Otherwise test can take way too long (> 2 hours)
                //numTerms /= 2;
                // LUCENENET specific - To keep this under the 1 hour free limit
                // of Azure DevOps, this was reduced from /2 to /6.
                numTerms /= 6;
            }
            if (Verbose)
            {
                Console.WriteLine("maxTermsPerDoc=" + maxTermsPerDoc);
                Console.WriteLine("numTerms=" + numTerms);
            }
            for (int i = 0; i < numTerms; i++)
            {
                string term = Convert.ToString(i, CultureInfo.InvariantCulture);
                for (int j = 0; j < i; j++)
                {
                    postingsList.Add(term);
                }
            }

            postingsList.Shuffle(Random);

            ConcurrentQueue <string> postings = new ConcurrentQueue <string>(postingsList);

            Directory dir = NewFSDirectory(CreateTempDir(GetFullMethodName()));

            RandomIndexWriter iw = new RandomIndexWriter(Random, dir, iwc);

            int threadCount = TestUtil.NextInt32(Random, 1, 5);

            if (Verbose)
            {
                Console.WriteLine("config: " + iw.IndexWriter.Config);
                Console.WriteLine("threadCount=" + threadCount);
            }

            Field     prototype = NewTextField("field", "", Field.Store.NO);
            FieldType fieldType = new FieldType(prototype.FieldType);

            if (Random.NextBoolean())
            {
                fieldType.OmitNorms = true;
            }
            int options = Random.Next(3);

            if (options == 0)
            {
                fieldType.IndexOptions     = IndexOptions.DOCS_AND_FREQS; // we dont actually need positions
                fieldType.StoreTermVectors = true;                        // but enforce term vectors when we do this so we check SOMETHING
            }
            else if (options == 1 && !DoesntSupportOffsets.Contains(TestUtil.GetPostingsFormat("field")))
            {
                fieldType.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
            }
            // else just positions

            ThreadJob[]    threads     = new ThreadJob[threadCount];
            CountdownEvent startingGun = new CountdownEvent(1);

            for (int threadID = 0; threadID < threadCount; threadID++)
            {
                Random   threadRandom = new Random(Random.Next());
                Document document     = new Document();
                Field    field        = new Field("field", "", fieldType);
                document.Add(field);
                threads[threadID] = new ThreadAnonymousClass(this, numTerms, maxTermsPerDoc, postings, iw, startingGun, threadRandom, document, field);
                threads[threadID].Start();
            }
            startingGun.Signal();
            foreach (ThreadJob t in threads)
            {
                t.Join();
            }

            iw.ForceMerge(1);
            DirectoryReader ir = iw.GetReader();

            Assert.AreEqual(1, ir.Leaves.Count);
            AtomicReader air   = (AtomicReader)ir.Leaves[0].Reader;
            Terms        terms = air.GetTerms("field");

            // numTerms-1 because there cannot be a term 0 with 0 postings:
            Assert.AreEqual(numTerms - 1, terms.Count);
            TermsEnum termsEnum = terms.GetEnumerator();

            while (termsEnum.MoveNext())
            {
                int value = Convert.ToInt32(termsEnum.Term.Utf8ToString(), CultureInfo.InvariantCulture);
                Assert.AreEqual(value, termsEnum.TotalTermFreq);
                // don't really need to check more than this, as CheckIndex
                // will verify that totalTermFreq == total number of positions seen
                // from a docsAndPositionsEnum.
            }
            ir.Dispose();
            iw.Dispose();
            dir.Dispose();
        }
        public virtual void DoTestNumbers(bool withPayloads)
        {
            Directory dir      = NewDirectory();
            Analyzer  analyzer = withPayloads ? (Analyzer) new MockPayloadAnalyzer() : new MockAnalyzer(Random);

            Iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
            Iwc.SetMergePolicy(NewLogMergePolicy()); // will rely on docids a bit for skipping
            RandomIndexWriter w = new RandomIndexWriter(Random, dir, Iwc);

            FieldType ft = new FieldType(TextField.TYPE_STORED);

            ft.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
            if (Random.NextBoolean())
            {
                ft.StoreTermVectors         = true;
                ft.StoreTermVectorOffsets   = Random.NextBoolean();
                ft.StoreTermVectorPositions = Random.NextBoolean();
            }

            int numDocs = AtLeast(500);

            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();
                doc.Add(new Field("numbers", English.Int32ToEnglish(i), ft));
                doc.Add(new Field("oddeven", (i % 2) == 0 ? "even" : "odd", ft));
                doc.Add(new StringField("id", "" + i, Field.Store.NO));
                w.AddDocument(doc);
            }

            IndexReader reader = w.GetReader();

            w.Dispose();

            string[] terms = new string[] { "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten", "hundred" };

            foreach (string term in terms)
            {
                DocsAndPositionsEnum dp = MultiFields.GetTermPositionsEnum(reader, null, "numbers", new BytesRef(term));
                int doc;
                while ((doc = dp.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                {
                    string storedNumbers = reader.Document(doc).Get("numbers");
                    int    freq          = dp.Freq;
                    for (int i = 0; i < freq; i++)
                    {
                        dp.NextPosition();
                        int start = dp.StartOffset;
                        Debug.Assert(start >= 0);
                        int end = dp.EndOffset;
                        Debug.Assert(end >= 0 && end >= start);
                        // check that the offsets correspond to the term in the src text
                        Assert.IsTrue(storedNumbers.Substring(start, end - start).Equals(term, StringComparison.Ordinal));
                        if (withPayloads)
                        {
                            // check that we have a payload and it starts with "pos"
                            Assert.IsNotNull(dp.GetPayload());
                            BytesRef payload = dp.GetPayload();
                            Assert.IsTrue(payload.Utf8ToString().StartsWith("pos:", StringComparison.Ordinal));
                        } // note: withPayloads=false doesnt necessarily mean we dont have them from MockAnalyzer!
                    }
                }
            }

            // check we can skip correctly
            int numSkippingTests = AtLeast(50);

            for (int j = 0; j < numSkippingTests; j++)
            {
                int num = TestUtil.NextInt32(Random, 100, Math.Min(numDocs - 1, 999));
                DocsAndPositionsEnum dp = MultiFields.GetTermPositionsEnum(reader, null, "numbers", new BytesRef("hundred"));
                int doc = dp.Advance(num);
                Assert.AreEqual(num, doc);
                int freq = dp.Freq;
                for (int i = 0; i < freq; i++)
                {
                    string storedNumbers = reader.Document(doc).Get("numbers");
                    dp.NextPosition();
                    int start = dp.StartOffset;
                    Debug.Assert(start >= 0);
                    int end = dp.EndOffset;
                    Debug.Assert(end >= 0 && end >= start);
                    // check that the offsets correspond to the term in the src text
                    Assert.IsTrue(storedNumbers.Substring(start, end - start).Equals("hundred", StringComparison.Ordinal));
                    if (withPayloads)
                    {
                        // check that we have a payload and it starts with "pos"
                        Assert.IsNotNull(dp.GetPayload());
                        BytesRef payload = dp.GetPayload();
                        Assert.IsTrue(payload.Utf8ToString().StartsWith("pos:", StringComparison.Ordinal));
                    } // note: withPayloads=false doesnt necessarily mean we dont have them from MockAnalyzer!
                }
            }

            // check that other fields (without offsets) work correctly

            for (int i = 0; i < numDocs; i++)
            {
                DocsEnum dp = MultiFields.GetTermDocsEnum(reader, null, "id", new BytesRef("" + i), 0);
                Assert.AreEqual(i, dp.NextDoc());
                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dp.NextDoc());
            }

            reader.Dispose();
            dir.Dispose();
        }
 public PKIndexSplitter(Directory input, Directory dir1, Directory dir2, Term midTerm, IndexWriterConfig config1, IndexWriterConfig config2)
       : this(input, dir1, dir2, new TermRangeFilter(midTerm.Field, null, midTerm.Bytes, true, false), config1, config2)
 {
 }
 public override void SetUp()
 {
     base.SetUp();
     Iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
 }
 public virtual void TestTooManySegments()
 {
     Directory dir = GetAssertNoDeletesDirectory(NewDirectory());
     // Don't use newIndexWriterConfig, because we need a
     // "sane" mergePolicy:
     IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
     IndexWriter w = new IndexWriter(dir, iwc);
     // Create 500 segments:
     for (int i = 0; i < 500; i++)
     {
         Document doc = new Document();
         doc.Add(NewStringField("id", "" + i, Field.Store.NO));
         w.AddDocument(doc);
         IndexReader r = DirectoryReader.Open(w, true);
         // Make sure segment count never exceeds 100:
         Assert.IsTrue(r.Leaves().Count < 100);
         r.Dispose();
     }
     w.Dispose();
     dir.Dispose();
 }
Exemple #38
0
        /*
         * Run one indexer and 2 searchers against single index as
         * stress test.
         */

        public virtual void RunTest(Directory directory)
        {
            TimedThread[] threads = new TimedThread[4];

            IndexWriterConfig conf = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))).SetMaxBufferedDocs(7);

            ((TieredMergePolicy)conf.MergePolicy).MaxMergeAtOnce = 3;
            IndexWriter writer = RandomIndexWriter.MockIndexWriter(directory, conf, Random);

            // Establish a base index of 100 docs:
            for (int i = 0; i < 100; i++)
            {
                Documents.Document d = new Documents.Document();
                d.Add(NewStringField("id", Convert.ToString(i), Field.Store.YES));
                d.Add(NewTextField("contents", English.Int32ToEnglish(i), Field.Store.NO));
                if ((i - 1) % 7 == 0)
                {
                    writer.Commit();
                }
                writer.AddDocument(d);
            }
            writer.Commit();

            IndexReader r = DirectoryReader.Open(directory);

            Assert.AreEqual(100, r.NumDocs);
            r.Dispose();

            IndexerThread indexerThread = new IndexerThread(writer, threads);

            threads[0] = indexerThread;
            indexerThread.Start();

            IndexerThread indexerThread2 = new IndexerThread(writer, threads);

            threads[1] = indexerThread2;
            indexerThread2.Start();

            SearcherThread searcherThread1 = new SearcherThread(directory, threads);

            threads[2] = searcherThread1;
            searcherThread1.Start();

            SearcherThread searcherThread2 = new SearcherThread(directory, threads);

            threads[3] = searcherThread2;
            searcherThread2.Start();

            indexerThread.Join();
            indexerThread2.Join();
            searcherThread1.Join();
            searcherThread2.Join();

            writer.Dispose();

            Assert.IsTrue(!indexerThread.Failed, "hit unexpected exception in indexer");
            Assert.IsTrue(!indexerThread2.Failed, "hit unexpected exception in indexer2");
            Assert.IsTrue(!searcherThread1.Failed, "hit unexpected exception in search1");
            Assert.IsTrue(!searcherThread2.Failed, "hit unexpected exception in search2");
            //System.out.println("    Writer: " + indexerThread.count + " iterations");
            //System.out.println("Searcher 1: " + searcherThread1.count + " searchers created");
            //System.out.println("Searcher 2: " + searcherThread2.count + " searchers created");
        }
 public override void SetUp()
 {
     base.SetUp();
     Iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
 }
Exemple #40
0
 /// <summary>
 /// Creates index upgrader on the given directory, using an <seealso cref="IndexWriter"/> using the given
 /// config. You have the possibility to upgrade indexes with multiple commit points by removing
 /// all older ones.
 /// </summary>
 public IndexUpgrader(Directory dir, IndexWriterConfig iwc, bool deletePriorCommits)
 {
     this.Dir = dir;
     this.Iwc = iwc;
     this.DeletePriorCommits = deletePriorCommits;
 }
 public MyIndexWriter(Directory dir, IndexWriterConfig conf)
     : base(dir, conf)
 {
 }
Exemple #42
0
        // [Test] // LUCENENET NOTE: For now, we are overriding this test in every subclass to pull it into the right context for the subclass
        public virtual void TestWriteReadMerge()
        {
            // get another codec, other than the default: so we are merging segments across different codecs
            Codec otherCodec;

            if ("SimpleText".Equals(Codec.Default.Name, StringComparison.Ordinal))
            {
                otherCodec = new Lucene46Codec();
            }
            else
            {
                otherCodec = new SimpleTextCodec();
            }
            Directory         dir    = NewDirectory();
            IndexWriterConfig iwConf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            iwConf.SetMaxBufferedDocs(RandomInts.NextIntBetween(Random(), 2, 30));
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, (IndexWriterConfig)iwConf.Clone());

            int docCount = AtLeast(200);
            var data     = new byte[docCount][][];

            for (int i = 0; i < docCount; ++i)
            {
                int fieldCount = Rarely() ? RandomInts.NextIntBetween(Random(), 1, 500) : RandomInts.NextIntBetween(Random(), 1, 5);
                data[i] = new byte[fieldCount][];
                for (int j = 0; j < fieldCount; ++j)
                {
                    int length = Rarely() ? Random().Next(1000) : Random().Next(10);
                    int max    = Rarely() ? 256 : 2;
                    data[i][j] = RandomByteArray(length, max);
                }
            }

            FieldType type = new FieldType(StringField.TYPE_STORED);

            type.IsIndexed = false;
            type.Freeze();
            Int32Field id = new Int32Field("id", 0, Field.Store.YES);

            for (int i = 0; i < data.Length; ++i)
            {
                Document doc = new Document();
                doc.Add(id);
                id.SetInt32Value(i);
                for (int j = 0; j < data[i].Length; ++j)
                {
                    Field f = new Field("bytes" + j, data[i][j], type);
                    doc.Add(f);
                }
                iw.w.AddDocument(doc);
                if (Random().NextBoolean() && (i % (data.Length / 10) == 0))
                {
                    iw.w.Dispose();
                    // test merging against a non-compressing codec
                    if (iwConf.Codec == otherCodec)
                    {
                        iwConf.SetCodec(Codec.Default);
                    }
                    else
                    {
                        iwConf.SetCodec(otherCodec);
                    }
                    iw = new RandomIndexWriter(Random(), dir, (IndexWriterConfig)iwConf.Clone());
                }
            }

            for (int i = 0; i < 10; ++i)
            {
                int min = Random().Next(data.Length);
                int max = min + Random().Next(20);
                iw.DeleteDocuments(NumericRangeQuery.NewInt32Range("id", min, max, true, false));
            }

            iw.ForceMerge(2); // force merges with deletions

            iw.Commit();

            DirectoryReader ir = DirectoryReader.Open(dir);

            Assert.IsTrue(ir.NumDocs > 0);
            int numDocs = 0;

            for (int i = 0; i < ir.MaxDoc; ++i)
            {
                Document doc = ir.Document(i);
                if (doc == null)
                {
                    continue;
                }
                ++numDocs;
                int docId = (int)doc.GetField("id").GetInt32Value();
                Assert.AreEqual(data[docId].Length + 1, doc.Fields.Count);
                for (int j = 0; j < data[docId].Length; ++j)
                {
                    var      arr     = data[docId][j];
                    BytesRef arr2Ref = doc.GetBinaryValue("bytes" + j);
                    var      arr2    = Arrays.CopyOfRange(arr2Ref.Bytes, arr2Ref.Offset, arr2Ref.Offset + arr2Ref.Length);
                    Assert.AreEqual(arr, arr2);
                }
            }
            Assert.IsTrue(ir.NumDocs <= numDocs);
            ir.Dispose();

            iw.DeleteAll();
            iw.Commit();
            iw.ForceMerge(1);

            iw.Dispose();
            dir.Dispose();
        }
Exemple #43
0
        internal readonly Codec Codec; // for writing new segments

        /// <summary>
        /// Constructs a new IndexWriter per the settings given in <code>conf</code>.
        /// If you want to make "live" changes to this writer instance, use
        /// <seealso cref="#getConfig()"/>.
        ///
        /// <p>
        /// <b>NOTE:</b> after ths writer is created, the given configuration instance
        /// cannot be passed to another writer. If you intend to do so, you should
        /// <seealso cref="IndexWriterConfig#clone() clone"/> it beforehand.
        /// </summary>
        /// <param name="d">
        ///          the index directory. The index is either created or appended
        ///          according <code>conf.getOpenMode()</code>. </param>
        /// <param name="conf">
        ///          the configuration settings according to which IndexWriter should
        ///          be initialized. </param>
        /// <exception cref="IOException">
        ///           if the directory cannot be read/written to, or if it does not
        ///           exist and <code>conf.getOpenMode()</code> is
        ///           <code>OpenMode.APPEND</code> or if there is any other low-level
        ///           IO error </exception>
        public IndexWriter(Directory d, IndexWriterConfig conf)
        {
            /*if (!InstanceFieldsInitialized)
            {
                InitializeInstanceFields();
                InstanceFieldsInitialized = true;
            }*/
            readerPool = new ReaderPool(this);
            conf.SetIndexWriter(this); // prevent reuse by other instances
            Config_Renamed = new LiveIndexWriterConfig(conf);
            directory = d;
            analyzer = Config_Renamed.Analyzer;
            infoStream = Config_Renamed.InfoStream;
            mergePolicy = Config_Renamed.MergePolicy;
            mergePolicy.IndexWriter = this;
            mergeScheduler = Config_Renamed.MergeScheduler;
            Codec = Config_Renamed.Codec;

            BufferedUpdatesStream = new BufferedUpdatesStream(infoStream);
            PoolReaders = Config_Renamed.ReaderPooling;

            WriteLock = directory.MakeLock(WRITE_LOCK_NAME);

            if (!WriteLock.Obtain(Config_Renamed.WriteLockTimeout)) // obtain write lock
            {
                throw new LockObtainFailedException("Index locked for write: " + WriteLock);
            }

            bool success = false;
            try
            {
                OpenMode_e? mode = Config_Renamed.OpenMode;
                bool create;
                if (mode == OpenMode_e.CREATE)
                {
                    create = true;
                }
                else if (mode == OpenMode_e.APPEND)
                {
                    create = false;
                }
                else
                {
                    // CREATE_OR_APPEND - create only if an index does not exist
                    create = !DirectoryReader.IndexExists(directory);
                }

                // If index is too old, reading the segments will throw
                // IndexFormatTooOldException.
                segmentInfos = new SegmentInfos();

                bool initialIndexExists = true;

                if (create)
                {
                    // Try to read first.  this is to allow create
                    // against an index that's currently open for
                    // searching.  In this case we write the next
                    // segments_N file with no segments:
                    try
                    {
                        segmentInfos.Read(directory);
                        segmentInfos.Clear();
                    }
                    catch (IOException)
                    {
                        // Likely this means it's a fresh directory
                        initialIndexExists = false;
                    }

                    // Record that we have a change (zero out all
                    // segments) pending:
                    Changed();
                }
                else
                {
                    segmentInfos.Read(directory);

                    IndexCommit commit = Config_Renamed.IndexCommit;
                    if (commit != null)
                    {
                        // Swap out all segments, but, keep metadata in
                        // SegmentInfos, like version & generation, to
                        // preserve write-once.  this is important if
                        // readers are open against the future commit
                        // points.
                        if (commit.Directory != directory)
                        {
                            throw new System.ArgumentException("IndexCommit's directory doesn't match my directory");
                        }
                        SegmentInfos oldInfos = new SegmentInfos();
                        oldInfos.Read(directory, commit.SegmentsFileName);
                        segmentInfos.Replace(oldInfos);
                        Changed();
                        if (infoStream.IsEnabled("IW"))
                        {
                            infoStream.Message("IW", "init: loaded commit \"" + commit.SegmentsFileName + "\"");
                        }
                    }
                }

                RollbackSegments = segmentInfos.CreateBackupSegmentInfos();

                // start with previous field numbers, but new FieldInfos
                GlobalFieldNumberMap = FieldNumberMap;
                Config_Renamed.FlushPolicy.Init(Config_Renamed);
                DocWriter = new DocumentsWriter(this, Config_Renamed, directory);
                eventQueue = DocWriter.EventQueue();

                // Default deleter (for backwards compatibility) is
                // KeepOnlyLastCommitDeleter:
                lock (this)
                {
                    Deleter = new IndexFileDeleter(directory, Config_Renamed.DelPolicy, segmentInfos, infoStream, this, initialIndexExists);
                }

                if (Deleter.StartingCommitDeleted)
                {
                    // Deletion policy deleted the "head" commit point.
                    // We have to mark ourself as changed so that if we
                    // are closed w/o any further changes we write a new
                    // segments_N file.
                    Changed();
                }

                if (infoStream.IsEnabled("IW"))
                {
                    infoStream.Message("IW", "init: create=" + create);
                    MessageState();
                }

                success = true;
            }
            finally
            {
                if (!success)
                {
                    if (infoStream.IsEnabled("IW"))
                    {
                        infoStream.Message("IW", "init: hit exception on init; releasing write lock");
                    }
                    WriteLock.Release();
                    IOUtils.CloseWhileHandlingException(WriteLock);
                    WriteLock = null;
                }
            }
        }
        public virtual void TestMaxMergeCount()
        {
            Directory dir = NewDirectory();
            IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            int maxMergeCount = TestUtil.NextInt(Random(), 1, 5);
            int maxMergeThreads = TestUtil.NextInt(Random(), 1, maxMergeCount);
            CountdownEvent enoughMergesWaiting = new CountdownEvent(maxMergeCount);
            AtomicInteger runningMergeCount = new AtomicInteger(0);
            AtomicBoolean failed = new AtomicBoolean();

            if (VERBOSE)
            {
                Console.WriteLine("TEST: maxMergeCount=" + maxMergeCount + " maxMergeThreads=" + maxMergeThreads);
            }

            ConcurrentMergeScheduler cms = new ConcurrentMergeSchedulerAnonymousInnerClassHelper(this, maxMergeCount, enoughMergesWaiting, runningMergeCount, failed);
            cms.SetMaxMergesAndThreads(maxMergeCount, maxMergeThreads);
            iwc.SetMergeScheduler(cms);
            iwc.SetMaxBufferedDocs(2);

            TieredMergePolicy tmp = new TieredMergePolicy();
            iwc.SetMergePolicy(tmp);
            tmp.MaxMergeAtOnce = 2;
            tmp.SegmentsPerTier = 2;

            IndexWriter w = new IndexWriter(dir, iwc);
            Document doc = new Document();
            doc.Add(NewField("field", "field", TextField.TYPE_NOT_STORED));
            while (enoughMergesWaiting.CurrentCount != 0 && !failed.Get())
            {
                for (int i = 0; i < 10; i++)
                {
                    w.AddDocument(doc);
                }
            }
            w.Dispose(false);
            dir.Dispose();
        }
 private IndexWriter NewWriter(Directory dir, IndexWriterConfig conf)
 {
     conf.SetMergePolicy(new LogDocMergePolicy());
     IndexWriter writer = new IndexWriter(dir, conf);
     return writer;
 }
        /// <summary>
        /// create a new index writer config with random defaults using the specified random </summary>
        public static IndexWriterConfig NewIndexWriterConfig(Random r, LuceneVersion v, Analyzer a)
        {
            IndexWriterConfig c = new IndexWriterConfig(v, a);
            c.SetSimilarity(ClassEnvRule.Similarity);
            if (VERBOSE)
            {
                // Even though TestRuleSetupAndRestoreClassEnv calls
                // InfoStream.setDefault, we do it again here so that
                // the PrintStreamInfoStream.messageID increments so
                // that when there are separate instances of
                // IndexWriter created we see "IW 0", "IW 1", "IW 2",
                // ... instead of just always "IW 0":
                c.InfoStream = new TestRuleSetupAndRestoreClassEnv.ThreadNameFixingPrintStreamInfoStream(Console.Out);
            }

            if (r.NextBoolean())
            {
                c.SetMergeScheduler(new SerialMergeScheduler());
            }
            else if (Rarely(r))
            {
                int maxThreadCount = TestUtil.NextInt(Random(), 1, 4);
                int maxMergeCount = TestUtil.NextInt(Random(), maxThreadCount, maxThreadCount + 4);
                ConcurrentMergeScheduler cms = new ConcurrentMergeScheduler();
                cms.SetMaxMergesAndThreads(maxMergeCount, maxThreadCount);
                c.SetMergeScheduler(cms);
            }
            if (r.NextBoolean())
            {
                if (Rarely(r))
                {
                    // crazy value
                    c.SetMaxBufferedDocs(TestUtil.NextInt(r, 2, 15));
                }
                else
                {
                    // reasonable value
                    c.SetMaxBufferedDocs(TestUtil.NextInt(r, 16, 1000));
                }
            }
            if (r.NextBoolean())
            {
                if (Rarely(r))
                {
                    // crazy value
                    c.SetTermIndexInterval(r.NextBoolean() ? TestUtil.NextInt(r, 1, 31) : TestUtil.NextInt(r, 129, 1000));
                }
                else
                {
                    // reasonable value
                    c.SetTermIndexInterval(TestUtil.NextInt(r, 32, 128));
                }
            }
            if (r.NextBoolean())
            {
                int maxNumThreadStates = Rarely(r) ? TestUtil.NextInt(r, 5, 20) : TestUtil.NextInt(r, 1, 4); // reasonable value -  crazy value

                if (Rarely(r))
                {
                    // Retrieve the package-private setIndexerThreadPool
                    // method:
                    MethodInfo setIndexerThreadPoolMethod = typeof(IndexWriterConfig).GetMethod("SetIndexerThreadPool", new Type[] { typeof(DocumentsWriterPerThreadPool) });
                    //setIndexerThreadPoolMethod.setAccessible(true);
                    Type clazz = typeof(RandomDocumentsWriterPerThreadPool);
                    ConstructorInfo ctor = clazz.GetConstructor(new[] { typeof(int), typeof(Random) });
                    //ctor.Accessible = true;
                    // random thread pool
                    setIndexerThreadPoolMethod.Invoke(c, new[] { ctor.Invoke(new object[] { maxNumThreadStates, r }) });
                }
                else
                {
                    // random thread pool
                    c.SetMaxThreadStates(maxNumThreadStates);
                }
            }

            c.SetMergePolicy(NewMergePolicy(r));

            if (Rarely(r))
            {
                c.SetMergedSegmentWarmer(new SimpleMergedSegmentWarmer(c.InfoStream));
            }
            c.SetUseCompoundFile(r.NextBoolean());
            c.SetReaderPooling(r.NextBoolean());
            c.SetReaderTermsIndexDivisor(TestUtil.NextInt(r, 1, 4));
            c.SetCheckIntegrityAtMerge(r.NextBoolean());
            return c;
        }
        public virtual void TestDeletesCheckIndexOutput()
        {
            Directory dir = NewDirectory();
            IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            iwc.SetMaxBufferedDocs(2);
            IndexWriter w = new IndexWriter(dir, (IndexWriterConfig)iwc.Clone());
            Document doc = new Document();
            doc.Add(NewField("field", "0", StringField.TYPE_NOT_STORED));
            w.AddDocument(doc);

            doc = new Document();
            doc.Add(NewField("field", "1", StringField.TYPE_NOT_STORED));
            w.AddDocument(doc);
            w.Commit();
            Assert.AreEqual(1, w.SegmentCount);

            w.DeleteDocuments(new Term("field", "0"));
            w.Commit();
            Assert.AreEqual(1, w.SegmentCount);
            w.Dispose();

            ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
            //MemoryStream bos = new MemoryStream(1024);
            CheckIndex checker = new CheckIndex(dir);
            checker.InfoStream = new StreamWriter(bos, Encoding.UTF8);
            CheckIndex.Status indexStatus = checker.DoCheckIndex(null);
            Assert.IsTrue(indexStatus.Clean);
            checker.FlushInfoStream();
            string s = bos.ToString();

            // Segment should have deletions:
            Assert.IsTrue(s.Contains("has deletions"), "string was: " + s);
            w = new IndexWriter(dir, (IndexWriterConfig)iwc.Clone());
            w.ForceMerge(1);
            w.Dispose();

            bos = new ByteArrayOutputStream(1024);
            checker.InfoStream = new StreamWriter(bos, Encoding.UTF8);
            indexStatus = checker.DoCheckIndex(null);
            Assert.IsTrue(indexStatus.Clean);
            checker.FlushInfoStream();
            s = bos.ToString();
            Assert.IsFalse(s.Contains("has deletions"));
            dir.Dispose();
        }
        public virtual void TestLiveChangeToCFS()
        {
            Directory dir = NewDirectory();
            IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            iwc.SetMergePolicy(NewLogMergePolicy(true));
            // Start false:
            iwc.SetUseCompoundFile(false);
            iwc.MergePolicy.NoCFSRatio = 0.0d;
            IndexWriter w = new IndexWriter(dir, iwc);
            // Change to true:
            w.Config.SetUseCompoundFile(true);

            Document doc = new Document();
            doc.Add(NewStringField("field", "foo", Store.NO));
            w.AddDocument(doc);
            w.Commit();
            Assert.IsTrue(w.NewestSegment().Info.UseCompoundFile, "Expected CFS after commit");

            doc.Add(NewStringField("field", "foo", Store.NO));
            w.AddDocument(doc);
            w.Commit();
            w.ForceMerge(1);
            w.Commit();

            // no compound files after merge
            Assert.IsFalse(w.NewestSegment().Info.UseCompoundFile, "Expected Non-CFS after merge");

            MergePolicy lmp = w.Config.MergePolicy;
            lmp.NoCFSRatio = 1.0;
            lmp.MaxCFSSegmentSizeMB = double.PositiveInfinity;

            w.AddDocument(doc);
            w.ForceMerge(1);
            w.Commit();
            Assert.IsTrue(w.NewestSegment().Info.UseCompoundFile, "Expected CFS after merge");
            w.Dispose();
            dir.Dispose();
        }
        public virtual void Test()
        {
            IList <string> postingsList   = new List <string>();
            int            numTerms       = AtLeast(300);
            int            maxTermsPerDoc = TestUtil.NextInt(Random(), 10, 20);
            bool           isSimpleText   = "SimpleText".Equals(TestUtil.GetPostingsFormat("field"));

            IndexWriterConfig iwc = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            if ((isSimpleText || iwc.MergePolicy is MockRandomMergePolicy) && (TEST_NIGHTLY || RANDOM_MULTIPLIER > 1))
            {
                // Otherwise test can take way too long (> 2 hours)
                numTerms /= 2;
            }
            if (VERBOSE)
            {
                Console.WriteLine("maxTermsPerDoc=" + maxTermsPerDoc);
                Console.WriteLine("numTerms=" + numTerms);
            }
            for (int i = 0; i < numTerms; i++)
            {
                string term = Convert.ToString(i);
                for (int j = 0; j < i; j++)
                {
                    postingsList.Add(term);
                }
            }

            Collections.Shuffle(postingsList);

            ConcurrentQueue <string> postings = new ConcurrentQueue <string>(postingsList);

            Directory dir = NewFSDirectory(CreateTempDir(GetFullMethodName()));

            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);

            int threadCount = TestUtil.NextInt(Random(), 1, 5);

            if (VERBOSE)
            {
                Console.WriteLine("config: " + iw.w.Config);
                Console.WriteLine("threadCount=" + threadCount);
            }

            Field     prototype = NewTextField("field", "", Field.Store.NO);
            FieldType fieldType = new FieldType(prototype.FieldType);

            if (Random().NextBoolean())
            {
                fieldType.OmitNorms = true;
            }
            int options = Random().Next(3);

            if (options == 0)
            {
                fieldType.IndexOptions     = IndexOptions.DOCS_AND_FREQS; // we dont actually need positions
                fieldType.StoreTermVectors = true;                        // but enforce term vectors when we do this so we check SOMETHING
            }
            else if (options == 1 && !DoesntSupportOffsets.Contains(TestUtil.GetPostingsFormat("field")))
            {
                fieldType.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
            }
            // else just positions

            ThreadClass[]  threads     = new ThreadClass[threadCount];
            CountdownEvent startingGun = new CountdownEvent(1);

            for (int threadID = 0; threadID < threadCount; threadID++)
            {
                Random   threadRandom = new Random(Random().Next());
                Document document     = new Document();
                Field    field        = new Field("field", "", fieldType);
                document.Add(field);
                threads[threadID] = new ThreadAnonymousInnerClassHelper(this, numTerms, maxTermsPerDoc, postings, iw, startingGun, threadRandom, document, field);
                threads[threadID].Start();
            }
            startingGun.Signal();
            foreach (ThreadClass t in threads)
            {
                t.Join();
            }

            iw.ForceMerge(1);
            DirectoryReader ir = iw.Reader;

            Assert.AreEqual(1, ir.Leaves.Count);
            AtomicReader air   = (AtomicReader)ir.Leaves[0].Reader;
            Terms        terms = air.GetTerms("field");

            // numTerms-1 because there cannot be a term 0 with 0 postings:
            Assert.AreEqual(numTerms - 1, terms.Count);
            TermsEnum termsEnum = terms.GetIterator(null);
            BytesRef  termBR;

            while ((termBR = termsEnum.Next()) != null)
            {
                int value = Convert.ToInt32(termBR.Utf8ToString());
                Assert.AreEqual(value, termsEnum.TotalTermFreq);
                // don't really need to check more than this, as CheckIndex
                // will verify that totalTermFreq == total number of positions seen
                // from a docsAndPositionsEnum.
            }
            ir.Dispose();
            iw.Dispose();
            dir.Dispose();
        }
        public virtual void TestDocsWithField()
        {
            AssumeTrue("codec does not support docsWithField", DefaultCodecSupportsDocsWithField);
            Directory dir = NewDirectory();

            IndexWriterConfig iwc = NewIndexWriterConfig(Random, TEST_VERSION_CURRENT, null);

            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter iw = new RandomIndexWriter(Random, dir, iwc);

            int numDocs = AtLeast(500);

            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();
                if (Random.Next(4) >= 0)
                {
                    doc.Add(new NumericDocValuesField("numbers", Random.NextInt64()));
                }
                doc.Add(new NumericDocValuesField("numbersAlways", Random.NextInt64()));
                iw.AddDocument(doc);
                if (Random.Next(17) == 0)
                {
                    iw.Commit();
                }
            }
            DirectoryReader ir = iw.GetReader();

            iw.ForceMerge(1);
            DirectoryReader ir2    = iw.GetReader();
            AtomicReader    merged = GetOnlySegmentReader(ir2);

            iw.Dispose();

            IBits multi  = MultiDocValues.GetDocsWithField(ir, "numbers");
            IBits single = merged.GetDocsWithField("numbers");

            if (multi == null)
            {
                Assert.IsNull(single);
            }
            else
            {
                Assert.AreEqual(single.Length, multi.Length);
                for (int i = 0; i < numDocs; i++)
                {
                    Assert.AreEqual(single.Get(i), multi.Get(i));
                }
            }

            multi  = MultiDocValues.GetDocsWithField(ir, "numbersAlways");
            single = merged.GetDocsWithField("numbersAlways");
            Assert.AreEqual(single.Length, multi.Length);
            for (int i = 0; i < numDocs; i++)
            {
                Assert.AreEqual(single.Get(i), multi.Get(i));
            }
            ir.Dispose();
            ir2.Dispose();
            dir.Dispose();
        }
        public virtual void TestVariableBinary([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")]IConcurrentMergeScheduler scheduler)
        {
            BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BVariableBinary"));
            if (dir is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
            }

            var config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
                            .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
                            .SetRAMBufferSizeMB(256.0)
                            .SetMergeScheduler(scheduler)
                            .SetMergePolicy(NewLogMergePolicy(false, 10))
                            .SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE);
            IndexWriter w = new IndexWriter(dir, config);

            Document doc = new Document();
            var bytes = new byte[4];
            ByteArrayDataOutput encoder = new ByteArrayDataOutput(bytes);
            BytesRef data = new BytesRef(bytes);
            BinaryDocValuesField dvField = new BinaryDocValuesField("dv", data);
            doc.Add(dvField);

            for (int i = 0; i < int.MaxValue; i++)
            {
                encoder.Reset(bytes);
                encoder.WriteVInt(i % 65535); // 1, 2, or 3 bytes
                data.Length = encoder.Position;
                w.AddDocument(doc);
                if (i % 100000 == 0)
                {
                    Console.WriteLine("indexed: " + i);
                    Console.Out.Flush();
                }
            }

            w.ForceMerge(1);
            w.Dispose();

            Console.WriteLine("verifying...");
            Console.Out.Flush();

            DirectoryReader r = DirectoryReader.Open(dir);
            int expectedValue = 0;
            ByteArrayDataInput input = new ByteArrayDataInput();
            foreach (AtomicReaderContext context in r.Leaves)
            {
                AtomicReader reader = context.AtomicReader;
                BytesRef scratch = new BytesRef(bytes);
                BinaryDocValues dv = reader.GetBinaryDocValues("dv");
                for (int i = 0; i < reader.MaxDoc; i++)
                {
                    dv.Get(i, scratch);
                    input.Reset((byte[])(Array)scratch.Bytes, scratch.Offset, scratch.Length);
                    Assert.AreEqual(expectedValue % 65535, input.ReadVInt());
                    Assert.IsTrue(input.Eof());
                    expectedValue++;
                }
            }

            r.Dispose();
            dir.Dispose();
        }
Exemple #52
0
        public virtual void TestRandom()
        {
            int                    numThreads          = 1 + Random.Next(8);
            int                    numDocumentsToIndex = 50 + AtLeast(70);
            AtomicInt32            numDocs             = new AtomicInt32(numDocumentsToIndex);
            Directory              dir         = NewDirectory();
            IndexWriterConfig      iwc         = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
            MockDefaultFlushPolicy flushPolicy = new MockDefaultFlushPolicy();

            iwc.SetFlushPolicy(flushPolicy);

            int numDWPT = 1 + Random.Next(8);
            DocumentsWriterPerThreadPool threadPool = new ThreadAffinityDocumentsWriterThreadPool(numDWPT);

            iwc.SetIndexerThreadPool(threadPool);

            IndexWriter writer = new IndexWriter(dir, iwc);

            flushPolicy = (MockDefaultFlushPolicy)writer.Config.FlushPolicy;
            DocumentsWriter docsWriter = writer.DocsWriter;

            Assert.IsNotNull(docsWriter);
            DocumentsWriterFlushControl flushControl = docsWriter.flushControl;

            Assert.AreEqual(0, flushControl.FlushBytes, " bytes must be 0 after init");

            IndexThread[] threads = new IndexThread[numThreads];
            for (int x = 0; x < threads.Length; x++)
            {
                threads[x] = new IndexThread(this, numDocs, numThreads, writer, LineDocFile, true);
                threads[x].Start();
            }

            for (int x = 0; x < threads.Length; x++)
            {
                threads[x].Join();
            }
            Assert.AreEqual(0, flushControl.FlushBytes, " all flushes must be due");
            Assert.AreEqual(numDocumentsToIndex, writer.NumDocs);
            Assert.AreEqual(numDocumentsToIndex, writer.MaxDoc);
            if (flushPolicy.FlushOnRAM && !flushPolicy.FlushOnDocCount && !flushPolicy.FlushOnDeleteTerms)
            {
                long maxRAMBytes = (long)(iwc.RAMBufferSizeMB * 1024.0 * 1024.0);
                Assert.IsTrue(flushPolicy.PeakBytesWithoutFlush <= maxRAMBytes, "peak bytes without flush exceeded watermark");
                if (flushPolicy.HasMarkedPending)
                {
                    assertTrue("max: " + maxRAMBytes + " " + flushControl.peakActiveBytes, maxRAMBytes <= flushControl.peakActiveBytes);
                }
            }
            AssertActiveBytesAfter(flushControl);
            writer.Commit();
            Assert.AreEqual(0, flushControl.ActiveBytes);
            IndexReader r = DirectoryReader.Open(dir);

            Assert.AreEqual(numDocumentsToIndex, r.NumDocs);
            Assert.AreEqual(numDocumentsToIndex, r.MaxDoc);
            if (!flushPolicy.FlushOnRAM)
            {
                assertFalse("never stall if we don't flush on RAM", docsWriter.flushControl.stallControl.WasStalled);
                assertFalse("never block if we don't flush on RAM", docsWriter.flushControl.stallControl.HasBlocked);
            }
            r.Dispose();
            writer.Dispose();
            dir.Dispose();
        }
Exemple #53
0
        public virtual void TestInvalidValues()
        {
            IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            // Test IndexDeletionPolicy
            Assert.AreEqual(typeof(KeepOnlyLastCommitDeletionPolicy), conf.IndexDeletionPolicy.GetType());
            conf.SetIndexDeletionPolicy(new SnapshotDeletionPolicy(null));
            Assert.AreEqual(typeof(SnapshotDeletionPolicy), conf.IndexDeletionPolicy.GetType());
            try
            {
                conf.SetIndexDeletionPolicy(null);
                Assert.Fail();
            }
#pragma warning disable 168
            catch (System.ArgumentException e)
#pragma warning restore 168
            {
                // ok
            }

            // Test MergeScheduler
#if FEATURE_TASKMERGESCHEDULER
            Assert.AreEqual(typeof(TaskMergeScheduler), conf.MergeScheduler.GetType());
#else
            Assert.AreEqual(typeof(ConcurrentMergeScheduler), conf.MergeScheduler.GetType());
#endif
            conf.SetMergeScheduler(new SerialMergeScheduler());
            Assert.AreEqual(typeof(SerialMergeScheduler), conf.MergeScheduler.GetType());
            try
            {
                conf.SetMergeScheduler(null);
                Assert.Fail();
            }
#pragma warning disable 168
            catch (System.ArgumentException e)
#pragma warning restore 168
            {
                // ok
            }

            // Test Similarity:
            // we shouldnt assert what the default is, just that its not null.
            Assert.IsTrue(IndexSearcher.DefaultSimilarity == conf.Similarity);
            conf.SetSimilarity(new MySimilarity());
            Assert.AreEqual(typeof(MySimilarity), conf.Similarity.GetType());
            try
            {
                conf.SetSimilarity(null);
                Assert.Fail();
            }
#pragma warning disable 168
            catch (System.ArgumentException e)
#pragma warning restore 168
            {
                // ok
            }

            // Test IndexingChain
            Assert.IsTrue(DocumentsWriterPerThread.DefaultIndexingChain == conf.IndexingChain);
            conf.SetIndexingChain(new MyIndexingChain());
            Assert.AreEqual(typeof(MyIndexingChain), conf.IndexingChain.GetType());
            try
            {
                conf.SetIndexingChain(null);
                Assert.Fail();
            }
#pragma warning disable 168
            catch (System.ArgumentException e)
#pragma warning restore 168
            {
                // ok
            }

            try
            {
                conf.SetMaxBufferedDeleteTerms(0);
                Assert.Fail("should not have succeeded to set maxBufferedDeleteTerms to 0");
            }
#pragma warning disable 168
            catch (System.ArgumentException e)
#pragma warning restore 168
            {
                // this is expected
            }

            try
            {
                conf.SetMaxBufferedDocs(1);
                Assert.Fail("should not have succeeded to set maxBufferedDocs to 1");
            }
#pragma warning disable 168
            catch (System.ArgumentException e)
#pragma warning restore 168
            {
                // this is expected
            }

            try
            {
                // Disable both MAX_BUF_DOCS and RAM_SIZE_MB
                conf.SetMaxBufferedDocs(4);
                conf.SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
                conf.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
                Assert.Fail("should not have succeeded to disable maxBufferedDocs when ramBufferSizeMB is disabled as well");
            }
#pragma warning disable 168
            catch (System.ArgumentException e)
#pragma warning restore 168
            {
                // this is expected
            }

            conf.SetRAMBufferSizeMB(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB);
            conf.SetMaxBufferedDocs(IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS);
            try
            {
                conf.SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
                Assert.Fail("should not have succeeded to disable ramBufferSizeMB when maxBufferedDocs is disabled as well");
            }
#pragma warning disable 168
            catch (System.ArgumentException e)
#pragma warning restore 168
            {
                // this is expected
            }

            // Test setReaderTermsIndexDivisor
            try
            {
                conf.SetReaderTermsIndexDivisor(0);
                Assert.Fail("should not have succeeded to set termsIndexDivisor to 0");
            }
#pragma warning disable 168
            catch (System.ArgumentException e)
#pragma warning restore 168
            {
                // this is expected
            }

            // Setting to -1 is ok
            conf.SetReaderTermsIndexDivisor(-1);
            try
            {
                conf.SetReaderTermsIndexDivisor(-2);
                Assert.Fail("should not have succeeded to set termsIndexDivisor to < -1");
            }
#pragma warning disable 168
            catch (System.ArgumentException e)
#pragma warning restore 168
            {
                // this is expected
            }

            try
            {
                conf.SetRAMPerThreadHardLimitMB(2048);
                Assert.Fail("should not have succeeded to set RAMPerThreadHardLimitMB to >= 2048");
            }
#pragma warning disable 168
            catch (System.ArgumentException e)
#pragma warning restore 168
            {
                // this is expected
            }

            try
            {
                conf.SetRAMPerThreadHardLimitMB(0);
                Assert.Fail("should not have succeeded to set RAMPerThreadHardLimitMB to 0");
            }
#pragma warning disable 168
            catch (System.ArgumentException e)
#pragma warning restore 168
            {
                // this is expected
            }

            // Test MergePolicy
            Assert.AreEqual(typeof(TieredMergePolicy), conf.MergePolicy.GetType());
            conf.SetMergePolicy(new LogDocMergePolicy());
            Assert.AreEqual(typeof(LogDocMergePolicy), conf.MergePolicy.GetType());
            try
            {
                conf.SetMergePolicy(null);
                Assert.Fail();
            }
#pragma warning disable 168
            catch (System.ArgumentException e)
#pragma warning restore 168
            {
                // ok
            }
        }
Exemple #54
0
        // [Test, LongRunningTest, Timeout(int.MaxValue)] // LUCENENET NOTE: For now, we are overriding this test in every subclass to pull it into the right context for the subclass
        public virtual void TestBigDocuments()
        {
            // "big" as "much bigger than the chunk size"
            // for this test we force a FS dir
            // we can't just use newFSDirectory, because this test doesn't really index anything.
            // so if we get NRTCachingDir+SimpleText, we make massive stored fields and OOM (LUCENE-4484)
            Directory         dir    = new MockDirectoryWrapper(Random(), new MMapDirectory(CreateTempDir("testBigDocuments")));
            IndexWriterConfig iwConf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            iwConf.SetMaxBufferedDocs(RandomInts.NextIntBetween(Random(), 2, 30));
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwConf);

            if (dir is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
            }

            Document emptyDoc = new Document(); // emptyDoc
            Document bigDoc1  = new Document(); // lot of small fields
            Document bigDoc2  = new Document(); // 1 very big field

            Field idField = new StringField("id", "", Field.Store.NO);

            emptyDoc.Add(idField);
            bigDoc1.Add(idField);
            bigDoc2.Add(idField);

            FieldType onlyStored = new FieldType(StringField.TYPE_STORED);

            onlyStored.IsIndexed = false;

            Field smallField = new Field("fld", RandomByteArray(Random().Next(10), 256), onlyStored);
            int   numFields  = RandomInts.NextIntBetween(Random(), 500000, 1000000);

            for (int i = 0; i < numFields; ++i)
            {
                bigDoc1.Add(smallField);
            }

            Field bigField = new Field("fld", RandomByteArray(RandomInts.NextIntBetween(Random(), 1000000, 5000000), 2), onlyStored);

            bigDoc2.Add(bigField);

            int numDocs = AtLeast(5);

            Document[] docs = new Document[numDocs];
            for (int i = 0; i < numDocs; ++i)
            {
                docs[i] = RandomInts.RandomFrom(Random(), Arrays.AsList(emptyDoc, bigDoc1, bigDoc2));
            }
            for (int i = 0; i < numDocs; ++i)
            {
                idField.SetStringValue("" + i);
                iw.AddDocument(docs[i]);
                if (Random().Next(numDocs) == 0)
                {
                    iw.Commit();
                }
            }
            iw.Commit();
            iw.ForceMerge(1); // look at what happens when big docs are merged
            DirectoryReader rd       = DirectoryReader.Open(dir);
            IndexSearcher   searcher = new IndexSearcher(rd);

            for (int i = 0; i < numDocs; ++i)
            {
                Query   query   = new TermQuery(new Term("id", "" + i));
                TopDocs topDocs = searcher.Search(query, 1);
                Assert.AreEqual(1, topDocs.TotalHits, "" + i);
                Document doc = rd.Document(topDocs.ScoreDocs[0].Doc);
                Assert.IsNotNull(doc);
                IIndexableField[] fieldValues = doc.GetFields("fld");
                Assert.AreEqual(docs[i].GetFields("fld").Length, fieldValues.Length);
                if (fieldValues.Length > 0)
                {
                    Assert.AreEqual(docs[i].GetFields("fld")[0].GetBinaryValue(), fieldValues[0].GetBinaryValue());
                }
            }
            rd.Dispose();
            iw.Dispose();
            dir.Dispose();
        }
        public virtual void Test([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")]IConcurrentMergeScheduler scheduler)
        {
            BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BPostingsBytes1"));
            if (dir is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
            }

            var config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
                            .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
                            .SetRAMBufferSizeMB(256.0)
                            .SetMergeScheduler(scheduler)
                            .SetMergePolicy(NewLogMergePolicy(false, 10))
                            .SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE);
            IndexWriter w = new IndexWriter(dir, config);

            MergePolicy mp = w.Config.MergePolicy;
            if (mp is LogByteSizeMergePolicy)
            {
                // 1 petabyte:
                ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1024 * 1024 * 1024;
            }

            Document doc = new Document();
            FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
            ft.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS;
            ft.OmitNorms = true;
            MyTokenStream tokenStream = new MyTokenStream();
            Field field = new Field("field", tokenStream, ft);
            doc.Add(field);

            const int numDocs = 1000;
            for (int i = 0; i < numDocs; i++)
            {
                if (i % 2 == 1) // trick blockPF's little optimization
                {
                    tokenStream.n = 65536;
                }
                else
                {
                    tokenStream.n = 65537;
                }
                w.AddDocument(doc);
            }
            w.ForceMerge(1);
            w.Dispose();

            DirectoryReader oneThousand = DirectoryReader.Open(dir);
            IndexReader[] subReaders = new IndexReader[1000];
            Arrays.Fill(subReaders, oneThousand);
            MultiReader mr = new MultiReader(subReaders);
            BaseDirectoryWrapper dir2 = NewFSDirectory(CreateTempDir("2BPostingsBytes2"));
            if (dir2 is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir2).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
            }
            IndexWriter w2 = new IndexWriter(dir2, new IndexWriterConfig(TEST_VERSION_CURRENT, null));
            w2.AddIndexes(mr);
            w2.ForceMerge(1);
            w2.Dispose();
            oneThousand.Dispose();

            DirectoryReader oneMillion = DirectoryReader.Open(dir2);
            subReaders = new IndexReader[2000];
            Arrays.Fill(subReaders, oneMillion);
            mr = new MultiReader(subReaders);
            BaseDirectoryWrapper dir3 = NewFSDirectory(CreateTempDir("2BPostingsBytes3"));
            if (dir3 is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir3).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
            }
            IndexWriter w3 = new IndexWriter(dir3, new IndexWriterConfig(TEST_VERSION_CURRENT, null));
            w3.AddIndexes(mr);
            w3.ForceMerge(1);
            w3.Dispose();
            oneMillion.Dispose();

            dir.Dispose();
            dir2.Dispose();
            dir3.Dispose();
        }
Exemple #56
0
        public virtual void TestDefaults()
        {
            IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            Assert.AreEqual(typeof(MockAnalyzer), conf.Analyzer.GetType());
            Assert.IsNull(conf.IndexCommit);
            Assert.AreEqual(typeof(KeepOnlyLastCommitDeletionPolicy), conf.IndexDeletionPolicy.GetType());
#if FEATURE_TASKMERGESCHEDULER
            Assert.AreEqual(typeof(TaskMergeScheduler), conf.MergeScheduler.GetType());
#else
            Assert.AreEqual(typeof(ConcurrentMergeScheduler), conf.MergeScheduler.GetType());
#endif
            Assert.AreEqual(OpenMode.CREATE_OR_APPEND, conf.OpenMode);
            // we don't need to assert this, it should be unspecified
            Assert.IsTrue(IndexSearcher.DefaultSimilarity == conf.Similarity);
            Assert.AreEqual(IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL, conf.TermIndexInterval);
            Assert.AreEqual(IndexWriterConfig.DefaultWriteLockTimeout, conf.WriteLockTimeout);
            Assert.AreEqual(IndexWriterConfig.WRITE_LOCK_TIMEOUT, IndexWriterConfig.DefaultWriteLockTimeout);
            Assert.AreEqual(IndexWriterConfig.DEFAULT_MAX_BUFFERED_DELETE_TERMS, conf.MaxBufferedDeleteTerms);
            Assert.AreEqual(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB, conf.RAMBufferSizeMB, 0.0);
            Assert.AreEqual(IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS, conf.MaxBufferedDocs);
            Assert.AreEqual(IndexWriterConfig.DEFAULT_READER_POOLING, conf.UseReaderPooling);
            Assert.IsTrue(DocumentsWriterPerThread.DefaultIndexingChain == conf.IndexingChain);
            Assert.IsNull(conf.MergedSegmentWarmer);
            Assert.AreEqual(IndexWriterConfig.DEFAULT_READER_TERMS_INDEX_DIVISOR, conf.ReaderTermsIndexDivisor);
            Assert.AreEqual(typeof(TieredMergePolicy), conf.MergePolicy.GetType());
            Assert.AreEqual(typeof(ThreadAffinityDocumentsWriterThreadPool), conf.IndexerThreadPool.GetType());
            Assert.AreEqual(typeof(FlushByRamOrCountsPolicy), conf.FlushPolicy.GetType());
            Assert.AreEqual(IndexWriterConfig.DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB, conf.RAMPerThreadHardLimitMB);
            Assert.AreEqual(Codec.Default, conf.Codec);
            Assert.AreEqual(InfoStream.Default, conf.InfoStream);
            Assert.AreEqual(IndexWriterConfig.DEFAULT_USE_COMPOUND_FILE_SYSTEM, conf.UseCompoundFile);
            // Sanity check - validate that all getters are covered.
            HashSet <string> getters = new HashSet <string>();
            getters.Add("getAnalyzer");
            getters.Add("getIndexCommit");
            getters.Add("getIndexDeletionPolicy");
            getters.Add("getMaxFieldLength");
            getters.Add("getMergeScheduler");
            getters.Add("getOpenMode");
            getters.Add("getSimilarity");
            getters.Add("getTermIndexInterval");
            getters.Add("getWriteLockTimeout");
            getters.Add("getDefaultWriteLockTimeout");
            getters.Add("getMaxBufferedDeleteTerms");
            getters.Add("getRAMBufferSizeMB");
            getters.Add("getMaxBufferedDocs");
            getters.Add("getIndexingChain");
            getters.Add("getMergedSegmentWarmer");
            getters.Add("getMergePolicy");
            getters.Add("getMaxThreadStates");
            getters.Add("getReaderPooling");
            getters.Add("getIndexerThreadPool");
            getters.Add("getReaderTermsIndexDivisor");
            getters.Add("getFlushPolicy");
            getters.Add("getRAMPerThreadHardLimitMB");
            getters.Add("getCodec");
            getters.Add("getInfoStream");
            getters.Add("getUseCompoundFile");

            foreach (MethodInfo m in typeof(IndexWriterConfig).GetMethods())
            {
                if (m.DeclaringType == typeof(IndexWriterConfig) && m.Name.StartsWith("get", StringComparison.Ordinal) && !m.Name.StartsWith("get_", StringComparison.Ordinal))
                {
                    Assert.IsTrue(getters.Contains(m.Name), "method " + m.Name + " is not tested for defaults");
                }
            }
        }
        public virtual void TestInvalidValues()
        {
            IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            // Test IndexDeletionPolicy
            Assert.AreEqual(typeof(KeepOnlyLastCommitDeletionPolicy), conf.DelPolicy.GetType());
            conf.SetIndexDeletionPolicy(new SnapshotDeletionPolicy(null));
            Assert.AreEqual(typeof(SnapshotDeletionPolicy), conf.DelPolicy.GetType());
            try
            {
                conf.SetIndexDeletionPolicy(null);
                Assert.Fail();
            }
            catch (System.ArgumentException e)
            {
                // ok
            }

            // Test MergeScheduler
            Assert.AreEqual(typeof(ConcurrentMergeScheduler), conf.MergeScheduler.GetType());
            conf.SetMergeScheduler(new SerialMergeScheduler());
            Assert.AreEqual(typeof(SerialMergeScheduler), conf.MergeScheduler.GetType());
            try
            {
                conf.SetMergeScheduler(null);
                Assert.Fail();
            }
            catch (System.ArgumentException e)
            {
                // ok
            }

            // Test Similarity:
            // we shouldnt assert what the default is, just that its not null.
            Assert.IsTrue(IndexSearcher.DefaultSimilarity == conf.Similarity);
            conf.SetSimilarity(new MySimilarity());
            Assert.AreEqual(typeof(MySimilarity), conf.Similarity.GetType());
            try
            {
                conf.SetSimilarity(null);
                Assert.Fail();
            }
            catch (System.ArgumentException e)
            {
                // ok
            }

            // Test IndexingChain
            Assert.IsTrue(DocumentsWriterPerThread.DefaultIndexingChain == conf.IndexingChain);
            conf.SetIndexingChain(new MyIndexingChain());
            Assert.AreEqual(typeof(MyIndexingChain), conf.IndexingChain.GetType());
            try
            {
                conf.SetIndexingChain(null);
                Assert.Fail();
            }
            catch (System.ArgumentException e)
            {
                // ok
            }

            try
            {
                conf.SetMaxBufferedDeleteTerms(0);
                Assert.Fail("should not have succeeded to set maxBufferedDeleteTerms to 0");
            }
            catch (System.ArgumentException e)
            {
                // this is expected
            }

            try
            {
                conf.SetMaxBufferedDocs(1);
                Assert.Fail("should not have succeeded to set maxBufferedDocs to 1");
            }
            catch (System.ArgumentException e)
            {
                // this is expected
            }

            try
            {
                // Disable both MAX_BUF_DOCS and RAM_SIZE_MB
                conf.SetMaxBufferedDocs(4);
                conf.SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
                conf.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
                Assert.Fail("should not have succeeded to disable maxBufferedDocs when ramBufferSizeMB is disabled as well");
            }
            catch (System.ArgumentException e)
            {
                // this is expected
            }

            conf.SetRAMBufferSizeMB(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB);
            conf.SetMaxBufferedDocs(IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS);
            try
            {
                conf.SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
                Assert.Fail("should not have succeeded to disable ramBufferSizeMB when maxBufferedDocs is disabled as well");
            }
            catch (System.ArgumentException e)
            {
                // this is expected
            }

            // Test setReaderTermsIndexDivisor
            try
            {
                conf.SetReaderTermsIndexDivisor(0);
                Assert.Fail("should not have succeeded to set termsIndexDivisor to 0");
            }
            catch (System.ArgumentException e)
            {
                // this is expected
            }

            // Setting to -1 is ok
            conf.SetReaderTermsIndexDivisor(-1);
            try
            {
                conf.SetReaderTermsIndexDivisor(-2);
                Assert.Fail("should not have succeeded to set termsIndexDivisor to < -1");
            }
            catch (System.ArgumentException e)
            {
                // this is expected
            }

            try
            {
                conf.SetRAMPerThreadHardLimitMB(2048);
                Assert.Fail("should not have succeeded to set RAMPerThreadHardLimitMB to >= 2048");
            }
            catch (System.ArgumentException e)
            {
                // this is expected
            }

            try
            {
                conf.SetRAMPerThreadHardLimitMB(0);
                Assert.Fail("should not have succeeded to set RAMPerThreadHardLimitMB to 0");
            }
            catch (System.ArgumentException e)
            {
                // this is expected
            }

            // Test MergePolicy
            Assert.AreEqual(typeof(TieredMergePolicy), conf.MergePolicy.GetType());
            conf.SetMergePolicy(new LogDocMergePolicy());
            Assert.AreEqual(typeof(LogDocMergePolicy), conf.MergePolicy.GetType());
            try
            {
                conf.SetMergePolicy(null);
                Assert.Fail();
            }
            catch (System.ArgumentException e)
            {
                // ok
            }
        }
Exemple #58
0
        public virtual void TestVariableBinary([ValueSource(typeof(ConcurrentMergeSchedulerFactories), "Values")] Func <IConcurrentMergeScheduler> newScheduler)
        {
            BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BVariableBinary"));

            if (dir is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir).Throttling = Throttling.NEVER;
            }

            var config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
                         .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
                         .SetRAMBufferSizeMB(256.0)
                         .SetMergeScheduler(newScheduler())
                         .SetMergePolicy(NewLogMergePolicy(false, 10))
                         .SetOpenMode(OpenMode.CREATE);
            IndexWriter w = new IndexWriter(dir, config);

            Document             doc     = new Document();
            var                  bytes   = new byte[4];
            ByteArrayDataOutput  encoder = new ByteArrayDataOutput(bytes);
            BytesRef             data    = new BytesRef(bytes);
            BinaryDocValuesField dvField = new BinaryDocValuesField("dv", data);

            doc.Add(dvField);

            for (int i = 0; i < int.MaxValue; i++)
            {
                encoder.Reset(bytes);
                encoder.WriteVInt32(i % 65535); // 1, 2, or 3 bytes
                data.Length = encoder.Position;
                w.AddDocument(doc);
                if (i % 100000 == 0)
                {
                    Console.WriteLine("indexed: " + i);
                    Console.Out.Flush();
                }
            }

            w.ForceMerge(1);
            w.Dispose();

            Console.WriteLine("verifying...");
            Console.Out.Flush();

            DirectoryReader    r             = DirectoryReader.Open(dir);
            int                expectedValue = 0;
            ByteArrayDataInput input         = new ByteArrayDataInput();

            foreach (AtomicReaderContext context in r.Leaves)
            {
                AtomicReader    reader  = context.AtomicReader;
                BytesRef        scratch = new BytesRef(bytes);
                BinaryDocValues dv      = reader.GetBinaryDocValues("dv");
                for (int i = 0; i < reader.MaxDoc; i++)
                {
                    dv.Get(i, scratch);
                    input.Reset(scratch.Bytes, scratch.Offset, scratch.Length);
                    Assert.AreEqual(expectedValue % 65535, input.ReadVInt32());
                    Assert.IsTrue(input.Eof);
                    expectedValue++;
                }
            }

            r.Dispose();
            dir.Dispose();
        }
        public virtual void TestDefaults()
        {
            IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            Assert.AreEqual(typeof(MockAnalyzer), conf.Analyzer.GetType());
            Assert.IsNull(conf.IndexCommit);
            Assert.AreEqual(typeof(KeepOnlyLastCommitDeletionPolicy), conf.DelPolicy.GetType());
            Assert.AreEqual(typeof(ConcurrentMergeScheduler), conf.MergeScheduler.GetType());
            Assert.AreEqual(OpenMode_e.CREATE_OR_APPEND, conf.OpenMode);
            // we don't need to assert this, it should be unspecified
            Assert.IsTrue(IndexSearcher.DefaultSimilarity == conf.Similarity);
            Assert.AreEqual(IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL, conf.TermIndexInterval);
            Assert.AreEqual(IndexWriterConfig.DefaultWriteLockTimeout, conf.WriteLockTimeout);
            Assert.AreEqual(IndexWriterConfig.WRITE_LOCK_TIMEOUT, IndexWriterConfig.DefaultWriteLockTimeout);
            Assert.AreEqual(IndexWriterConfig.DEFAULT_MAX_BUFFERED_DELETE_TERMS, conf.MaxBufferedDeleteTerms);
            Assert.AreEqual(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB, conf.RAMBufferSizeMB, 0.0);
            Assert.AreEqual(IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS, conf.MaxBufferedDocs);
            Assert.AreEqual(IndexWriterConfig.DEFAULT_READER_POOLING, conf.ReaderPooling);
            Assert.IsTrue(DocumentsWriterPerThread.DefaultIndexingChain == conf.IndexingChain);
            Assert.IsNull(conf.MergedSegmentWarmer);
            Assert.AreEqual(IndexWriterConfig.DEFAULT_READER_TERMS_INDEX_DIVISOR, conf.ReaderTermsIndexDivisor);
            Assert.AreEqual(typeof(TieredMergePolicy), conf.MergePolicy.GetType());
            Assert.AreEqual(typeof(ThreadAffinityDocumentsWriterThreadPool), conf.IndexerThreadPool.GetType());
            Assert.AreEqual(typeof(FlushByRamOrCountsPolicy), conf.FlushPolicy.GetType());
            Assert.AreEqual(IndexWriterConfig.DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB, conf.RAMPerThreadHardLimitMB);
            Assert.AreEqual(Codec.Default, conf.Codec);
            Assert.AreEqual(InfoStream.Default, conf.InfoStream);
            Assert.AreEqual(IndexWriterConfig.DEFAULT_USE_COMPOUND_FILE_SYSTEM, conf.UseCompoundFile);
            // Sanity check - validate that all getters are covered.
            HashSet<string> getters = new HashSet<string>();
            getters.Add("getAnalyzer");
            getters.Add("getIndexCommit");
            getters.Add("getIndexDeletionPolicy");
            getters.Add("getMaxFieldLength");
            getters.Add("getMergeScheduler");
            getters.Add("getOpenMode");
            getters.Add("getSimilarity");
            getters.Add("getTermIndexInterval");
            getters.Add("getWriteLockTimeout");
            getters.Add("getDefaultWriteLockTimeout");
            getters.Add("getMaxBufferedDeleteTerms");
            getters.Add("getRAMBufferSizeMB");
            getters.Add("getMaxBufferedDocs");
            getters.Add("getIndexingChain");
            getters.Add("getMergedSegmentWarmer");
            getters.Add("getMergePolicy");
            getters.Add("getMaxThreadStates");
            getters.Add("getReaderPooling");
            getters.Add("getIndexerThreadPool");
            getters.Add("getReaderTermsIndexDivisor");
            getters.Add("getFlushPolicy");
            getters.Add("getRAMPerThreadHardLimitMB");
            getters.Add("getCodec");
            getters.Add("getInfoStream");
            getters.Add("getUseCompoundFile");

            foreach (MethodInfo m in typeof(IndexWriterConfig).GetMethods())
            {
                if (m.DeclaringType == typeof(IndexWriterConfig) && m.Name.StartsWith("get") && !m.Name.StartsWith("get_"))
                {
                    Assert.IsTrue(getters.Contains(m.Name), "method " + m.Name + " is not tested for defaults");
                }
            }
        }
Exemple #60
0
        public virtual void TestFixedBinary([ValueSource(typeof(ConcurrentMergeSchedulerFactories), "Values")] Func <IConcurrentMergeScheduler> newScheduler)
        {
            BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BFixedBinary"));

            if (dir is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir).Throttling = Throttling.NEVER;
            }
            var config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
                         .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
                         .SetRAMBufferSizeMB(256.0)
                         .SetMergeScheduler(newScheduler())
                         .SetMergePolicy(NewLogMergePolicy(false, 10))
                         .SetOpenMode(OpenMode.CREATE);
            IndexWriter w = new IndexWriter(dir, config);

            Document             doc     = new Document();
            var                  bytes   = new byte[4];
            BytesRef             data    = new BytesRef(bytes);
            BinaryDocValuesField dvField = new BinaryDocValuesField("dv", data);

            doc.Add(dvField);

            for (int i = 0; i < int.MaxValue; i++)
            {
                bytes[0] = (byte)(i >> 24);
                bytes[1] = (byte)(i >> 16);
                bytes[2] = (byte)(i >> 8);
                bytes[3] = (byte)i;
                w.AddDocument(doc);
                if (i % 100000 == 0)
                {
                    Console.WriteLine("indexed: " + i);
                    Console.Out.Flush();
                }
            }

            w.ForceMerge(1);
            w.Dispose();

            Console.WriteLine("verifying...");
            Console.Out.Flush();

            DirectoryReader r             = DirectoryReader.Open(dir);
            int             expectedValue = 0;

            foreach (AtomicReaderContext context in r.Leaves)
            {
                AtomicReader    reader  = context.AtomicReader;
                BytesRef        scratch = new BytesRef();
                BinaryDocValues dv      = reader.GetBinaryDocValues("dv");
                for (int i = 0; i < reader.MaxDoc; i++)
                {
                    bytes[0] = (byte)(expectedValue >> 24);
                    bytes[1] = (byte)(expectedValue >> 16);
                    bytes[2] = (byte)(expectedValue >> 8);
                    bytes[3] = (byte)expectedValue;
                    dv.Get(i, scratch);
                    Assert.AreEqual(data, scratch);
                    expectedValue++;
                }
            }

            r.Dispose();
            dir.Dispose();
        }