Exemple #1
0
        /// <summary>
        /// Checks which stored-fields files are left in the directory after an
        /// <c>AddDocument</c>/<c>Commit</c> sequence fails with an <see cref="ArgumentException"/>.
        /// <para/>
        /// One valid document is committed first. A second document carrying a
        /// <c>FieldAnonymousInnerClassHelper</c> field is then added, and that step is expected
        /// to throw. Whatever happens, exactly two files ending in <c>.fdt</c> or <c>.fdx</c>
        /// must be listed in the directory afterwards.
        /// </summary>
        public virtual void TestDeletePartiallyWrittenFilesIfAbort()
        {
            Directory         dir    = NewDirectory();
            IndexWriterConfig iwConf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            iwConf.SetMaxBufferedDocs(RandomInts.NextIntBetween(Random(), 2, 30));
            iwConf.SetCodec(CompressingCodec.RandomInstance(Random()));
            // disable CFS because this test checks file names
            iwConf.SetMergePolicy(NewLogMergePolicy(false));
            iwConf.SetUseCompoundFile(false);
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwConf);

            Document validDoc = new Document();

            validDoc.Add(new IntField("id", 0, Field.Store.YES));
            iw.AddDocument(validDoc);
            iw.Commit();

            // make sure that #writeField will fail to trigger an abort
            Document  invalidDoc = new Document();
            FieldType fieldType  = new FieldType();

            fieldType.Stored = true;
            invalidDoc.Add(new FieldAnonymousInnerClassHelper(this, fieldType));

            Assert.Throws <ArgumentException>(() =>
            {
                try
                {
                    iw.AddDocument(invalidDoc);
                    iw.Commit();
                }
                finally
                {
                    int counter = 0;
                    foreach (string fileName in dir.ListAll())
                    {
                        // File extensions are identifiers, not linguistic text: compare ordinally
                        // so the result does not depend on the current culture.
                        if (fileName.EndsWith(".fdt", StringComparison.Ordinal) ||
                            fileName.EndsWith(".fdx", StringComparison.Ordinal))
                        {
                            counter++;
                        }
                    }
                    // Only one .fdt and one .fdx files must have been found
                    Assert.AreEqual(2, counter);
                    iw.Dispose();
                    dir.Dispose();
                }
            });
        }
        /// <summary>
        /// LUCENENET specific
        /// Non-static because NewIndexWriterConfig is now non-static
        /// <para/>
        /// Builds a small index of <c>N_DOCS</c> documents in a new directory, storing the
        /// directory in <c>dir</c> and the analyzer in <c>anlzr</c>.
        /// </summary>
        /// <param name="doMultiSegment">
        /// When <c>true</c>, max buffered docs is set to a value chosen by
        /// <c>TestUtil.NextInt32(Random, 2, 7)</c> and no forced merge is run;
        /// when <c>false</c>, the index is force-merged down to one segment before closing.
        /// </param>
        /// <exception cref="InvalidOperationException">
        /// Thrown when stepping through the IDs by 4 (modulo <c>N_DOCS</c>) revisits an ID
        /// before all documents have been added.
        /// </exception>
        protected internal void CreateIndex(bool doMultiSegment)
        {
            if (VERBOSE)
            {
                Console.WriteLine("TEST: setUp");
            }
            // prepare a small index with just a few documents.
            dir   = NewDirectory();
            anlzr = new MockAnalyzer(Random);
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, anlzr).SetMergePolicy(NewLogMergePolicy());

            if (doMultiSegment)
            {
                iwc.SetMaxBufferedDocs(TestUtil.NextInt32(Random, 2, 7));
            }
            RandomIndexWriter iw = new RandomIndexWriter(Random, dir, iwc);
            // add docs not exactly in natural ID order, to verify we do check the order of docs by scores
            int remaining = N_DOCS;

            bool[] done = new bool[N_DOCS];
            int    i    = 0;

            while (remaining > 0)
            {
                if (done[i])
                {
                    // The step of 4 only visits every ID once when it never cycles back early.
                    // Signal the misconfiguration with a specific exception type rather than the
                    // base Exception. (The message says "primary"; presumably "prime" is meant.)
                    throw new InvalidOperationException("to set this test correctly N_DOCS=" + N_DOCS + " must be primary and greater than 2!");
                }
                AddDoc(iw, i);
                done[i] = true;
                i       = (i + 4) % N_DOCS;
                remaining--;
            }
            if (!doMultiSegment)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: setUp full merge");
                }
                iw.ForceMerge(1);
            }
            iw.Dispose();
            if (VERBOSE)
            {
                Console.WriteLine("TEST: setUp done close");
            }
        }
        /// <summary>
        /// Creates an index for sorting.
        /// </summary>
        /// <param name="dir">Directory the index is written into.</param>
        /// <param name="numDocs">Number of documents to add; their IDs are 0, 10, 20, ...</param>
        /// <param name="random">
        /// Random source handed to the <c>MockAnalyzer</c> and the <c>RandomIndexWriter</c>, and
        /// used to pick the documents to delete (an ID is deleted when <c>NextDouble() &lt; 0.2</c>).
        /// </param>
        public void CreateIndex(Directory dir, int numDocs, Random random)
        {
            // Document IDs are multiples of ten.
            IList<int> docIds = new List<int>();
            int n = 0;
            while (n < numDocs)
            {
                docIds.Add(n * 10);
                n++;
            }

            // Index in shuffled order.
            // LUCENENET NOTE: the shuffle returns its result, so the variable is reassigned.
            docIds = CollectionsHelper.Shuffle(docIds);

            if (VERBOSE)
            {
                int[] shuffled = docIds.ToArray();
                Console.WriteLine("Shuffled IDs for indexing: " + Arrays.ToString(shuffled));
            }

            PositionsTokenStream tokenStream  = new PositionsTokenStream();
            IndexWriterConfig    writerConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));

            // A small buffer forces several segments to be created.
            writerConfig.SetMaxBufferedDocs(4);
            // Wrap the configured similarity so the norms field can be tested.
            writerConfig.SetSimilarity(new NormsSimilarity(writerConfig.Similarity));

            using (RandomIndexWriter indexWriter = new RandomIndexWriter(random, dir, writerConfig))
            {
                indexWriter.RandomForceMerge = false;

                foreach (int docId in docIds)
                {
                    indexWriter.AddDocument(Doc(docId, tokenStream));
                }
                indexWriter.Commit();

                // Delete some of the documents just added.
                foreach (int docId in docIds)
                {
                    if (random.NextDouble() >= 0.2)
                    {
                        continue;
                    }

                    if (VERBOSE)
                    {
                        Console.WriteLine("delete doc_id " + docId);
                    }
                    indexWriter.DeleteDocuments(new Term(ID_FIELD, docId.ToString()));
                }
            }
        }
        /// <summary>
        /// Applies the optional tuning values held by this object to <paramref name="config"/>.
        /// <para/>
        /// A new <c>LogByteSizeMergePolicy</c> is always installed as the merge policy; its
        /// <c>MergeFactor</c> and <c>MaxMergeDocs</c> are set only when the corresponding values
        /// are non-null. <c>MaxBufferedDocs</c>, <c>RamBufferSizeMb</c> and
        /// <c>TermIndexInterval</c> are likewise applied only when non-null.
        /// <para/>
        /// This is best effort: if a setter throws <see cref="ArgumentOutOfRangeException"/>,
        /// the settings after it are skipped, a trace warning is written, and the method
        /// returns normally.
        /// </summary>
        /// <param name="config">The index writer configuration to modify.</param>
        public void ApplyToWriterConfig(IndexWriterConfig config)
        {
            try
            {
                // possibly take in a MergePolicy or configure it elsewhere
                var mergePolicy = new LogByteSizeMergePolicy();
                if (MergeFactor != null)
                {
                    mergePolicy.MergeFactor = (int)MergeFactor;
                }

                if (MaxMergeDocs != null)
                {
                    mergePolicy.MaxMergeDocs = (int)MaxMergeDocs;
                }

                config.MergePolicy = mergePolicy;

                if (MaxBufferedDocs != null)
                {
                    config.SetMaxBufferedDocs((int)MaxBufferedDocs);
                }

                if (RamBufferSizeMb != null)
                {
                    // NOTE(review): the (int) cast drops any fractional part if RamBufferSizeMb
                    // is a floating-point value - confirm the declared type.
                    config.SetRAMBufferSizeMB((int)RamBufferSizeMb);
                }

                if (TermIndexInterval != null)
                {
                    config.SetTermIndexInterval((int)TermIndexInterval);
                }
            }
            catch (ArgumentOutOfRangeException ex)
            {
                // Stay best effort (no rethrow), but do not hide the problem: report which
                // value was rejected so a misconfiguration can be diagnosed.
                System.Diagnostics.Trace.TraceWarning(
                    "ApplyToWriterConfig: a setting was rejected as out of range; the remaining settings were not applied. " + ex.Message);
            }
        }
        /// <summary>
        /// Starts an indexing thread and a reopening thread, then repeatedly acquires a
        /// searcher/taxonomy pair on the calling thread and checks the top children of the
        /// "field" facet until the shared stop flag is set. Both threads are joined before
        /// the manager, writers and directories are closed.
        /// </summary>
        public virtual void TestNrt()
        {
            Store.Directory   indexDir     = NewDirectory();
            Store.Directory   taxonomyDir  = NewDirectory();
            IndexWriterConfig writerConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            // Keep maxBufferedDocs at 500 or above; tiny values
            // can make this test too slow:
            int bufferedDocs = Math.Max(500, writerConfig.MaxBufferedDocs);
            writerConfig.SetMaxBufferedDocs(bufferedDocs);

            // MockRandom/AlcoholicMergePolicy are too slow:
            writerConfig.SetMergePolicy(new TieredMergePolicy { FloorSegmentMB = .001 });

            IndexWriter             indexWriter    = new IndexWriter(indexDir, writerConfig);
            DirectoryTaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxonomyDir);

            FacetsConfig facetsConfig = new FacetsConfig();
            facetsConfig.SetMultiValued("field", true);

            AtomicBoolean stopFlag = new AtomicBoolean();

            // Number of unique facets handed to the indexer as its limit
            // (presumably the indexer sets the stop flag once it is reached):
            int ordLimit = TEST_NIGHTLY ? 100000 : 6000;

            var indexerThread = new IndexerThread(indexWriter, facetsConfig, taxonomyWriter, null, ordLimit, stopFlag)
            {
                Name = "indexer"
            };

            var manager = new SearcherTaxonomyManager(indexWriter, true, null, taxonomyWriter);

            var reopenThread = new ThreadAnonymousInnerClassHelper(this, stopFlag, manager)
            {
                Name = "reopener"
            };

            reopenThread.Start();
            indexerThread.Start();

            try
            {
                while (stopFlag.Get() == false)
                {
                    SearcherAndTaxonomy acquired = manager.Acquire();
                    try
                    {
                        FacetsCollector collector = new FacetsCollector();
                        acquired.searcher.Search(new MatchAllDocsQuery(), collector);

                        Facets      counts      = GetTaxonomyFacetCounts(acquired.taxonomyReader, facetsConfig, collector);
                        FacetResult topChildren = counts.GetTopChildren(10, "field");

                        // Facet results are only checked once at least one document is visible.
                        if (acquired.searcher.IndexReader.NumDocs > 0)
                        {
                            Assert.True(topChildren.ChildCount > 0);
                            Assert.True(topChildren.LabelValues.Length > 0);
                        }
                    }
                    finally
                    {
                        manager.Release(acquired);
                    }
                }
            }
            finally
            {
                indexerThread.Join();
                reopenThread.Join();
            }

            if (VERBOSE)
            {
                Console.WriteLine("TEST: now stop");
            }

            IOUtils.Close(manager, taxonomyWriter, indexWriter, taxonomyDir, indexDir);
        }
Exemple #6
0
        /// <summary>
        /// Writes documents with <c>MockCodec</c>, reopens the index in append mode with
        /// <c>MockCodec2</c>, adds more documents, and finally force-merges to one segment.
        /// Term queries on the "content" field and the writer's <c>MaxDoc</c> are checked
        /// after each stage.
        /// </summary>
        public virtual void TestChangeCodecAndMerge()
        {
            Directory directory = NewDirectory();

            if (VERBOSE)
            {
                Console.WriteLine("TEST: make new index");
            }

            // First stage: a fresh index written with MockCodec.
            IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
            config.SetOpenMode(OpenMode.CREATE);
            config.SetCodec(new MockCodec());
            config.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
            // ((LogMergePolicy)iwconf.getMergePolicy()).setMergeFactor(10);
            IndexWriter w = NewWriter(directory, config);

            AddDocs(w, 10);
            w.Commit();
            AssertQuery(new Term("content", "aaa"), directory, 10);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: addDocs3");
            }
            AddDocs3(w, 10);
            w.Commit();
            w.Dispose();

            AssertQuery(new Term("content", "ccc"), directory, 10);
            AssertQuery(new Term("content", "aaa"), directory, 10);
            Codec previousCodec = config.Codec;

            // Second stage: append to the same index; the codec is replaced by MockCodec2 below.
            config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
            config.SetOpenMode(OpenMode.APPEND);
            config.SetCodec(previousCodec);
            // ((LogMergePolicy)iwconf.getMergePolicy()).setNoCFSRatio(0.0);
            // ((LogMergePolicy)iwconf.getMergePolicy()).setMergeFactor(10);
            config.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);

            // uses standard for field content
            config.SetCodec(new MockCodec2());
            w = NewWriter(directory, config);

            // swap in new codec for currently written segments
            if (VERBOSE)
            {
                Console.WriteLine("TEST: add docs w/ Standard codec for content field");
            }
            AddDocs2(w, 10);
            w.Commit();
            previousCodec = config.Codec;
            Assert.AreEqual(30, w.MaxDoc);
            AssertQuery(new Term("content", "bbb"), directory, 10);
            AssertQuery(new Term("content", "ccc"), directory, 10);
            AssertQuery(new Term("content", "aaa"), directory, 10);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: add more docs w/ new codec");
            }
            AddDocs2(w, 10);
            w.Commit();
            AssertQuery(new Term("content", "ccc"), directory, 10);
            AssertQuery(new Term("content", "bbb"), directory, 20);
            AssertQuery(new Term("content", "aaa"), directory, 10);
            Assert.AreEqual(40, w.MaxDoc);

            // Final stage: merge everything into a single segment and re-check.
            if (VERBOSE)
            {
                Console.WriteLine("TEST: now optimize");
            }
            w.ForceMerge(1);
            Assert.AreEqual(40, w.MaxDoc);
            w.Dispose();
            AssertQuery(new Term("content", "ccc"), directory, 10);
            AssertQuery(new Term("content", "bbb"), directory, 20);
            AssertQuery(new Term("content", "aaa"), directory, 10);

            directory.Dispose();
        }