Exemple #1
0
        /// <summary>
        /// Indexes documents carrying facet dimensions "a" and "b", with "b" routed to the
        /// "$b" index field, and verifies that the top-children value read for each
        /// dimension equals the reader's <c>MaxDoc</c>.
        /// </summary>
        public virtual void TestSeparateIndexedFields()
        {
            Store.Directory indexDir = NewDirectory();
            Store.Directory taxoDir = NewDirectory();

            DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
            var iw = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));

            // Only dimension "b" is redirected to its own index field.
            var config = new FacetsConfig();
            config.SetIndexFieldName("b", "$b");

            // Every document is identical: one plain field plus one value per dimension.
            int docCount = AtLeast(30);
            for (int n = 0; n < docCount; n++)
            {
                var doc = new Document();
                doc.Add(new StringField("f", "v", Field.Store.NO));
                doc.Add(new FacetField("a", "1"));
                doc.Add(new FacetField("b", "1"));
                iw.AddDocument(config.Build(taxoWriter, doc));
            }

            // Open readers directly from the writers.
            DirectoryReader r = DirectoryReader.Open(iw, true);
            DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            var sfc = new FacetsCollector();
            IndexSearcher searcher = NewSearcher(r);
            searcher.Search(new MatchAllDocsQuery(), sfc);

            // One Facets instance without an explicit field name, one bound to "$b".
            Facets facets1 = GetTaxonomyFacetCounts(taxoReader, config, sfc);
            Facets facets2 = GetTaxonomyFacetCounts(taxoReader, config, sfc, "$b");

            int expected = r.MaxDoc;
            Assert.AreEqual(expected, (int)facets1.GetTopChildren(10, "a").Value);
            Assert.AreEqual(expected, (int)facets2.GetTopChildren(10, "b").Value);

            IOUtils.Dispose(taxoWriter, iw, taxoReader, taxoDir, r, indexDir);
        }
Exemple #2
0
        /// <summary>
        /// Regression test for LUCENE-4885 (FacetResult.numValidDescendants was not set properly
        /// by FacetsAccumulator): indexes ten documents, each with a distinct value under
        /// dimension "a", and expects <c>ChildCount</c> to be 10 even though only the top 2
        /// children are requested.
        /// </summary>
        public virtual void TestChildCount()
        {
            var indexDir = NewDirectory();
            var taxoDir = NewDirectory();

            DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
            var iw = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));
            var config = new FacetsConfig();

            // Labels "0" .. "9", one per document.
            for (int label = 0; label < 10; ++label)
            {
                var doc = new Document();
                doc.Add(new FacetField("a", Convert.ToString(label)));
                iw.AddDocument(config.Build(taxoWriter, doc));
            }

            DirectoryReader r = DirectoryReader.Open(iw, true);
            var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            var sfc = new FacetsCollector();
            IndexSearcher searcher = NewSearcher(r);
            searcher.Search(new MatchAllDocsQuery(), sfc);

            Facets facets = GetTaxonomyFacetCounts(taxoReader, config, sfc);
            FacetResult topTwo = facets.GetTopChildren(2, "a");

            Assert.AreEqual(10, topTwo.ChildCount);

            IOUtils.Dispose(taxoWriter, iw, taxoReader, taxoDir, r, indexDir);
        }
        /// <summary>
        /// Adds one document with two paths ("path"/"x" and "path"/"y") under dimension "a",
        /// which is configured as both hierarchical and multi-valued. Expects
        /// <c>GetSpecificValue("a")</c> to fail with an illegal-argument exception, and the
        /// top children of "a" to contain exactly one label whose value is 1.
        /// </summary>
        public virtual void TestMultiValuedHierarchy()
        {
            Store.Directory dir = NewDirectory();
            Store.Directory taxoDir = NewDirectory();
            var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);

            var config = new FacetsConfig();
            config.SetHierarchical("a", true);
            config.SetMultiValued("a", true);

            var writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);

            var doc = new Document();
            doc.Add(NewTextField("field", "text", Field.Store.NO));
            doc.Add(new FacetField("a", "path", "x"));
            doc.Add(new FacetField("a", "path", "y"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            // Near-real-time readers for both the index and the taxonomy.
            IndexSearcher searcher = NewSearcher(writer.GetReader());
            DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            // Collect over every document so that all facets are counted.
            var collector = new FacetsCollector();
            searcher.Search(new MatchAllDocsQuery(), collector);
            Facets facets = GetTaxonomyFacetCounts(taxoReader, config, collector);

            // Asking for the dimension's own value must be rejected.
            try
            {
                facets.GetSpecificValue("a");
                fail("didn't hit expected exception");
            }
            catch (Exception e) when (e.IsIllegalArgumentException())
            {
                // expected
            }

            FacetResult topChildren = facets.GetTopChildren(10, "a");

            Assert.AreEqual(1, topChildren.LabelValues.Length);
            Assert.AreEqual(1, (int)topChildren.LabelValues[0].Value);

            IOUtils.Dispose(writer, taxoWriter, searcher.IndexReader, taxoReader, dir, taxoDir);
        }
        /// <summary>
        /// Puts between 40000 and 100000 distinct labels of the multi-valued dimension "dim"
        /// into a single document and checks that every label comes back exactly once with a
        /// count of 1. The test is skipped when the default facet field's codec cannot hold
        /// huge binary doc values.
        /// </summary>
        public virtual void TestManyFacetsInOneDocument()
        {
            AssumeTrue("default Codec doesn't support huge BinaryDocValues", TestUtil.FieldSupportsHugeBinaryDocValues(FacetsConfig.DEFAULT_INDEX_FIELD_NAME));

            Store.Directory dir = NewDirectory();
            Store.Directory taxoDir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            var writer = new RandomIndexWriter(Random(), dir, iwc);
            DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE);

            var config = new FacetsConfig();
            config.SetMultiValued("dim", true);

            int numLabels = TestUtil.NextInt(Random(), 40000, 100000);

            // A single document that carries every label.
            var doc = new Document();
            doc.Add(NewTextField("field", "text", Field.Store.NO));
            for (int label = 0; label < numLabels; ++label)
            {
                doc.Add(new FacetField("dim", "" + label));
            }
            writer.AddDocument(config.Build(taxoWriter, doc));

            // Near-real-time readers for both the index and the taxonomy.
            IndexSearcher searcher = NewSearcher(writer.Reader);
            DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            // Collect over every document so that all facets are counted.
            var collector = new FacetsCollector();
            searcher.Search(new MatchAllDocsQuery(), collector);
            Facets facets = GetTaxonomyFacetCounts(taxoReader, config, collector);

            FacetResult result = facets.GetTopChildren(int.MaxValue, "dim");
            Assert.AreEqual(numLabels, result.LabelValues.Length);

            // Labels must be unique and each must have been counted once.
            var seenLabels = new HashSet<string>();
            foreach (LabelAndValue entry in result.LabelValues)
            {
                seenLabels.Add(entry.Label);
                Assert.AreEqual(1, (int)entry.Value);
            }
            Assert.AreEqual(numLabels, seenLabels.Count);

            IOUtils.Close(searcher.IndexReader, taxoWriter, writer, taxoReader, dir, taxoDir);
        }
Exemple #5
0
        /// <summary>
        /// Console demo: indexes three sample entries into a <c>RAMDirectory</c>, runs the
        /// query "isbn:99*", prints the matching documents, and then prints the top "isbn"
        /// facet labels read from the "facet_isbn" field.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        public static void TestFlexLuceneRAM(string[] args)
        {
            StandardAnalyzer analyzer = new StandardAnalyzer();

            FlexLucene.Store.Directory index = (FlexLucene.Store.Directory) new RAMDirectory();
            config = new IndexWriterConfig((Analyzer)analyzer);
            cnf    = new FacetsConfig();
            cnf.SetIndexFieldName("title", "facet_title");
            cnf.SetIndexFieldName("isbn", "facet_isbn");
            LuceneTest.taxoDir    = (FlexLucene.Store.Directory) new RAMDirectory();
            LuceneTest.taxoWriter = (TaxonomyWriter) new FlexLucene.Facet.Taxonomy.Directory.DirectoryTaxonomyWriter(LuceneTest.taxoDir, IndexWriterConfigOpenMode.CREATE);

            IndexWriter w = new IndexWriter(index, LuceneTest.config);

            // NOTE(review): addDoc is defined elsewhere; the arguments look like (writer, title, isbn) - confirm.
            addDoc(w, "Lucene in Action", "9900001");
            addDoc(w, "Lucene for Dummies", "9900002");
            addDoc(w, "Lucene for Dummies 2", "9900003");

            w.close();
            String querystr    = "isbn:99*";
            Query  q           = new QueryParser("title", (Analyzer)analyzer).Parse(querystr);
            int    hitsPerPage = 10;

            IndexReader reader = (IndexReader)DirectoryReader.Open(index);
            try
            {
                IndexSearcher        searcher  = new IndexSearcher(reader);
                TopScoreDocCollector collector = TopScoreDocCollector.Create(hitsPerPage);

                searcher.Search(q, (Collector)collector);
                ScoreDoc[] hits = collector.TopDocs().ScoreDocs;
                Console.WriteLine("Found " + hits.Length + " hits.");
                for (int i = 0; i < hits.Length; ++i)
                {
                    int      docId = hits [i].Doc;
                    Document d     = searcher.Doc(docId);
                    Console.WriteLine(i + 1 + ". " + d.Get("isbn") + "\t" + d.Get("title"));
                }

                SortedSetDocValuesReaderState state = (SortedSetDocValuesReaderState) new DefaultSortedSetDocValuesReaderState(reader, "facet_isbn");
                FacetsCollector fc = new FacetsCollector();

                FacetsCollector.Search(searcher, q, 10, (Collector)fc);
                Facets      facets = (Facets) new SortedSetDocValuesFacetCounts(state, fc);
                FacetResult result = facets.GetTopChildren(10, "isbn", new String[0]);

                if (result == null)
                {
                    // GetTopChildren can come back empty-handed when nothing was counted for the dimension.
                    Console.WriteLine("No facet results for dimension \"isbn\".");
                }
                else
                {
                    // Iterate the returned labels themselves: ChildCount is the number of children
                    // encountered, which can exceed the topN-limited LabelValues array.
                    foreach (LabelAndValue lv in result.LabelValues)
                    {
                        Console.WriteLine(String.Format("Label={0}, Value={1}", lv.Label, lv.Value));
                    }
                }
            }
            finally
            {
                // Release the reader even if searching or printing throws.
                reader.close();
            }
        }
Exemple #6
0
        /// <summary>
        /// Counts facets over all documents of the shared index and, for dimensions
        /// <c>CP_A</c> and <c>CP_B</c>, checks the dimension value and every child count
        /// against <c>allExpectedCounts</c>, and checks that children are returned in
        /// non-increasing order of value.
        /// </summary>
        public virtual void TestAllCounts()
        {
            DirectoryReader indexReader = DirectoryReader.Open(indexDir);
            DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
            IndexSearcher searcher = NewSearcher(indexReader);

            var sfc = new FacetsCollector();
            searcher.Search(new MatchAllDocsQuery(), sfc);

            Facets facets = GetTaxonomyFacetCounts(taxoReader, Config, sfc);

            // Dimension CP_A: the aggregate value is expected to be -1.
            FacetResult result = facets.GetTopChildren(NUM_CHILDREN_CP_A, CP_A);
            Assert.AreEqual(-1, (int)result.Value);

            int prevValue = int.MaxValue;
            var childrenA = result.LabelValues;
            for (int idx = 0; idx < childrenA.Length; idx++)
            {
                LabelAndValue labelValue = childrenA[idx];
                Assert.AreEqual(allExpectedCounts[CP_A + "/" + labelValue.Label], labelValue.Value);
                Assert.True((int)labelValue.Value <= prevValue, "wrong sort order of sub results: labelValue.value=" + labelValue.Value + " prevValue=" + prevValue);
                prevValue = (int)labelValue.Value;
            }

            // Dimension CP_B: the aggregate value must match the expected count for CP_B.
            result = facets.GetTopChildren(NUM_CHILDREN_CP_B, CP_B);
            Assert.AreEqual(allExpectedCounts[CP_B], result.Value);

            prevValue = int.MaxValue;
            var childrenB = result.LabelValues;
            for (int idx = 0; idx < childrenB.Length; idx++)
            {
                LabelAndValue labelValue = childrenB[idx];
                Assert.AreEqual(allExpectedCounts[CP_B + "/" + labelValue.Label], labelValue.Value);
                Assert.True((int)labelValue.Value <= prevValue, "wrong sort order of sub results: labelValue.value=" + labelValue.Value + " prevValue=" + prevValue);
                prevValue = (int)labelValue.Value;
            }

            IOUtils.Dispose(indexReader, taxoReader);
        }
        /// <summary>
        /// Exercises facet counting when some segments have no matching documents or no
        /// categories: seven batches are written through <c>indexTwoDocs</c> with varying
        /// combinations of content and categories, then the query "f:a" is run and dimension
        /// "A" is expected to have two children, each with a count of 2.
        /// </summary>
        public virtual void TestSegmentsWithoutCategoriesOrResults()
        {
            var indexDir = NewDirectory();
            var taxoDir = NewDirectory();

            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            //iwc.MergePolicy = NoMergePolicy.INSTANCE; // prevent merges
            var indexWriter = new IndexWriter(indexDir, iwc);

            DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
            var config = new FacetsConfig();

            // A null config means the batch is indexed without categories.
            indexTwoDocs(taxoWriter, indexWriter, config, false); // segment 1: categories, no content
            indexTwoDocs(taxoWriter, indexWriter, null, true);    // segment 2: content, no categories
            indexTwoDocs(taxoWriter, indexWriter, config, true);  // segment 3: content and categories
            indexTwoDocs(taxoWriter, indexWriter, null, false);   // segment 4: neither content nor categories
            indexTwoDocs(taxoWriter, indexWriter, null, true);    // segment 5: content, no categories
            indexTwoDocs(taxoWriter, indexWriter, config, true);  // segment 6: content and categories
            indexTwoDocs(taxoWriter, indexWriter, null, true);    // segment 7: content, no categories
            IOUtils.Close(indexWriter, taxoWriter);

            DirectoryReader indexReader = DirectoryReader.Open(indexDir);
            DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
            IndexSearcher indexSearcher = NewSearcher(indexReader);

            // search for "f:a", only segments 1 and 3 should match results
            Query termQuery = new TermQuery(new Term("f", "a"));
            var sfc = new FacetsCollector();
            indexSearcher.Search(termQuery, sfc);

            Facets facets = GetTaxonomyFacetCounts(taxoReader, config, sfc);
            FacetResult topChildren = facets.GetTopChildren(10, "A");

            Assert.AreEqual(2, topChildren.LabelValues.Length, "wrong number of children");
            foreach (LabelAndValue child in topChildren.LabelValues)
            {
                Assert.AreEqual(2, (int)child.Value, "wrong weight for child " + child.Label);
            }

            IOUtils.Close(indexReader, taxoReader, indexDir, taxoDir);
        }
        /// <summary>
        /// Indexes two labels of the multi-valued dimension "dim" that contain the control
        /// characters U+001F and U+001E, then checks that each label can be looked up with a
        /// value of 1 and that the top-children result prints both labels unchanged.
        /// </summary>
        public virtual void TestLabelWithDelimiter()
        {
            Store.Directory dir = NewDirectory();
            Store.Directory taxoDir = NewDirectory();
            var writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);
            DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);

            var config = new FacetsConfig();
            config.SetMultiValued("dim", true);

            var doc = new Document();
            doc.Add(NewTextField("field", "text", Field.Store.NO));
            doc.Add(new FacetField("dim", "test\u001Fone"));
            doc.Add(new FacetField("dim", "test\u001Etwo"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            // Near-real-time readers for both the index and the taxonomy.
            IndexSearcher searcher = NewSearcher(writer.GetReader());
            DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            var collector = new FacetsCollector();
            searcher.Search(new MatchAllDocsQuery(), collector);

            Facets facets = GetTaxonomyFacetCounts(taxoReader, config, collector);

            // Each label must be addressable on its own.
            Assert.AreEqual(1, facets.GetSpecificValue("dim", "test\u001Fone"));
            Assert.AreEqual(1, facets.GetSpecificValue("dim", "test\u001Etwo"));

            // The printed form of the result must carry both labels verbatim.
            FacetResult topChildren = facets.GetTopChildren(10, "dim");
            Assert.AreEqual("dim=dim path=[] value=-1 childCount=2\n  test\u001Fone (1)\n  test\u001Etwo (1)\n", topChildren.ToString());

            IOUtils.Dispose(writer, taxoWriter, searcher.IndexReader, taxoReader, dir, taxoDir);
        }
        /// <summary>
        /// Runs an <c>IndexerThread</c> against a directory-based
        /// <c>SearcherTaxonomyManager</c> and, until the <c>stop</c> flag is set, repeatedly
        /// acquires a searcher/taxonomy pair and checks that dimension "field" has children
        /// whenever the acquired reader contains documents.
        /// </summary>
        public virtual void TestDirectory()
        {
            Store.Directory indexDir = NewDirectory();
            Store.Directory taxoDir = NewDirectory();
            var w = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
            DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(taxoDir);

            // Commit both writers once, while still empty, before the manager opens the directories.
            w.Commit();
            tw.Commit();
            SearcherTaxonomyManager mgr = new SearcherTaxonomyManager(indexDir, taxoDir, null);

            var config = new FacetsConfig();
            config.SetMultiValued("field", true);
            var stop = new AtomicBoolean();

            // How many unique facets to index before stopping:
            int ordLimit = TEST_NIGHTLY ? 100000 : 6000;

            IndexerThread indexer = new IndexerThread(w, config, tw, mgr, ordLimit, stop);
            indexer.Start();

            try
            {
                while (!stop.Get())
                {
                    SearcherAndTaxonomy pair = mgr.Acquire();
                    try
                    {
                        var collector = new FacetsCollector();
                        pair.searcher.Search(new MatchAllDocsQuery(), collector);

                        FacetResult topChildren = GetTaxonomyFacetCounts(pair.taxonomyReader, config, collector)
                            .GetTopChildren(10, "field");

                        // Children are only required once the reader actually holds documents.
                        if (pair.searcher.IndexReader.NumDocs > 0)
                        {
                            Assert.True(topChildren.ChildCount > 0);
                            Assert.True(topChildren.LabelValues.Length > 0);
                        }
                    }
                    finally
                    {
                        // Always hand the pair back to the manager.
                        mgr.Release(pair);
                    }
                }
            }
            finally
            {
                // Wait for the indexer even when an assertion above failed.
                indexer.Join();
            }

            if (VERBOSE)
            {
                Console.WriteLine("TEST: now stop");
            }

            IOUtils.Close(mgr, tw, w, taxoDir, indexDir);
        }
        /// <summary>
        /// Runs an indexer thread and a reopener thread against a writer-based
        /// <c>SearcherTaxonomyManager</c> and, until the <c>stop</c> flag is set, repeatedly
        /// acquires a searcher/taxonomy pair and checks that dimension "field" has children
        /// whenever the acquired reader contains documents.
        /// </summary>
        public virtual void TestNrt()
        {
            Store.Directory dir = NewDirectory();
            Store.Directory taxoDir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            // Don't allow tiny maxBufferedDocs; it can make this
            // test too slow:
            int bufferedDocs = Math.Max(500, iwc.MaxBufferedDocs);
            iwc.SetMaxBufferedDocs(bufferedDocs);

            // MockRandom/AlcoholicMergePolicy are too slow:
            var mergePolicy = new TieredMergePolicy();
            mergePolicy.FloorSegmentMB = .001;
            iwc.SetMergePolicy(mergePolicy);

            var w = new IndexWriter(dir, iwc);
            DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(taxoDir);

            var config = new FacetsConfig();
            config.SetMultiValued("field", true);
            var stop = new AtomicBoolean();

            // How many unique facets to index before stopping:
            int ordLimit = TEST_NIGHTLY ? 100000 : 6000;

            // The indexer is created without a manager; the manager wraps the writers directly.
            IndexerThread indexer = new IndexerThread(w, config, tw, null, ordLimit, stop);
            SearcherTaxonomyManager mgr = new SearcherTaxonomyManager(w, true, null, tw);
            var reopener = new ThreadAnonymousInnerClassHelper(this, stop, mgr);

            // The reopener is started before the indexer.
            reopener.Name = "reopener";
            reopener.Start();

            indexer.Name = "indexer";
            indexer.Start();

            try
            {
                while (!stop.Get())
                {
                    SearcherAndTaxonomy pair = mgr.Acquire();
                    try
                    {
                        var collector = new FacetsCollector();
                        pair.searcher.Search(new MatchAllDocsQuery(), collector);

                        FacetResult topChildren = GetTaxonomyFacetCounts(pair.taxonomyReader, config, collector)
                            .GetTopChildren(10, "field");

                        // Children are only required once the reader actually holds documents.
                        if (pair.searcher.IndexReader.NumDocs > 0)
                        {
                            Assert.True(topChildren.ChildCount > 0);
                            Assert.True(topChildren.LabelValues.Length > 0);
                        }
                    }
                    finally
                    {
                        // Always hand the pair back to the manager.
                        mgr.Release(pair);
                    }
                }
            }
            finally
            {
                // Wait for both background threads even when an assertion above failed.
                indexer.Join();
                reopener.Join();
            }

            if (VERBOSE)
            {
                Console.WriteLine("TEST: now stop");
            }

            IOUtils.Close(mgr, tw, w, taxoDir, dir);
        }
Exemple #11
0
        /// <summary>
        /// Runs an <c>IndexerThread</c> against a directory-based
        /// <c>SearcherTaxonomyManager</c> and, until the <c>stop</c> flag is set, repeatedly
        /// acquires a searcher/taxonomy pair and checks that dimension "field" has children
        /// whenever the acquired reader contains documents.
        /// </summary>
        public virtual void Test_Directory() // LUCENENET specific - name collides with property of LuceneTestCase
        {
            Store.Directory indexDir = NewDirectory();
            Store.Directory taxoDir = NewDirectory();
            var w = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));
            DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(taxoDir);

            // Commit both writers once, while still empty, before the manager opens the directories.
            w.Commit();
            tw.Commit();
            SearcherTaxonomyManager mgr = new SearcherTaxonomyManager(indexDir, taxoDir, null);

            var config = new FacetsConfig();
            config.SetMultiValued("field", true);
            var stop = new AtomicBoolean();

            // How many unique facets to index before stopping.
            // LUCENENET specific: the nightly limit was 100000 upstream, which takes about
            // 2-3 hours. To keep it under the 1 hour free limit of Azure DevOps, this was
            // reduced to 30000.
            int ordLimit = TestNightly ? 30000 : 6000;

            IndexerThread indexer = new IndexerThread(w, config, tw, mgr, ordLimit, stop);
            indexer.Start();

            try
            {
                while (!stop)
                {
                    SearcherAndTaxonomy pair = mgr.Acquire();
                    try
                    {
                        var collector = new FacetsCollector();
                        pair.Searcher.Search(new MatchAllDocsQuery(), collector);

                        FacetResult topChildren = GetTaxonomyFacetCounts(pair.TaxonomyReader, config, collector)
                            .GetTopChildren(10, "field");

                        // Children are only required once the reader actually holds documents.
                        if (pair.Searcher.IndexReader.NumDocs > 0)
                        {
                            Assert.IsTrue(topChildren.ChildCount > 0);
                            Assert.IsTrue(topChildren.LabelValues.Length > 0);
                        }
                    }
                    finally
                    {
                        // Always hand the pair back to the manager.
                        mgr.Release(pair);
                    }
                }
            }
            finally
            {
                // Wait for the indexer even when an assertion above failed.
                indexer.Join();
            }

            if (Verbose)
            {
                Console.WriteLine("TEST: now stop");
            }

            IOUtils.Dispose(mgr, tw, w, taxoDir, indexDir);
        }