/// <summary>
/// Verifies that a dimension mapped to a custom index field name ("b" -> "$b")
/// is counted from that field, while an unmapped dimension ("a") is counted
/// from the default facets field.
/// </summary>
public virtual void TestSeparateIndexedFields()
{
    Store.Directory indexDir = NewDirectory();
    Store.Directory taxoDir = NewDirectory();

    var taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
    IndexWriter indexWriter = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));

    FacetsConfig config = new FacetsConfig();
    config.SetIndexFieldName("b", "$b");

    // Every document carries both dimensions, so each dimension's count
    // should end up equal to maxDoc.
    for (int remaining = AtLeast(30); remaining > 0; --remaining)
    {
        Document doc = new Document();
        doc.Add(new StringField("f", "v", Field.Store.NO));
        doc.Add(new FacetField("a", "1"));
        doc.Add(new FacetField("b", "1"));
        indexWriter.AddDocument(config.Build(taxoWriter, doc));
    }

    DirectoryReader reader = DirectoryReader.Open(indexWriter, true);
    var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

    FacetsCollector collector = new FacetsCollector();
    NewSearcher(reader).Search(new MatchAllDocsQuery(), collector);

    // "a" lives in the default facets field; "b" was redirected to "$b".
    Facets defaultFieldFacets = GetTaxonomyFacetCounts(taxoReader, config, collector);
    Facets customFieldFacets = GetTaxonomyFacetCounts(taxoReader, config, collector, "$b");

    Assert.AreEqual(reader.MaxDoc, (int)defaultFieldFacets.GetTopChildren(10, "a").Value);
    Assert.AreEqual(reader.MaxDoc, (int)customFieldFacets.GetTopChildren(10, "b").Value);

    IOUtils.Dispose(taxoWriter, indexWriter, taxoReader, taxoDir, reader, indexDir);
}
/// <summary>
/// LUCENE-4885: FacetResult.numValidDescendants was not set properly by
/// FacetsAccumulator; ChildCount must report all existing children even when
/// only a subset of top children is requested.
/// </summary>
public virtual void TestChildCount()
{
    var indexDir = NewDirectory();
    var taxoDir = NewDirectory();

    var taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
    IndexWriter indexWriter = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));
    FacetsConfig config = new FacetsConfig();

    // Ten documents, each contributing a distinct child label under "a".
    for (int label = 0; label < 10; label++)
    {
        Document doc = new Document();
        doc.Add(new FacetField("a", Convert.ToString(label)));
        indexWriter.AddDocument(config.Build(taxoWriter, doc));
    }

    DirectoryReader reader = DirectoryReader.Open(indexWriter, true);
    DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);

    FacetsCollector collector = new FacetsCollector();
    NewSearcher(reader).Search(new MatchAllDocsQuery(), collector);

    Facets facets = GetTaxonomyFacetCounts(taxoReader, config, collector);

    // Request only the top 2 children; ChildCount must still be the full 10.
    Assert.AreEqual(10, facets.GetTopChildren(2, "a").ChildCount);

    IOUtils.Dispose(taxoWriter, indexWriter, taxoReader, taxoDir, reader, indexDir);
}
/// <summary>
/// A hierarchical, multi-valued dimension has no single aggregated value:
/// GetSpecificValue on the dimension root must throw, while GetTopChildren
/// still reports the shared parent path once.
/// </summary>
public virtual void TestMultiValuedHierarchy()
{
    Store.Directory dir = NewDirectory();
    Store.Directory taxoDir = NewDirectory();

    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);
    FacetsConfig config = new FacetsConfig();
    config.SetHierarchical("a", true);
    config.SetMultiValued("a", true);

    RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, dir);

    // One document with two paths that share the parent a/path.
    Document doc = new Document();
    doc.Add(NewTextField("field", "text", Field.Store.NO));
    doc.Add(new FacetField("a", "path", "x"));
    doc.Add(new FacetField("a", "path", "y"));
    writer.AddDocument(config.Build(taxoWriter, doc));

    // NRT open
    IndexSearcher searcher = NewSearcher(writer.GetReader());

    // NRT open
    var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

    // Aggregate the facet counts. MatchAllDocsQuery is for "browsing"
    // (counts facets for all non-deleted docs in the index); normally you'd
    // use a "normal" query and MultiCollector to also collect hits.
    FacetsCollector collector = new FacetsCollector();
    searcher.Search(new MatchAllDocsQuery(), collector);

    Facets facets = GetTaxonomyFacetCounts(taxoReader, config, collector);

    // The dimension root of a multi-valued hierarchical dim has no value.
    try
    {
        facets.GetSpecificValue("a");
        fail("didn't hit expected exception");
    }
    catch (Exception iae) when (iae.IsIllegalArgumentException())
    {
        // expected
    }

    // Both FacetFields collapse to the single child "path" with count 1.
    FacetResult result = facets.GetTopChildren(10, "a");
    Assert.AreEqual(1, result.LabelValues.Length);
    Assert.AreEqual(1, (int)result.LabelValues[0].Value);

    IOUtils.Dispose(writer, taxoWriter, searcher.IndexReader, taxoReader, dir, taxoDir);
}
/// <summary>
/// Stress-tests a single document carrying tens of thousands of labels under
/// one multi-valued dimension; every label must come back exactly once with
/// count 1. Skipped when the codec cannot store huge BinaryDocValues.
/// </summary>
public virtual void TestManyFacetsInOneDocument()
{
    AssumeTrue("default Codec doesn't support huge BinaryDocValues", TestUtil.FieldSupportsHugeBinaryDocValues(FacetsConfig.DEFAULT_INDEX_FIELD_NAME));

    Store.Directory dir = NewDirectory();
    Store.Directory taxoDir = NewDirectory();

    IndexWriterConfig writerConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, writerConfig);
    var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE);

    FacetsConfig config = new FacetsConfig();
    config.SetMultiValued("dim", true);

    // One document, 40k-100k distinct labels under the same dimension.
    int numLabels = TestUtil.NextInt(Random(), 40000, 100000);

    Document doc = new Document();
    doc.Add(NewTextField("field", "text", Field.Store.NO));
    for (int label = 0; label < numLabels; label++)
    {
        doc.Add(new FacetField("dim", "" + label));
    }
    writer.AddDocument(config.Build(taxoWriter, doc));

    // NRT open
    IndexSearcher searcher = NewSearcher(writer.Reader);

    // NRT open
    var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

    // Aggregate the facet counts. MatchAllDocsQuery is for "browsing"
    // (counts facets for all non-deleted docs in the index); normally you'd
    // use a "normal" query and MultiCollector to also collect hits.
    FacetsCollector collector = new FacetsCollector();
    searcher.Search(new MatchAllDocsQuery(), collector);

    Facets facets = GetTaxonomyFacetCounts(taxoReader, config, collector);

    FacetResult result = facets.GetTopChildren(int.MaxValue, "dim");
    Assert.AreEqual(numLabels, result.LabelValues.Length);

    // Each label must appear exactly once with a count of 1.
    var seenLabels = new HashSet<string>();
    foreach (LabelAndValue labelValue in result.LabelValues)
    {
        seenLabels.Add(labelValue.Label);
        Assert.AreEqual(1, (int)labelValue.Value);
    }
    Assert.AreEqual(numLabels, seenLabels.Count);

    IOUtils.Close(searcher.IndexReader, taxoWriter, writer, taxoReader, dir, taxoDir);
}
/// <summary>
/// FlexLucene smoke test: indexes three books into an in-memory directory,
/// runs a wildcard ISBN query, prints the hits, then counts "isbn" facets via
/// SortedSetDocValues and prints each label/value pair.
/// </summary>
public static void TestFlexLuceneRAM(string[] args)
{
    StandardAnalyzer analyzer = new StandardAnalyzer();
    FlexLucene.Store.Directory index = new RAMDirectory();

    config = new IndexWriterConfig(analyzer);

    // Redirect each dimension into its own facet field.
    cnf = new FacetsConfig();
    cnf.SetIndexFieldName("title", "facet_title");
    cnf.SetIndexFieldName("isbn", "facet_isbn");

    LuceneTest.taxoDir = new RAMDirectory();
    LuceneTest.taxoWriter = new FlexLucene.Facet.Taxonomy.Directory.DirectoryTaxonomyWriter(LuceneTest.taxoDir, IndexWriterConfigOpenMode.CREATE);

    IndexWriter writer = new IndexWriter(index, LuceneTest.config);
    addDoc(writer, "Lucene in Action", "9900001");
    addDoc(writer, "Lucene for Dummies", "9900002");
    addDoc(writer, "Lucene for Dummies 2", "9900003");
    writer.close();

    // Wildcard search over the ISBN field.
    String queryString = "isbn:99*";
    Query query = new QueryParser("title", analyzer).Parse(queryString);
    int hitsPerPage = 10;

    IndexReader reader = (IndexReader)DirectoryReader.Open(index);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopScoreDocCollector topDocsCollector = TopScoreDocCollector.Create(hitsPerPage);
    searcher.Search(query, topDocsCollector);

    ScoreDoc[] hits = topDocsCollector.TopDocs().ScoreDocs;
    Console.WriteLine("Found " + hits.Length + " hits.");
    for (int hitIndex = 0; hitIndex < hits.Length; ++hitIndex)
    {
        int docId = hits[hitIndex].Doc;
        Document matched = searcher.Doc(docId);
        Console.WriteLine(hitIndex + 1 + ". " + matched.Get("isbn") + "\t" + matched.Get("title"));
    }

    // Facet counts over the doc-values-backed "facet_isbn" field.
    SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(reader, "facet_isbn");
    FacetsCollector facetsCollector = new FacetsCollector();
    FacetsCollector.Search(searcher, query, 10, facetsCollector);

    Facets facets = new SortedSetDocValuesFacetCounts(state, facetsCollector);
    FacetResult result = facets.GetTopChildren(10, "isbn", new String[0]);
    for (int childIndex = 0; childIndex < result.ChildCount; ++childIndex)
    {
        LabelAndValue labelValue = result.LabelValues[childIndex];
        Console.WriteLine(String.Format("Label={0}, Value={1}", labelValue.Label, labelValue.Value));
    }

    reader.close();
}
/// <summary>
/// Counts facets for all documents and checks every child of CP_A and CP_B
/// against the precomputed <c>allExpectedCounts</c> table, and that children
/// are returned in non-increasing count order. CP_A's root value is -1
/// (not aggregated); CP_B's root value comes from the expected table.
/// </summary>
public virtual void TestAllCounts()
{
    DirectoryReader indexReader = DirectoryReader.Open(indexDir);
    var taxoReader = new DirectoryTaxonomyReader(taxoDir);
    IndexSearcher searcher = NewSearcher(indexReader);

    FacetsCollector collector = new FacetsCollector();
    searcher.Search(new MatchAllDocsQuery(), collector);

    Facets facets = GetTaxonomyFacetCounts(taxoReader, Config, collector);

    // CP_A: root value is not aggregated, so it reports -1.
    FacetResult result = facets.GetTopChildren(NUM_CHILDREN_CP_A, CP_A);
    Assert.AreEqual(-1, (int)result.Value);
    int previousCount = int.MaxValue;
    foreach (LabelAndValue labelValue in result.LabelValues)
    {
        Assert.AreEqual(allExpectedCounts[CP_A + "/" + labelValue.Label], labelValue.Value);
        // Children must arrive sorted by count, descending.
        Assert.True((int)labelValue.Value <= previousCount, "wrong sort order of sub results: labelValue.value=" + labelValue.Value + " prevValue=" + previousCount);
        previousCount = (int)labelValue.Value;
    }

    // CP_B: root value is aggregated and must match the expected table.
    result = facets.GetTopChildren(NUM_CHILDREN_CP_B, CP_B);
    Assert.AreEqual(allExpectedCounts[CP_B], result.Value);
    previousCount = int.MaxValue;
    foreach (LabelAndValue labelValue in result.LabelValues)
    {
        Assert.AreEqual(allExpectedCounts[CP_B + "/" + labelValue.Label], labelValue.Value);
        // Children must arrive sorted by count, descending.
        Assert.True((int)labelValue.Value <= previousCount, "wrong sort order of sub results: labelValue.value=" + labelValue.Value + " prevValue=" + previousCount);
        previousCount = (int)labelValue.Value;
    }

    IOUtils.Dispose(indexReader, taxoReader);
}
/// <summary>
/// Tests the accumulator when some segments contribute no hits and/or no
/// facet categories: only the segments with both content and categories
/// (1st and 3rd matching segments) should contribute to the counts.
/// </summary>
public virtual void TestSegmentsWithoutCategoriesOrResults()
{
    var indexDir = NewDirectory();
    var taxoDir = NewDirectory();

    IndexWriterConfig writerConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    //iwc.MergePolicy = NoMergePolicy.INSTANCE; // prevent merges
    IndexWriter indexWriter = new IndexWriter(indexDir, writerConfig);
    var taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
    FacetsConfig config = new FacetsConfig();

    // Mix of segments: with/without searchable content, with/without categories.
    indexTwoDocs(taxoWriter, indexWriter, config, false); // 1st segment, no content, with categories
    indexTwoDocs(taxoWriter, indexWriter, null, true);    // 2nd segment, with content, no categories
    indexTwoDocs(taxoWriter, indexWriter, config, true);  // 3rd segment ok
    indexTwoDocs(taxoWriter, indexWriter, null, false);   // 4th segment, no content, or categories
    indexTwoDocs(taxoWriter, indexWriter, null, true);    // 5th segment, with content, no categories
    indexTwoDocs(taxoWriter, indexWriter, config, true);  // 6th segment, with content, with categories
    indexTwoDocs(taxoWriter, indexWriter, null, true);    // 7th segment, with content, no categories
    IOUtils.Close(indexWriter, taxoWriter);

    DirectoryReader indexReader = DirectoryReader.Open(indexDir);
    var taxoReader = new DirectoryTaxonomyReader(taxoDir);
    IndexSearcher indexSearcher = NewSearcher(indexReader);

    // search for "f:a", only segments 1 and 3 should match results
    Query query = new TermQuery(new Term("f", "a"));
    FacetsCollector collector = new FacetsCollector();
    indexSearcher.Search(query, collector);

    Facets facets = GetTaxonomyFacetCounts(taxoReader, config, collector);
    FacetResult result = facets.GetTopChildren(10, "A");
    Assert.AreEqual(2, result.LabelValues.Length, "wrong number of children");
    foreach (LabelAndValue labelValue in result.LabelValues)
    {
        Assert.AreEqual(2, (int)labelValue.Value, "wrong weight for child " + labelValue.Label);
    }

    IOUtils.Close(indexReader, taxoReader, indexDir, taxoDir);
}
/// <summary>
/// Labels containing the internal delimiter characters (U+001F / U+001E) must
/// round-trip through indexing, specific-value lookup, and FacetResult
/// rendering without corruption.
/// </summary>
public virtual void TestLabelWithDelimiter()
{
    Store.Directory dir = NewDirectory();
    Store.Directory taxoDir = NewDirectory();

    RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, dir);
    var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);

    FacetsConfig config = new FacetsConfig();
    config.SetMultiValued("dim", true);

    // Two labels that embed the facet path delimiter bytes.
    Document doc = new Document();
    doc.Add(NewTextField("field", "text", Field.Store.NO));
    doc.Add(new FacetField("dim", "test\u001Fone"));
    doc.Add(new FacetField("dim", "test\u001Etwo"));
    writer.AddDocument(config.Build(taxoWriter, doc));

    // NRT open
    IndexSearcher searcher = NewSearcher(writer.GetReader());

    // NRT open
    var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

    FacetsCollector collector = new FacetsCollector();
    searcher.Search(new MatchAllDocsQuery(), collector);

    Facets facets = GetTaxonomyFacetCounts(taxoReader, config, collector);

    // Each delimiter-bearing label is addressable and counted once.
    Assert.AreEqual(1, facets.GetSpecificValue("dim", "test\u001Fone"));
    Assert.AreEqual(1, facets.GetSpecificValue("dim", "test\u001Etwo"));

    FacetResult result = facets.GetTopChildren(10, "dim");
    Assert.AreEqual("dim=dim path=[] value=-1 childCount=2\n  test\u001Fone (1)\n  test\u001Etwo (1)\n", result.ToString());

    IOUtils.Dispose(writer, taxoWriter, searcher.IndexReader, taxoReader, dir, taxoDir);
}
/// <summary>
/// Exercises a directory-based SearcherTaxonomyManager: a background thread
/// indexes documents with new facet ordinals while this thread repeatedly
/// acquires searcher/taxonomy pairs and counts facets, until the indexer
/// signals it has reached its ordinal limit.
/// </summary>
public virtual void TestDirectory()
{
    Store.Directory indexDir = NewDirectory();
    Store.Directory taxoDir = NewDirectory();

    IndexWriter writer = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
    var taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

    // first empty commit
    writer.Commit();
    taxoWriter.Commit();

    var manager = new SearcherTaxonomyManager(indexDir, taxoDir, null);

    FacetsConfig config = new FacetsConfig();
    config.SetMultiValued("field", true);

    AtomicBoolean stop = new AtomicBoolean();

    // How many unique facets to index before stopping:
    int ordLimit = TEST_NIGHTLY ? 100000 : 6000;

    var indexer = new IndexerThread(writer, config, taxoWriter, manager, ordLimit, stop);
    indexer.Start();

    try
    {
        // Keep searching until the indexer flips the stop flag.
        while (!stop.Get())
        {
            SearcherAndTaxonomy pair = manager.Acquire();
            try
            {
                //System.out.println("search maxOrd=" + pair.taxonomyReader.getSize());
                FacetsCollector collector = new FacetsCollector();
                pair.searcher.Search(new MatchAllDocsQuery(), collector);
                Facets facets = GetTaxonomyFacetCounts(pair.taxonomyReader, config, collector);
                FacetResult result = facets.GetTopChildren(10, "field");
                if (pair.searcher.IndexReader.NumDocs > 0)
                {
                    //System.out.println(pair.taxonomyReader.getSize());
                    Assert.True(result.ChildCount > 0);
                    Assert.True(result.LabelValues.Length > 0);
                }
                //if (VERBOSE) {
                //System.out.println("TEST: facets=" + FacetTestUtils.toString(results.get(0)));
                //}
            }
            finally
            {
                manager.Release(pair);
            }
        }
    }
    finally
    {
        indexer.Join();
    }

    if (VERBOSE)
    {
        Console.WriteLine("TEST: now stop");
    }

    IOUtils.Close(manager, taxoWriter, writer, taxoDir, indexDir);
}
/// <summary>
/// Exercises a writer-based (NRT) SearcherTaxonomyManager with two background
/// threads — one indexing new facet ordinals, one reopening the manager —
/// while this thread repeatedly acquires pairs and counts facets until the
/// indexer signals completion.
/// </summary>
public virtual void TestNrt()
{
    Store.Directory dir = NewDirectory();
    Store.Directory taxoDir = NewDirectory();

    IndexWriterConfig writerConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    // Don't allow tiny maxBufferedDocs; it can make this
    // test too slow:
    writerConfig.SetMaxBufferedDocs(Math.Max(500, writerConfig.MaxBufferedDocs));

    // MockRandom/AlcoholicMergePolicy are too slow:
    TieredMergePolicy mergePolicy = new TieredMergePolicy();
    mergePolicy.FloorSegmentMB = .001;
    writerConfig.SetMergePolicy(mergePolicy);

    IndexWriter writer = new IndexWriter(dir, writerConfig);
    var taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

    FacetsConfig config = new FacetsConfig();
    config.SetMultiValued("field", true);

    AtomicBoolean stop = new AtomicBoolean();

    // How many unique facets to index before stopping:
    int ordLimit = TEST_NIGHTLY ? 100000 : 6000;

    var indexer = new IndexerThread(writer, config, taxoWriter, null, ordLimit, stop);

    var manager = new SearcherTaxonomyManager(writer, true, null, taxoWriter);

    var reopener = new ThreadAnonymousInnerClassHelper(this, stop, manager);
    reopener.Name = "reopener";
    reopener.Start();

    indexer.Name = "indexer";
    indexer.Start();

    try
    {
        // Keep searching until the indexer flips the stop flag.
        while (!stop.Get())
        {
            SearcherAndTaxonomy pair = manager.Acquire();
            try
            {
                //System.out.println("search maxOrd=" + pair.taxonomyReader.getSize());
                FacetsCollector collector = new FacetsCollector();
                pair.searcher.Search(new MatchAllDocsQuery(), collector);
                Facets facets = GetTaxonomyFacetCounts(pair.taxonomyReader, config, collector);
                FacetResult result = facets.GetTopChildren(10, "field");
                if (pair.searcher.IndexReader.NumDocs > 0)
                {
                    //System.out.println(pair.taxonomyReader.getSize());
                    Assert.True(result.ChildCount > 0);
                    Assert.True(result.LabelValues.Length > 0);
                }
                //if (VERBOSE) {
                //System.out.println("TEST: facets=" + FacetTestUtils.toString(results.get(0)));
                //}
            }
            finally
            {
                manager.Release(pair);
            }
        }
    }
    finally
    {
        indexer.Join();
        reopener.Join();
    }

    if (VERBOSE)
    {
        Console.WriteLine("TEST: now stop");
    }

    IOUtils.Close(manager, taxoWriter, writer, taxoDir, dir);
}
/// <summary>
/// Exercises a directory-based SearcherTaxonomyManager: a background indexer
/// adds documents with new facet ordinals while this thread repeatedly
/// acquires searcher/taxonomy pairs and counts facets until the indexer
/// signals completion.
/// (LUCENENET specific - name collides with property of LuceneTestCase.)
/// </summary>
public virtual void Test_Directory()
{
    Store.Directory indexDir = NewDirectory();
    Store.Directory taxoDir = NewDirectory();

    IndexWriter writer = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));
    var taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

    // first empty commit
    writer.Commit();
    taxoWriter.Commit();

    var manager = new SearcherTaxonomyManager(indexDir, taxoDir, null);

    FacetsConfig config = new FacetsConfig();
    config.SetMultiValued("field", true);

    AtomicBoolean stop = new AtomicBoolean();

    // How many unique facets to index before stopping:
    //int ordLimit = TestNightly ? 100000 : 6000;
    // LUCENENET specific: 100000 facets takes about 2-3 hours. To keep it under
    // the 1 hour free limit of Azure DevOps, this was reduced to 30000.
    int ordLimit = TestNightly ? 30000 : 6000;

    var indexer = new IndexerThread(writer, config, taxoWriter, manager, ordLimit, stop);
    indexer.Start();

    try
    {
        // Keep searching until the indexer flips the stop flag.
        while (!stop)
        {
            SearcherAndTaxonomy pair = manager.Acquire();
            try
            {
                //System.out.println("search maxOrd=" + pair.taxonomyReader.getSize());
                FacetsCollector collector = new FacetsCollector();
                pair.Searcher.Search(new MatchAllDocsQuery(), collector);
                Facets facets = GetTaxonomyFacetCounts(pair.TaxonomyReader, config, collector);
                FacetResult result = facets.GetTopChildren(10, "field");
                if (pair.Searcher.IndexReader.NumDocs > 0)
                {
                    //System.out.println(pair.taxonomyReader.getSize());
                    Assert.IsTrue(result.ChildCount > 0);
                    Assert.IsTrue(result.LabelValues.Length > 0);
                }
                //if (VERBOSE) {
                //System.out.println("TEST: facets=" + FacetTestUtils.toString(results.get(0)));
                //}
            }
            finally
            {
                manager.Release(pair);
            }
        }
    }
    finally
    {
        indexer.Join();
    }

    if (Verbose)
    {
        Console.WriteLine("TEST: now stop");
    }

    IOUtils.Dispose(manager, taxoWriter, writer, taxoDir, indexDir);
}