/// <summary>
/// Indexes two documents carrying <c>SortedSetDocValuesFacetField</c> values for
/// dims "a" and "b", then verifies the top-children output of both dims and that a
/// drill-down on a=foo plus b=baz matches exactly one document.
/// </summary>
public virtual void TestBasic()
{
    AssumeTrue("Test requires SortedSetDV support", DefaultCodecSupportsSortedSet());

    Directory directory = NewDirectory();

    FacetsConfig facetsConfig = new FacetsConfig();
    facetsConfig.SetMultiValued("a", true);
    RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), directory, Similarity, TimeZone);

    // First document: three labels under "a" and a single label under "b".
    Document firstDoc = new Document();
    foreach (string label in new[] { "foo", "bar", "zoo" })
    {
        firstDoc.Add(new SortedSetDocValuesFacetField("a", label));
    }
    firstDoc.Add(new SortedSetDocValuesFacetField("b", "baz"));
    indexWriter.AddDocument(facetsConfig.Build(firstDoc));

    // Randomly commit between the two documents.
    if (Random().NextBoolean())
    {
        indexWriter.Commit();
    }

    // Second document: only a=foo.
    Document secondDoc = new Document();
    secondDoc.Add(new SortedSetDocValuesFacetField("a", "foo"));
    indexWriter.AddDocument(facetsConfig.Build(secondDoc));

    // NRT open
    IndexSearcher searcher = NewSearcher(indexWriter.Reader);

    // Per-top-reader state:
    SortedSetDocValuesReaderState readerState = new DefaultSortedSetDocValuesReaderState(searcher.IndexReader);

    FacetsCollector collector = new FacetsCollector();
    searcher.Search(new MatchAllDocsQuery(), collector);

    SortedSetDocValuesFacetCounts counts = new SortedSetDocValuesFacetCounts(readerState, collector);

    string actualA = counts.GetTopChildren(10, "a").ToString();
    Assert.AreEqual("dim=a path=[] value=4 childCount=3\n foo (2)\n bar (1)\n zoo (1)\n", actualA);

    string actualB = counts.GetTopChildren(10, "b").ToString();
    Assert.AreEqual("dim=b path=[] value=1 childCount=1\n baz (1)\n", actualB);

    // DrillDown:
    DrillDownQuery drillDown = new DrillDownQuery(facetsConfig);
    drillDown.Add("a", "foo");
    drillDown.Add("b", "baz");
    TopDocs topDocs = searcher.Search(drillDown, 1);
    Assert.AreEqual(1, topDocs.TotalHits);

    IOUtils.Close(indexWriter, searcher.IndexReader, directory);
}
/// <summary>
/// Builds the <see cref="Facets"/> for the drill-down collector and, when present,
/// for each drill-sideways collector. Subclasses can override this to customize the
/// per-dim Facets implementation.
/// </summary>
/// <param name="drillDowns">Collector whose hits feed the drill-down facets.</param>
/// <param name="drillSideways">Per-dimension sideways collectors; may be <c>null</c>.</param>
/// <param name="drillSidewaysDims">Dimension name for the collector at the same index of <paramref name="drillSideways"/>.</param>
/// <returns>
/// The drill-down facets alone when no sideways facets were built; otherwise a
/// <c>MultiFacets</c> combining the per-dimension facets with the drill-down facets.
/// </returns>
protected virtual Facets BuildFacetsResult(FacetsCollector drillDowns, FacetsCollector[] drillSideways, string[] drillSidewaysDims)
{
    // Taxonomy-based counting is used when a taxonomy reader is available,
    // sorted-set doc-values counting otherwise.
    bool hasTaxonomy = m_taxoReader != null;

    Facets drillDownFacets;
    if (hasTaxonomy)
    {
        drillDownFacets = new FastTaxonomyFacetCounts(m_taxoReader, m_config, drillDowns);
    }
    else
    {
        drillDownFacets = new SortedSetDocValuesFacetCounts(m_state, drillDowns);
    }

    var perDimFacets = new Dictionary<string, Facets>();
    int sidewaysCount = drillSideways == null ? 0 : drillSideways.Length;
    for (int dimIndex = 0; dimIndex < sidewaysCount; dimIndex++)
    {
        // Read the dimension name before constructing the counts for it.
        string dim = drillSidewaysDims[dimIndex];

        Facets dimFacets;
        if (hasTaxonomy)
        {
            dimFacets = new FastTaxonomyFacetCounts(m_taxoReader, m_config, drillSideways[dimIndex]);
        }
        else
        {
            dimFacets = new SortedSetDocValuesFacetCounts(m_state, drillSideways[dimIndex]);
        }

        perDimFacets[dim] = dimFacets;
    }

    if (perDimFacets.Count > 0)
    {
        return new MultiFacets(perDimFacets, drillDownFacets);
    }

    return drillDownFacets;
}
/// <summary>
/// Creates the facet counts for the drill-down hits plus one set of counts per
/// drill-sideways dimension. Subclasses can override this to customize the per-dim
/// Facets implementation.
/// </summary>
/// <param name="drillDowns">Collector holding the drill-down hits.</param>
/// <param name="drillSideways">Sideways collectors, one per dimension; may be <c>null</c>.</param>
/// <param name="drillSidewaysDims">Dimension names, index-aligned with <paramref name="drillSideways"/>.</param>
/// <returns>
/// Only the drill-down facets when there are no sideways collectors; otherwise a
/// <c>MultiFacets</c> wrapping the per-dimension facets and the drill-down facets.
/// </returns>
protected virtual Facets BuildFacetsResult(FacetsCollector drillDowns, FacetsCollector[] drillSideways, string[] drillSidewaysDims)
{
    // A taxonomy reader selects taxonomy counts; without one, sorted-set counts are used.
    bool useTaxonomy = taxoReader != null;

    Facets drillDownFacets = useTaxonomy
        ? (Facets)new FastTaxonomyFacetCounts(taxoReader, config, drillDowns)
        : (Facets)new SortedSetDocValuesFacetCounts(state, drillDowns);

    // No sideways collectors means no per-dimension facets to wrap.
    if (drillSideways == null || drillSideways.Length == 0)
    {
        return drillDownFacets;
    }

    var sidewaysByDim = new Dictionary<string, Facets>();
    for (int n = 0; n < drillSideways.Length; n++)
    {
        string dimName = drillSidewaysDims[n];
        FacetsCollector sidewaysCollector = drillSideways[n];

        sidewaysByDim[dimName] = useTaxonomy
            ? (Facets)new FastTaxonomyFacetCounts(taxoReader, config, sidewaysCollector)
            : (Facets)new SortedSetDocValuesFacetCounts(state, sidewaysCollector);
    }

    return new MultiFacets(sidewaysByDim, drillDownFacets);
}
/// <summary>
/// Randomized comparison of <c>SortedSetDocValuesFacetCounts</c> against a slow,
/// straightforward count. Random documents are indexed with a "content" token and up
/// to <c>numDims</c> facet dimensions; each iteration searches for a random token and
/// checks that <c>GetAllDims(10)</c> equals the expected per-dimension results computed
/// directly from the in-memory test documents.
/// </summary>
public virtual void TestRandom()
{
    AssumeTrue("Test requires SortedSetDV support", DefaultCodecSupportsSortedSet());

    string[] tokens = GetRandomTokens(10);
    Directory indexDir = NewDirectory();
    // Within this test taxoDir is only created here and closed at the end.
    Directory taxoDir = NewDirectory();

    RandomIndexWriter w = new RandomIndexWriter(Random(), indexDir, Similarity, TimeZone);
    FacetsConfig config = new FacetsConfig();
    int numDocs = AtLeast(1000);
    int numDims = TestUtil.NextInt(Random(), 1, 7);
    IList<TestDoc> testDocs = GetRandomDocs(tokens, numDocs, numDims);
    foreach (TestDoc testDoc in testDocs)
    {
        Document doc = new Document();
        doc.Add(NewStringField("content", testDoc.content, Field.Store.NO));
        for (int j = 0; j < numDims; j++)
        {
            // A null entry means the document has no value for that dimension.
            if (testDoc.dims[j] != null)
            {
                doc.Add(new SortedSetDocValuesFacetField("dim" + j, testDoc.dims[j]));
            }
        }
        w.AddDocument(config.Build(doc));
    }

    // NRT open
    IndexSearcher searcher = NewSearcher(w.Reader);

    // Per-top-reader state:
    SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(searcher.IndexReader);

    int iters = AtLeast(100);
    for (int iter = 0; iter < iters; iter++)
    {
        string searchToken = tokens[Random().Next(tokens.Length)];
        if (VERBOSE)
        {
            Console.WriteLine("\nTEST: iter content=" + searchToken);
        }

        FacetsCollector fc = new FacetsCollector();
        FacetsCollector.Search(searcher, new TermQuery(new Term("content", searchToken)), 10, fc);
        Facets facets = new SortedSetDocValuesFacetCounts(state, fc);

        // Slow, yet hopefully bug-free, faceting: one label -> count map per dimension.
        var expectedCounts = new List<Dictionary<string, int>>();
        for (int i = 0; i < numDims; i++)
        {
            expectedCounts.Add(new Dictionary<string, int>());
        }

        foreach (TestDoc candidate in testDocs)
        {
            if (!candidate.content.Equals(searchToken))
            {
                continue;
            }

            for (int j = 0; j < numDims; j++)
            {
                string label = candidate.dims[j];
                if (label == null)
                {
                    continue;
                }

                // TryGetValue leaves 'seen' at 0 when the label is not present yet,
                // so the first occurrence is stored as 1.
                int seen;
                expectedCounts[j].TryGetValue(label, out seen);
                expectedCounts[j][label] = seen + 1;
            }
        }

        List<FacetResult> expected = new List<FacetResult>();
        for (int i = 0; i < numDims; i++)
        {
            List<LabelAndValue> labelValues = new List<LabelAndValue>();
            int totCount = 0;
            foreach (KeyValuePair<string, int> ent in expectedCounts[i])
            {
                labelValues.Add(new LabelAndValue(ent.Key, ent.Value));
                totCount += ent.Value;
            }
            SortLabelValues(labelValues);

            // Dimensions without any matching document are left out of the expected list.
            if (totCount > 0)
            {
                expected.Add(new FacetResult("dim" + i, new string[0], totCount, labelValues.ToArray(), labelValues.Count));
            }
        }

        // Sort by highest value, tie break by value:
        SortFacetResults(expected);

        IList<FacetResult> actual = facets.GetAllDims(10);

        CollectionAssert.AreEqual(expected, actual);
    }

    IOUtils.Close(w, searcher.IndexReader, indexDir, taxoDir);
}
/// <summary>
/// Indexes two documents (with a commit in between), searches them through a reader
/// wrapped by <c>SlowCompositeReaderWrapper</c>, and verifies the top children
/// reported for dim "a".
/// </summary>
public virtual void TestSlowCompositeReaderWrapper()
{
    AssumeTrue("Test requires SortedSetDV support", DefaultCodecSupportsSortedSet());

    Directory directory = NewDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), directory, Similarity, TimeZone);
    FacetsConfig facetsConfig = new FacetsConfig();

    Document firstDoc = new Document();
    firstDoc.Add(new SortedSetDocValuesFacetField("a", "foo1"));
    indexWriter.AddDocument(facetsConfig.Build(firstDoc));

    // Always commit between the two documents.
    indexWriter.Commit();

    Document secondDoc = new Document();
    secondDoc.Add(new SortedSetDocValuesFacetField("a", "foo2"));
    indexWriter.AddDocument(facetsConfig.Build(secondDoc));

    // NRT open
    var wrappedReader = SlowCompositeReaderWrapper.Wrap(indexWriter.Reader);
    IndexSearcher searcher = new IndexSearcher(wrappedReader);

    // Per-top-reader state:
    SortedSetDocValuesReaderState readerState = new DefaultSortedSetDocValuesReaderState(searcher.IndexReader);

    FacetsCollector collector = new FacetsCollector();
    searcher.Search(new MatchAllDocsQuery(), collector);
    Facets counts = new SortedSetDocValuesFacetCounts(readerState, collector);

    // Ask for top 10 labels for any dims that have counts:
    string actual = counts.GetTopChildren(10, "a").ToString();
    Assert.AreEqual("dim=a path=[] value=2 childCount=2\n foo1 (1)\n foo2 (1)\n", actual);

    IOUtils.Close(indexWriter, searcher.IndexReader, directory);
}
/// <summary>
/// Indexes three documents in which dim "a" appears in all three, dim "b" in two and
/// dim "c" in one, then verifies that <c>GetAllDims(10)</c> returns the three
/// dimensions with the expected string form, in the order a, b, c.
/// </summary>
public virtual void TestSparseFacets()
{
    AssumeTrue("Test requires SortedSetDV support", DefaultCodecSupportsSortedSet());

    Directory directory = NewDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), directory, Similarity, TimeZone);
    FacetsConfig facetsConfig = new FacetsConfig();

    // Document 1: dim "a" only.
    Document docOne = new Document();
    docOne.Add(new SortedSetDocValuesFacetField("a", "foo1"));
    indexWriter.AddDocument(facetsConfig.Build(docOne));

    if (Random().NextBoolean())
    {
        indexWriter.Commit();
    }

    // Document 2: dims "a" and "b".
    Document docTwo = new Document();
    docTwo.Add(new SortedSetDocValuesFacetField("a", "foo2"));
    docTwo.Add(new SortedSetDocValuesFacetField("b", "bar1"));
    indexWriter.AddDocument(facetsConfig.Build(docTwo));

    if (Random().NextBoolean())
    {
        indexWriter.Commit();
    }

    // Document 3: dims "a", "b" and "c".
    Document docThree = new Document();
    docThree.Add(new SortedSetDocValuesFacetField("a", "foo3"));
    docThree.Add(new SortedSetDocValuesFacetField("b", "bar2"));
    docThree.Add(new SortedSetDocValuesFacetField("c", "baz1"));
    indexWriter.AddDocument(facetsConfig.Build(docThree));

    // NRT open
    IndexSearcher searcher = NewSearcher(indexWriter.Reader);
    indexWriter.Dispose();

    // Per-top-reader state:
    SortedSetDocValuesReaderState readerState = new DefaultSortedSetDocValuesReaderState(searcher.IndexReader);

    FacetsCollector collector = new FacetsCollector();
    searcher.Search(new MatchAllDocsQuery(), collector);
    SortedSetDocValuesFacetCounts counts = new SortedSetDocValuesFacetCounts(readerState, collector);

    // Ask for top 10 labels for any dims that have counts:
    IList<FacetResult> allDims = counts.GetAllDims(10);

    string[] expectedDims =
    {
        "dim=a path=[] value=3 childCount=3\n foo1 (1)\n foo2 (1)\n foo3 (1)\n",
        "dim=b path=[] value=2 childCount=2\n bar1 (1)\n bar2 (1)\n",
        "dim=c path=[] value=1 childCount=1\n baz1 (1)\n"
    };

    Assert.AreEqual(3, allDims.Count);
    for (int position = 0; position < expectedDims.Length; position++)
    {
        Assert.AreEqual(expectedDims[position], allDims[position].ToString());
    }

    searcher.IndexReader.Dispose();
    directory.Dispose();
}