/// <summary>
/// Indexes three documents in which dimension "a" occurs three times,
/// "b" twice and "c" once, then checks that <c>GetAllDims</c> reports
/// the three dimensions in that order with the expected child counts.
/// </summary>
public virtual void TestSparseFacets()
{
    AssumeTrue("Test requires SortedSetDV support", DefaultCodecSupportsSortedSet());

    Directory directory = NewDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), directory, Similarity, TimeZone);
    var facetsConfig = new FacetsConfig();

    // First document: dimension "a" only.
    var onlyA = new Document();
    onlyA.Add(new SortedSetDocValuesFacetField("a", "foo1"));
    indexWriter.AddDocument(facetsConfig.Build(onlyA));
    if (Random().NextBoolean())
    {
        indexWriter.Commit();
    }

    // Second document: dimensions "a" and "b".
    var aAndB = new Document();
    aAndB.Add(new SortedSetDocValuesFacetField("a", "foo2"));
    aAndB.Add(new SortedSetDocValuesFacetField("b", "bar1"));
    indexWriter.AddDocument(facetsConfig.Build(aAndB));
    if (Random().NextBoolean())
    {
        indexWriter.Commit();
    }

    // Third document: dimensions "a", "b" and "c".
    var allThree = new Document();
    allThree.Add(new SortedSetDocValuesFacetField("a", "foo3"));
    allThree.Add(new SortedSetDocValuesFacetField("b", "bar2"));
    allThree.Add(new SortedSetDocValuesFacetField("c", "baz1"));
    indexWriter.AddDocument(facetsConfig.Build(allThree));

    // Open the searcher from the writer's reader, then release the writer.
    IndexSearcher searcher = NewSearcher(indexWriter.Reader);
    indexWriter.Dispose();

    // Reader state is built from the searcher's top-level reader.
    SortedSetDocValuesReaderState readerState = new DefaultSortedSetDocValuesReaderState(searcher.IndexReader);

    var collector = new FacetsCollector();
    searcher.Search(new MatchAllDocsQuery(), collector);
    SortedSetDocValuesFacetCounts facetCounts = new SortedSetDocValuesFacetCounts(readerState, collector);

    // Request up to 10 labels for every dimension that has counts.
    IList<FacetResult> allDims = facetCounts.GetAllDims(10);

    Assert.AreEqual(3, allDims.Count);
    Assert.AreEqual("dim=a path=[] value=3 childCount=3\n foo1 (1)\n foo2 (1)\n foo3 (1)\n", allDims[0].ToString());
    Assert.AreEqual("dim=b path=[] value=2 childCount=2\n bar1 (1)\n bar2 (1)\n", allDims[1].ToString());
    Assert.AreEqual("dim=c path=[] value=1 childCount=1\n baz1 (1)\n", allDims[2].ToString());

    searcher.IndexReader.Dispose();
    directory.Dispose();
}
/// <summary>
/// Indexes two documents with a multi-valued dimension "a" and a
/// single-valued dimension "b", verifies the top children reported for
/// each dimension, and checks that a drill-down on a=foo and b=baz
/// matches exactly one document.
/// </summary>
public virtual void TestBasic()
{
    AssumeTrue("Test requires SortedSetDV support", DefaultCodecSupportsSortedSet);

    Directory directory = NewDirectory();

    var facetsConfig = new FacetsConfig();
    facetsConfig.SetMultiValued("a", true);

    RandomIndexWriter indexWriter = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, directory);

    // First document carries three values for "a" and one for "b".
    var multiValued = new Document();
    multiValued.Add(new SortedSetDocValuesFacetField("a", "foo"));
    multiValued.Add(new SortedSetDocValuesFacetField("a", "bar"));
    multiValued.Add(new SortedSetDocValuesFacetField("a", "zoo"));
    multiValued.Add(new SortedSetDocValuesFacetField("b", "baz"));
    indexWriter.AddDocument(facetsConfig.Build(multiValued));
    if (Random.NextBoolean())
    {
        indexWriter.Commit();
    }

    // Second document repeats a=foo only.
    var fooOnly = new Document();
    fooOnly.Add(new SortedSetDocValuesFacetField("a", "foo"));
    indexWriter.AddDocument(facetsConfig.Build(fooOnly));

    // Search against a reader obtained directly from the writer.
    IndexSearcher searcher = NewSearcher(indexWriter.GetReader());

    // Reader state is built from the searcher's top-level reader.
    SortedSetDocValuesReaderState readerState = new DefaultSortedSetDocValuesReaderState(searcher.IndexReader);

    var collector = new FacetsCollector();
    searcher.Search(new MatchAllDocsQuery(), collector);
    SortedSetDocValuesFacetCounts facetCounts = new SortedSetDocValuesFacetCounts(readerState, collector);

    string topOfA = facetCounts.GetTopChildren(10, "a").ToString();
    Assert.AreEqual("dim=a path=[] value=4 childCount=3\n foo (2)\n bar (1)\n zoo (1)\n", topOfA);

    string topOfB = facetCounts.GetTopChildren(10, "b").ToString();
    Assert.AreEqual("dim=b path=[] value=1 childCount=1\n baz (1)\n", topOfB);

    // Drill down on both dimensions at once.
    var drillDown = new DrillDownQuery(facetsConfig);
    drillDown.Add("a", "foo");
    drillDown.Add("b", "baz");
    TopDocs topDocs = searcher.Search(drillDown, 1);
    Assert.AreEqual(1, topDocs.TotalHits);

    IOUtils.Dispose(indexWriter, searcher.IndexReader, directory);
}
/// <summary>
/// Builds reader state from a reader opened after the first document,
/// adds two more documents, collects hits through a searcher over a
/// later reader, and expects constructing
/// <c>SortedSetDocValuesFacetCounts</c> with the earlier state to throw
/// <see cref="InvalidOperationException"/>.
/// </summary>
public virtual void TestStaleState()
{
    AssumeTrue("Test requires SortedSetDV support", DefaultCodecSupportsSortedSet);

    Directory directory = NewDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, directory);
    var facetsConfig = new FacetsConfig();

    var fooDoc = new Document();
    fooDoc.Add(new SortedSetDocValuesFacetField("a", "foo"));
    indexWriter.AddDocument(facetsConfig.Build(fooDoc));

    // State is captured from a reader that only sees the first document.
    IndexReader earlyReader = indexWriter.GetReader();
    SortedSetDocValuesReaderState earlyState = new DefaultSortedSetDocValuesReaderState(earlyReader);

    var barDoc = new Document();
    barDoc.Add(new SortedSetDocValuesFacetField("a", "bar"));
    indexWriter.AddDocument(facetsConfig.Build(barDoc));

    var bazDoc = new Document();
    bazDoc.Add(new SortedSetDocValuesFacetField("a", "baz"));
    indexWriter.AddDocument(facetsConfig.Build(bazDoc));

    // The searcher is opened on a second, later reader.
    IndexSearcher searcher = NewSearcher(indexWriter.GetReader());

    var collector = new FacetsCollector();
    searcher.Search(new MatchAllDocsQuery(), collector);

    try
    {
        _ = new SortedSetDocValuesFacetCounts(earlyState, collector);
        fail("did not hit expected exception");
    }
    catch (InvalidOperationException)
    {
        // This is the outcome the test is looking for.
    }

    earlyReader.Dispose();
    indexWriter.Dispose();
    searcher.IndexReader.Dispose();
    directory.Dispose();
}
/// <summary>
/// Indexes two documents with a multi-valued dimension "a" and a
/// single-valued dimension "b", verifies the top children reported for
/// each dimension, and checks that a drill-down on a=foo and b=baz
/// matches exactly one document.
/// </summary>
public virtual void TestBasic()
{
    AssumeTrue("Test requires SortedSetDV support", DefaultCodecSupportsSortedSet());

    Directory directory = NewDirectory();

    var facetsConfig = new FacetsConfig();
    facetsConfig.SetMultiValued("a", true);

    RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), directory, Similarity, TimeZone);

    // First document carries three values for "a" and one for "b".
    var multiValued = new Document();
    multiValued.Add(new SortedSetDocValuesFacetField("a", "foo"));
    multiValued.Add(new SortedSetDocValuesFacetField("a", "bar"));
    multiValued.Add(new SortedSetDocValuesFacetField("a", "zoo"));
    multiValued.Add(new SortedSetDocValuesFacetField("b", "baz"));
    indexWriter.AddDocument(facetsConfig.Build(multiValued));
    if (Random().NextBoolean())
    {
        indexWriter.Commit();
    }

    // Second document repeats a=foo only.
    var fooOnly = new Document();
    fooOnly.Add(new SortedSetDocValuesFacetField("a", "foo"));
    indexWriter.AddDocument(facetsConfig.Build(fooOnly));

    // Search against a reader obtained directly from the writer.
    IndexSearcher searcher = NewSearcher(indexWriter.Reader);

    // Reader state is built from the searcher's top-level reader.
    SortedSetDocValuesReaderState readerState = new DefaultSortedSetDocValuesReaderState(searcher.IndexReader);

    var collector = new FacetsCollector();
    searcher.Search(new MatchAllDocsQuery(), collector);
    SortedSetDocValuesFacetCounts facetCounts = new SortedSetDocValuesFacetCounts(readerState, collector);

    string topOfA = facetCounts.GetTopChildren(10, "a").ToString();
    Assert.AreEqual("dim=a path=[] value=4 childCount=3\n foo (2)\n bar (1)\n zoo (1)\n", topOfA);

    string topOfB = facetCounts.GetTopChildren(10, "b").ToString();
    Assert.AreEqual("dim=b path=[] value=1 childCount=1\n baz (1)\n", topOfB);

    // Drill down on both dimensions at once.
    var drillDown = new DrillDownQuery(facetsConfig);
    drillDown.Add("a", "foo");
    drillDown.Add("b", "baz");
    TopDocs topDocs = searcher.Search(drillDown, 1);
    Assert.AreEqual(1, topDocs.TotalHits);

    IOUtils.Close(indexWriter, searcher.IndexReader, directory);
}
/// <summary>
/// Commits three documents one at a time, the middle one having no
/// facet fields at all, and verifies that the counts for dimension "a"
/// still cover both labelled documents.
/// </summary>
public virtual void TestSomeSegmentsMissing()
{
    AssumeTrue("Test requires SortedSetDV support", DefaultCodecSupportsSortedSet);

    Directory directory = NewDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, directory);
    var facetsConfig = new FacetsConfig();

    var withFoo1 = new Document();
    withFoo1.Add(new SortedSetDocValuesFacetField("a", "foo1"));
    indexWriter.AddDocument(facetsConfig.Build(withFoo1));
    indexWriter.Commit();

    // Deliberately empty document: it is committed without any facet field.
    var noFacets = new Document();
    indexWriter.AddDocument(facetsConfig.Build(noFacets));
    indexWriter.Commit();

    var withFoo2 = new Document();
    withFoo2.Add(new SortedSetDocValuesFacetField("a", "foo2"));
    indexWriter.AddDocument(facetsConfig.Build(withFoo2));
    indexWriter.Commit();

    // Open the searcher from the writer's reader, then release the writer.
    IndexSearcher searcher = NewSearcher(indexWriter.GetReader());
    indexWriter.Dispose();

    // Reader state is built from the searcher's top-level reader.
    SortedSetDocValuesReaderState readerState = new DefaultSortedSetDocValuesReaderState(searcher.IndexReader);

    var collector = new FacetsCollector();
    searcher.Search(new MatchAllDocsQuery(), collector);
    SortedSetDocValuesFacetCounts facetCounts = new SortedSetDocValuesFacetCounts(readerState, collector);

    // Request up to 10 labels for dimension "a".
    string topOfA = facetCounts.GetTopChildren(10, "a").ToString();
    Assert.AreEqual("dim=a path=[] value=2 childCount=2\n foo1 (1)\n foo2 (1)\n", topOfA);

    searcher.IndexReader.Dispose();
    directory.Dispose();
}
/// <summary>
/// Builds reader state from a reader opened after the first document,
/// adds two more documents, collects hits through a searcher over a
/// later reader, and expects constructing
/// <c>SortedSetDocValuesFacetCounts</c> with the earlier state to throw
/// <see cref="InvalidOperationException"/>.
/// </summary>
public virtual void TestStaleState()
{
    AssumeTrue("Test requires SortedSetDV support", DefaultCodecSupportsSortedSet());

    Directory directory = NewDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), directory, Similarity, TimeZone);
    var facetsConfig = new FacetsConfig();

    var fooDoc = new Document();
    fooDoc.Add(new SortedSetDocValuesFacetField("a", "foo"));
    indexWriter.AddDocument(facetsConfig.Build(fooDoc));

    // State is captured from a reader that only sees the first document.
    IndexReader earlyReader = indexWriter.Reader;
    SortedSetDocValuesReaderState earlyState = new DefaultSortedSetDocValuesReaderState(earlyReader);

    var barDoc = new Document();
    barDoc.Add(new SortedSetDocValuesFacetField("a", "bar"));
    indexWriter.AddDocument(facetsConfig.Build(barDoc));

    var bazDoc = new Document();
    bazDoc.Add(new SortedSetDocValuesFacetField("a", "baz"));
    indexWriter.AddDocument(facetsConfig.Build(bazDoc));

    // The searcher is opened on a second, later reader.
    IndexSearcher searcher = NewSearcher(indexWriter.Reader);

    var collector = new FacetsCollector();
    searcher.Search(new MatchAllDocsQuery(), collector);

    try
    {
        new SortedSetDocValuesFacetCounts(earlyState, collector);
        Fail("did not hit expected exception");
    }
    catch (InvalidOperationException)
    {
        // This is the outcome the test is looking for.
    }

    earlyReader.Dispose();
    indexWriter.Dispose();
    searcher.IndexReader.Dispose();
    directory.Dispose();
}
/// <summary>
/// Builds reader state from a reader opened after the first document,
/// adds two more documents, collects hits through a searcher over a
/// later reader, and expects constructing
/// <c>SortedSetDocValuesFacetCounts</c> with the earlier state to throw
/// an exception for which <c>IsIllegalStateException()</c> is true.
/// </summary>
public virtual void TestStaleState()
{
    AssumeTrue("Test requires SortedSetDV support", DefaultCodecSupportsSortedSet);

    Directory directory = NewDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(Random, directory);
    var facetsConfig = new FacetsConfig();

    var fooDoc = new Document();
    fooDoc.Add(new SortedSetDocValuesFacetField("a", "foo"));
    indexWriter.AddDocument(facetsConfig.Build(fooDoc));

    // State is captured from a reader that only sees the first document.
    IndexReader earlyReader = indexWriter.GetReader();
    SortedSetDocValuesReaderState earlyState = new DefaultSortedSetDocValuesReaderState(earlyReader);

    var barDoc = new Document();
    barDoc.Add(new SortedSetDocValuesFacetField("a", "bar"));
    indexWriter.AddDocument(facetsConfig.Build(barDoc));

    var bazDoc = new Document();
    bazDoc.Add(new SortedSetDocValuesFacetField("a", "baz"));
    indexWriter.AddDocument(facetsConfig.Build(bazDoc));

    // The searcher is opened on a second, later reader.
    IndexSearcher searcher = NewSearcher(indexWriter.GetReader());

    var collector = new FacetsCollector();
    searcher.Search(new MatchAllDocsQuery(), collector);

    try
    {
        _ = new SortedSetDocValuesFacetCounts(earlyState, collector);
        fail("did not hit expected exception");
    }
    catch (Exception e) when (e.IsIllegalStateException())
    {
        // This is the outcome the test is looking for.
    }

    earlyReader.Dispose();
    indexWriter.Dispose();
    searcher.IndexReader.Dispose();
    directory.Dispose();
}
/// <summary>
/// Indexes two documents with a commit in between, searches through a
/// reader wrapped by <c>SlowCompositeReaderWrapper.Wrap</c>, and checks
/// the counts reported for dimension "a".
/// </summary>
public virtual void TestSlowCompositeReaderWrapper()
{
    AssumeTrue("Test requires SortedSetDV support", DefaultCodecSupportsSortedSet());

    Directory directory = NewDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), directory, Similarity, TimeZone);
    var facetsConfig = new FacetsConfig();

    var withFoo1 = new Document();
    withFoo1.Add(new SortedSetDocValuesFacetField("a", "foo1"));
    indexWriter.AddDocument(facetsConfig.Build(withFoo1));
    indexWriter.Commit();

    var withFoo2 = new Document();
    withFoo2.Add(new SortedSetDocValuesFacetField("a", "foo2"));
    indexWriter.AddDocument(facetsConfig.Build(withFoo2));

    // Wrap the writer's reader before handing it to the searcher.
    var wrappedReader = SlowCompositeReaderWrapper.Wrap(indexWriter.Reader);
    IndexSearcher searcher = new IndexSearcher(wrappedReader);

    // Reader state is built from the searcher's top-level reader.
    SortedSetDocValuesReaderState readerState = new DefaultSortedSetDocValuesReaderState(searcher.IndexReader);

    var collector = new FacetsCollector();
    searcher.Search(new MatchAllDocsQuery(), collector);
    Facets facetCounts = new SortedSetDocValuesFacetCounts(readerState, collector);

    // Request up to 10 labels for dimension "a".
    string topOfA = facetCounts.GetTopChildren(10, "a").ToString();
    Assert.AreEqual("dim=a path=[] value=2 childCount=2\n foo1 (1)\n foo2 (1)\n", topOfA);

    IOUtils.Close(indexWriter, searcher.IndexReader, directory);
}
/// <summary>
/// Randomized cross-check: indexes random documents carrying a "content"
/// token and up to <c>numDims</c> facet dimensions, then for a number of
/// randomly chosen tokens compares <c>GetAllDims(10)</c> from
/// <c>SortedSetDocValuesFacetCounts</c> against counts tallied directly
/// from the in-memory test documents.
/// </summary>
public virtual void TestRandom()
{
    AssumeTrue("Test requires SortedSetDV support", DefaultCodecSupportsSortedSet);

    string[] tokens = GetRandomTokens(10);
    Directory indexDir = NewDirectory();
    // taxoDir is never written to in this test; it is only created here and disposed at the end.
    Directory taxoDir = NewDirectory();

    RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, indexDir);
    FacetsConfig config = new FacetsConfig();
    int numDocs = AtLeast(1000);
    int numDims = TestUtil.NextInt32(Random, 1, 7);
    IList<TestDoc> testDocs = GetRandomDocs(tokens, numDocs, numDims);

    foreach (TestDoc testDoc in testDocs)
    {
        Document doc = new Document();
        doc.Add(NewStringField("content", testDoc.content, Field.Store.NO));
        for (int j = 0; j < numDims; j++)
        {
            // A null entry means this document has no value for dimension j.
            if (testDoc.dims[j] != null)
            {
                doc.Add(new SortedSetDocValuesFacetField("dim" + j, testDoc.dims[j]));
            }
        }
        w.AddDocument(config.Build(doc));
    }

    // Search against a reader obtained directly from the writer.
    IndexSearcher searcher = NewSearcher(w.GetReader());

    // Reader state is built once from the searcher's top-level reader and reused for every iteration.
    SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(searcher.IndexReader);

    int iters = AtLeast(100);
    for (int iter = 0; iter < iters; iter++)
    {
        string searchToken = tokens[Random.Next(tokens.Length)];
        if (Verbose)
        {
            Console.WriteLine("\nTEST: iter content=" + searchToken);
        }

        FacetsCollector fc = new FacetsCollector();
        FacetsCollector.Search(searcher, new TermQuery(new Term("content", searchToken)), 10, fc);
        Facets facets = new SortedSetDocValuesFacetCounts(state, fc);

        // Reference tally: one label -> count map per dimension, built by scanning the test documents.
        var expectedCounts = new List<Dictionary<string, int>>(numDims);
        for (int i = 0; i < numDims; i++)
        {
            expectedCounts.Add(new Dictionary<string, int>());
        }

        foreach (TestDoc doc in testDocs)
        {
            if (!doc.content.Equals(searchToken, StringComparison.Ordinal))
            {
                continue;
            }

            for (int j = 0; j < numDims; j++)
            {
                string label = doc.dims[j];
                if (label == null)
                {
                    continue;
                }

                // TryGetValue leaves count at 0 when the label has not been seen yet.
                expectedCounts[j].TryGetValue(label, out int count);
                expectedCounts[j][label] = count + 1;
            }
        }

        List<FacetResult> expected = new List<FacetResult>();
        for (int i = 0; i < numDims; i++)
        {
            List<LabelAndValue> labelValues = new List<LabelAndValue>();
            int totCount = 0;
            foreach (KeyValuePair<string, int> ent in expectedCounts[i])
            {
                labelValues.Add(new LabelAndValue(ent.Key, ent.Value));
                totCount += ent.Value;
            }
            SortLabelValues(labelValues);

            // Dimensions without any matching document are left out of the expected list.
            if (totCount > 0)
            {
                expected.Add(new FacetResult("dim" + i, new string[0], totCount, labelValues.ToArray(), labelValues.Count));
            }
        }

        // Put the expected results in the order defined by SortFacetResults before comparing.
        SortFacetResults(expected);

        IList<FacetResult> actual = facets.GetAllDims(10);

        CollectionAssert.AreEqual(expected, actual);
    }

    IOUtils.Dispose(w, searcher.IndexReader, indexDir, taxoDir);
}
/// <summary>
/// Randomized cross-check: indexes random documents carrying a "content"
/// token and up to <c>numDims</c> facet dimensions, then for a number of
/// randomly chosen tokens compares <c>GetAllDims(10)</c> from
/// <c>SortedSetDocValuesFacetCounts</c> against counts tallied directly
/// from the in-memory test documents.
/// </summary>
public virtual void TestRandom()
{
    AssumeTrue("Test requires SortedSetDV support", DefaultCodecSupportsSortedSet());

    string[] tokens = GetRandomTokens(10);
    Directory indexDir = NewDirectory();
    // taxoDir is never written to in this test; it is only created here and disposed at the end.
    Directory taxoDir = NewDirectory();

    RandomIndexWriter w = new RandomIndexWriter(Random(), indexDir, Similarity, TimeZone);
    FacetsConfig config = new FacetsConfig();
    int numDocs = AtLeast(1000);
    int numDims = TestUtil.NextInt(Random(), 1, 7);
    IList<TestDoc> testDocs = GetRandomDocs(tokens, numDocs, numDims);

    foreach (TestDoc testDoc in testDocs)
    {
        Document doc = new Document();
        doc.Add(NewStringField("content", testDoc.content, Field.Store.NO));
        for (int j = 0; j < numDims; j++)
        {
            // A null entry means this document has no value for dimension j.
            if (testDoc.dims[j] != null)
            {
                doc.Add(new SortedSetDocValuesFacetField("dim" + j, testDoc.dims[j]));
            }
        }
        w.AddDocument(config.Build(doc));
    }

    // Search against a reader obtained directly from the writer.
    IndexSearcher searcher = NewSearcher(w.Reader);

    // Reader state is built once from the searcher's top-level reader and reused for every iteration.
    SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(searcher.IndexReader);

    int iters = AtLeast(100);
    for (int iter = 0; iter < iters; iter++)
    {
        string searchToken = tokens[Random().Next(tokens.Length)];
        if (VERBOSE)
        {
            Console.WriteLine("\nTEST: iter content=" + searchToken);
        }

        FacetsCollector fc = new FacetsCollector();
        FacetsCollector.Search(searcher, new TermQuery(new Term("content", searchToken)), 10, fc);
        Facets facets = new SortedSetDocValuesFacetCounts(state, fc);

        // Reference tally: one label -> count map per dimension, built by scanning the test documents.
        var expectedCounts = new List<Dictionary<string, int>>(numDims);
        for (int i = 0; i < numDims; i++)
        {
            expectedCounts.Add(new Dictionary<string, int>());
        }

        foreach (TestDoc doc in testDocs)
        {
            if (!doc.content.Equals(searchToken, StringComparison.Ordinal))
            {
                continue;
            }

            for (int j = 0; j < numDims; j++)
            {
                string label = doc.dims[j];
                if (label == null)
                {
                    continue;
                }

                // TryGetValue leaves count at 0 when the label has not been seen yet.
                int count;
                expectedCounts[j].TryGetValue(label, out count);
                expectedCounts[j][label] = count + 1;
            }
        }

        List<FacetResult> expected = new List<FacetResult>();
        for (int i = 0; i < numDims; i++)
        {
            List<LabelAndValue> labelValues = new List<LabelAndValue>();
            int totCount = 0;
            foreach (KeyValuePair<string, int> ent in expectedCounts[i])
            {
                labelValues.Add(new LabelAndValue(ent.Key, ent.Value));
                totCount += ent.Value;
            }
            SortLabelValues(labelValues);

            // Dimensions without any matching document are left out of the expected list.
            if (totCount > 0)
            {
                expected.Add(new FacetResult("dim" + i, new string[0], totCount, labelValues.ToArray(), labelValues.Count));
            }
        }

        // Put the expected results in the order defined by SortFacetResults before comparing.
        SortFacetResults(expected);

        IList<FacetResult> actual = facets.GetAllDims(10);

        CollectionAssert.AreEqual(expected, actual);
    }

    IOUtils.Close(w, searcher.IndexReader, indexDir, taxoDir);
}
/// <summary>
/// Indexes three documents in which dimension "a" occurs three times,
/// "b" twice and "c" once, then checks that <c>GetAllDims</c> reports
/// the three dimensions in that order with the expected child counts.
/// </summary>
public virtual void TestSparseFacets()
{
    AssumeTrue("Test requires SortedSetDV support", DefaultCodecSupportsSortedSet());

    Directory directory = NewDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), directory, Similarity, TimeZone);
    var facetsConfig = new FacetsConfig();

    // Dimension "a" only.
    var docOne = new Document();
    docOne.Add(new SortedSetDocValuesFacetField("a", "foo1"));
    indexWriter.AddDocument(facetsConfig.Build(docOne));
    if (Random().NextBoolean())
    {
        indexWriter.Commit();
    }

    // Dimensions "a" and "b".
    var docTwo = new Document();
    docTwo.Add(new SortedSetDocValuesFacetField("a", "foo2"));
    docTwo.Add(new SortedSetDocValuesFacetField("b", "bar1"));
    indexWriter.AddDocument(facetsConfig.Build(docTwo));
    if (Random().NextBoolean())
    {
        indexWriter.Commit();
    }

    // Dimensions "a", "b" and "c".
    var docThree = new Document();
    docThree.Add(new SortedSetDocValuesFacetField("a", "foo3"));
    docThree.Add(new SortedSetDocValuesFacetField("b", "bar2"));
    docThree.Add(new SortedSetDocValuesFacetField("c", "baz1"));
    indexWriter.AddDocument(facetsConfig.Build(docThree));

    // Open the searcher from the writer's reader, then release the writer.
    IndexSearcher searcher = NewSearcher(indexWriter.Reader);
    indexWriter.Dispose();

    // Reader state is built from the searcher's top-level reader.
    SortedSetDocValuesReaderState readerState = new DefaultSortedSetDocValuesReaderState(searcher.IndexReader);

    var collector = new FacetsCollector();
    searcher.Search(new MatchAllDocsQuery(), collector);
    SortedSetDocValuesFacetCounts facetCounts = new SortedSetDocValuesFacetCounts(readerState, collector);

    // Request up to 10 labels for every dimension that has counts.
    IList<FacetResult> allDims = facetCounts.GetAllDims(10);

    // Expected rendering of each dimension, in the order GetAllDims should return them.
    string[] expectedDims =
    {
        "dim=a path=[] value=3 childCount=3\n foo1 (1)\n foo2 (1)\n foo3 (1)\n",
        "dim=b path=[] value=2 childCount=2\n bar1 (1)\n bar2 (1)\n",
        "dim=c path=[] value=1 childCount=1\n baz1 (1)\n",
    };

    Assert.AreEqual(expectedDims.Length, allDims.Count);
    for (int i = 0; i < expectedDims.Length; i++)
    {
        Assert.AreEqual(expectedDims[i], allDims[i].ToString());
    }

    searcher.IndexReader.Dispose();
    directory.Dispose();
}