public virtual void TestRandomLongs() { Directory dir = NewDirectory(); var w = new RandomIndexWriter(Random(), dir); int numDocs = AtLeast(1000); if (VERBOSE) { Console.WriteLine("TEST: numDocs=" + numDocs); } long[] values = new long[numDocs]; long minValue = long.MaxValue; long maxValue = long.MinValue; for (int i = 0; i < numDocs; i++) { Document doc = new Document(); long v = Random().NextLong(); values[i] = v; doc.Add(new NumericDocValuesField("field", v)); doc.Add(new LongField("field", v, Field.Store.NO)); w.AddDocument(doc); minValue = Math.Min(minValue, v); maxValue = Math.Max(maxValue, v); } IndexReader r = w.Reader; IndexSearcher s = NewSearcher(r); FacetsConfig config = new FacetsConfig(); int numIters = AtLeast(10); for (int iter = 0; iter < numIters; iter++) { if (VERBOSE) { Console.WriteLine("TEST: iter=" + iter); } int numRange = TestUtil.NextInt(Random(), 1, 100); LongRange[] ranges = new LongRange[numRange]; int[] expectedCounts = new int[numRange]; long minAcceptedValue = long.MaxValue; long maxAcceptedValue = long.MinValue; for (int rangeID = 0; rangeID < numRange; rangeID++) { long min; if (rangeID > 0 && Random().Next(10) == 7) { // Use an existing boundary: LongRange prevRange = ranges[Random().Next(rangeID)]; if (Random().NextBoolean()) { min = prevRange.min; } else { min = prevRange.max; } } else { min = Random().NextLong(); } long max; if (rangeID > 0 && Random().Next(10) == 7) { // Use an existing boundary: LongRange prevRange = ranges[Random().Next(rangeID)]; if (Random().NextBoolean()) { max = prevRange.min; } else { max = prevRange.max; } } else { max = Random().NextLong(); } if (min > max) { long x = min; min = max; max = x; } bool minIncl; bool maxIncl; if (min == max) { minIncl = true; maxIncl = true; } else { minIncl = Random().NextBoolean(); maxIncl = Random().NextBoolean(); } ranges[rangeID] = new LongRange("r" + rangeID, min, minIncl, max, maxIncl); if (VERBOSE) { Console.WriteLine(" range " + rangeID + ": " + ranges[rangeID]); } // Do "slow but hopefully correct" computation of // expected count: for (int i = 0; i < numDocs; i++) { bool accept = true; if (minIncl) { accept &= values[i] >= min; } else { accept &= values[i] > min; } if (maxIncl) { accept &= values[i] <= max; } else { accept &= values[i] < max; } if (accept) { expectedCounts[rangeID]++; minAcceptedValue = Math.Min(minAcceptedValue, values[i]); maxAcceptedValue = Math.Max(maxAcceptedValue, values[i]); } } } FacetsCollector sfc = new FacetsCollector(); s.Search(new MatchAllDocsQuery(), sfc); Filter fastMatchFilter; if (Random().NextBoolean()) { if (Random().NextBoolean()) { fastMatchFilter = NumericRangeFilter.NewLongRange("field", minValue, maxValue, true, true); } else { fastMatchFilter = NumericRangeFilter.NewLongRange("field", minAcceptedValue, maxAcceptedValue, true, true); } } else { fastMatchFilter = null; } ValueSource vs = new LongFieldSource("field"); Facets facets = new LongRangeFacetCounts("field", vs, sfc, fastMatchFilter, ranges); FacetResult result = facets.GetTopChildren(10, "field"); Assert.AreEqual(numRange, result.LabelValues.Length); for (int rangeID = 0; rangeID < numRange; rangeID++) { if (VERBOSE) { Console.WriteLine(" range " + rangeID + " expectedCount=" + expectedCounts[rangeID]); } LabelAndValue subNode = result.LabelValues[rangeID]; Assert.AreEqual("r" + rangeID, subNode.label); Assert.AreEqual(expectedCounts[rangeID], (int)subNode.value); LongRange range = ranges[rangeID]; // Test drill-down: DrillDownQuery ddq = new DrillDownQuery(config); if (Random().NextBoolean()) { if (Random().NextBoolean()) { ddq.Add("field", NumericRangeFilter.NewLongRange("field", range.min, range.max, range.minInclusive, range.maxInclusive)); } else { ddq.Add("field", NumericRangeQuery.NewLongRange("field", range.min, range.max, range.minInclusive, range.maxInclusive)); } } else { ddq.Add("field", range.GetFilter(fastMatchFilter, vs)); } Assert.AreEqual(expectedCounts[rangeID], s.Search(ddq, 10).TotalHits); } } IOUtils.Close(w, r, dir); }
public virtual void TestBasic() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); // Writes facet ords to a separate directory from the // main index: var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE); FacetsConfig config = new FacetsConfig(); config.SetHierarchical("Publish Date", true); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir); Document doc = new Document(); doc.Add(new FacetField("Author", "Bob")); doc.Add(new FacetField("Publish Date", "2010", "10", "15")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new FacetField("Author", "Lisa")); doc.Add(new FacetField("Publish Date", "2010", "10", "20")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new FacetField("Author", "Lisa")); doc.Add(new FacetField("Publish Date", "2012", "1", "1")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new FacetField("Author", "Susan")); doc.Add(new FacetField("Publish Date", "2012", "1", "7")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new FacetField("Author", "Frank")); doc.Add(new FacetField("Publish Date", "1999", "5", "5")); writer.AddDocument(config.Build(taxoWriter, doc)); // NRT open IndexSearcher searcher = NewSearcher(writer.Reader); // NRT open var taxoReader = new DirectoryTaxonomyReader(taxoWriter); // Aggregate the facet counts: FacetsCollector c = new FacetsCollector(); // MatchAllDocsQuery is for "browsing" (counts facets // for all non-deleted docs in the index); normally // you'd use a "normal" query, and use MultiCollector to // wrap collecting the "normal" hits and also facets: searcher.Search(new MatchAllDocsQuery(), c); Facets facets = new FastTaxonomyFacetCounts(taxoReader, config, c); // Retrieve & verify results: Assert.AreEqual("dim=Publish Date path=[] value=5 childCount=3\n 2010 (2)\n 2012 (2)\n 1999 (1)\n", facets.GetTopChildren(10, "Publish Date").ToString()); Assert.AreEqual("dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Bob (1)\n Susan (1)\n Frank (1)\n", facets.GetTopChildren(10, "Author").ToString()); // Now user drills down on Publish Date/2010: DrillDownQuery q2 = new DrillDownQuery(config); q2.Add("Publish Date", "2010"); c = new FacetsCollector(); searcher.Search(q2, c); facets = new FastTaxonomyFacetCounts(taxoReader, config, c); Assert.AreEqual("dim=Author path=[] value=2 childCount=2\n Bob (1)\n Lisa (1)\n", facets.GetTopChildren(10, "Author").ToString()); Assert.AreEqual(1, facets.GetSpecificValue("Author", "Lisa")); Assert.Null(facets.GetTopChildren(10, "Non exitent dim")); // Smoke test PrintTaxonomyStats: ByteArrayOutputStream bos = new ByteArrayOutputStream(); PrintTaxonomyStats.PrintStats(taxoReader, Console.Out, true); string result = bos.ToString(); Assert.True(result.IndexOf("/Author: 4 immediate children; 5 total categories", StringComparison.Ordinal) != -1); Assert.True(result.IndexOf("/Publish Date: 3 immediate children; 12 total categories", StringComparison.Ordinal) != -1); // Make sure at least a few nodes of the tree came out: Assert.True(result.IndexOf(" /1999", StringComparison.Ordinal) != -1); Assert.True(result.IndexOf(" /2012", StringComparison.Ordinal) != -1); Assert.True(result.IndexOf(" /20", StringComparison.Ordinal) != -1); IOUtils.Close(writer, taxoWriter, searcher.IndexReader, taxoReader, taxoDir, dir); }
public virtual void TestMixedRangeAndNonRangeTaxonomy() { Directory d = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random(), d); Directory td = NewDirectory(); DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(td, IndexWriterConfig.OpenMode_e.CREATE); FacetsConfig config = new FacetsConfig(); for (long l = 0; l < 100; l++) { Document doc = new Document(); // For computing range facet counts: doc.Add(new NumericDocValuesField("field", l)); // For drill down by numeric range: doc.Add(new LongField("field", l, Field.Store.NO)); if ((l & 3) == 0) { doc.Add(new FacetField("dim", "a")); } else { doc.Add(new FacetField("dim", "b")); } w.AddDocument(config.Build(tw, doc)); } IndexReader r = w.Reader; var tr = new DirectoryTaxonomyReader(tw); IndexSearcher s = NewSearcher(r); if (VERBOSE) { Console.WriteLine("TEST: searcher=" + s); } DrillSideways ds = new DrillSidewaysAnonymousInnerClassHelper(this, s, config, tr); // First search, no drill downs: DrillDownQuery ddq = new DrillDownQuery(config); DrillSidewaysResult dsr = ds.Search(null, ddq, 10); Assert.AreEqual(100, dsr.Hits.TotalHits); Assert.AreEqual("dim=dim path=[] value=100 childCount=2\n b (75)\n a (25)\n", dsr.Facets.GetTopChildren(10, "dim").ToString()); Assert.AreEqual("dim=field path=[] value=21 childCount=5\n less than 10 (10)\n less than or equal to 10 (11)\n over 90 (9)\n 90 or above (10)\n over 1000 (0)\n", dsr.Facets.GetTopChildren(10, "field").ToString()); // Second search, drill down on dim=b: ddq = new DrillDownQuery(config); ddq.Add("dim", "b"); dsr = ds.Search(null, ddq, 10); Assert.AreEqual(75, dsr.Hits.TotalHits); Assert.AreEqual("dim=dim path=[] value=100 childCount=2\n b (75)\n a (25)\n", dsr.Facets.GetTopChildren(10, "dim").ToString()); Assert.AreEqual("dim=field path=[] value=16 childCount=5\n less than 10 (7)\n less than or equal to 10 (8)\n over 90 (7)\n 90 or above (8)\n over 1000 (0)\n", dsr.Facets.GetTopChildren(10, "field").ToString()); // Third search, drill down on "less than or equal to 10": ddq = new DrillDownQuery(config); ddq.Add("field", NumericRangeQuery.NewLongRange("field", 0L, 10L, true, true)); dsr = ds.Search(null, ddq, 10); Assert.AreEqual(11, dsr.Hits.TotalHits); Assert.AreEqual("dim=dim path=[] value=11 childCount=2\n b (8)\n a (3)\n", dsr.Facets.GetTopChildren(10, "dim").ToString()); Assert.AreEqual("dim=field path=[] value=21 childCount=5\n less than 10 (10)\n less than or equal to 10 (11)\n over 90 (9)\n 90 or above (10)\n over 1000 (0)\n", dsr.Facets.GetTopChildren(10, "field").ToString()); IOUtils.Close(tw, tr, td, w, r, d); }
public virtual void TestRandomFloats() { Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random(), dir); int numDocs = AtLeast(1000); float[] values = new float[numDocs]; float minValue = float.PositiveInfinity; float maxValue = float.NegativeInfinity; for (int i = 0; i < numDocs; i++) { Document doc = new Document(); float v = Random().NextFloat(); values[i] = v; doc.Add(new FloatDocValuesField("field", v)); doc.Add(new FloatField("field", v, Field.Store.NO)); w.AddDocument(doc); minValue = Math.Min(minValue, v); maxValue = Math.Max(maxValue, v); } IndexReader r = w.Reader; IndexSearcher s = NewSearcher(r); FacetsConfig config = new FacetsConfig(); int numIters = AtLeast(10); for (int iter = 0; iter < numIters; iter++) { if (VERBOSE) { Console.WriteLine("TEST: iter=" + iter); } int numRange = TestUtil.NextInt(Random(), 1, 5); DoubleRange[] ranges = new DoubleRange[numRange]; int[] expectedCounts = new int[numRange]; float minAcceptedValue = float.PositiveInfinity; float maxAcceptedValue = float.NegativeInfinity; if (VERBOSE) { Console.WriteLine("TEST: " + numRange + " ranges"); } for (int rangeID = 0; rangeID < numRange; rangeID++) { double min; if (rangeID > 0 && Random().Next(10) == 7) { // Use an existing boundary: DoubleRange prevRange = ranges[Random().Next(rangeID)]; if (Random().NextBoolean()) { min = prevRange.Min; } else { min = prevRange.Max; } } else { min = Random().NextDouble(); } double max; if (rangeID > 0 && Random().Next(10) == 7) { // Use an existing boundary: DoubleRange prevRange = ranges[Random().Next(rangeID)]; if (Random().NextBoolean()) { max = prevRange.Min; } else { max = prevRange.Max; } } else { max = Random().NextDouble(); } if (min > max) { double x = min; min = max; max = x; } // Must truncate to float precision so that the // drill-down counts (which use NRQ.newFloatRange) // are correct: min = (float)min; max = (float)max; bool minIncl; bool maxIncl; if (min == max) { minIncl = true; maxIncl = true; } else { minIncl = Random().NextBoolean(); maxIncl = Random().NextBoolean(); } ranges[rangeID] = new DoubleRange("r" + rangeID, min, minIncl, max, maxIncl); if (VERBOSE) { Console.WriteLine("TEST: range " + rangeID + ": " + ranges[rangeID]); } // Do "slow but hopefully correct" computation of // expected count: for (int i = 0; i < numDocs; i++) { bool accept = true; if (minIncl) { accept &= values[i] >= min; } else { accept &= values[i] > min; } if (maxIncl) { accept &= values[i] <= max; } else { accept &= values[i] < max; } if (VERBOSE) { Console.WriteLine("TEST: check doc=" + i + " val=" + values[i] + " accept=" + accept); } if (accept) { expectedCounts[rangeID]++; minAcceptedValue = Math.Min(minAcceptedValue, values[i]); maxAcceptedValue = Math.Max(maxAcceptedValue, values[i]); } } } FacetsCollector sfc = new FacetsCollector(); s.Search(new MatchAllDocsQuery(), sfc); Filter fastMatchFilter; if (Random().NextBoolean()) { if (Random().NextBoolean()) { fastMatchFilter = NumericRangeFilter.NewFloatRange("field", minValue, maxValue, true, true); } else { fastMatchFilter = NumericRangeFilter.NewFloatRange("field", minAcceptedValue, maxAcceptedValue, true, true); } } else { fastMatchFilter = null; } ValueSource vs = new FloatFieldSource("field"); Facets facets = new DoubleRangeFacetCounts("field", vs, sfc, fastMatchFilter, ranges); FacetResult result = facets.GetTopChildren(10, "field"); Assert.AreEqual(numRange, result.LabelValues.Length); for (int rangeID = 0; rangeID < numRange; rangeID++) { if (VERBOSE) { Console.WriteLine("TEST: verify range " + rangeID + " expectedCount=" + expectedCounts[rangeID]); } LabelAndValue subNode = result.LabelValues[rangeID]; Assert.AreEqual("r" + rangeID, subNode.label); Assert.AreEqual(expectedCounts[rangeID], (int)subNode.value); DoubleRange range = ranges[rangeID]; // Test drill-down: DrillDownQuery ddq = new DrillDownQuery(config); if (Random().NextBoolean()) { if (Random().NextBoolean()) { ddq.Add("field", NumericRangeFilter.NewFloatRange("field", (float)range.Min, (float)range.Max, range.MinInclusive, range.MaxInclusive)); } else { ddq.Add("field", NumericRangeQuery.NewFloatRange("field", (float)range.Min, (float)range.Max, range.MinInclusive, range.MaxInclusive)); } } else { ddq.Add("field", range.GetFilter(fastMatchFilter, vs)); } Assert.AreEqual(expectedCounts[rangeID], s.Search(ddq, 10).TotalHits); } } IOUtils.Close(w, r, dir); }
public virtual void TestRandomFloats() { Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, dir); int numDocs = AtLeast(1000); float[] values = new float[numDocs]; float minValue = float.PositiveInfinity; float maxValue = float.NegativeInfinity; for (int i = 0; i < numDocs; i++) { Document doc = new Document(); float v = Random.NextSingle(); values[i] = v; doc.Add(new SingleDocValuesField("field", v)); doc.Add(new SingleField("field", v, Field.Store.NO)); w.AddDocument(doc); minValue = Math.Min(minValue, v); maxValue = Math.Max(maxValue, v); } IndexReader r = w.GetReader(); IndexSearcher s = NewSearcher(r); FacetsConfig config = new FacetsConfig(); int numIters = AtLeast(10); for (int iter = 0; iter < numIters; iter++) { if (VERBOSE) { Console.WriteLine("TEST: iter=" + iter); } int numRange = TestUtil.NextInt32(Random, 1, 5); DoubleRange[] ranges = new DoubleRange[numRange]; int[] expectedCounts = new int[numRange]; float minAcceptedValue = float.PositiveInfinity; float maxAcceptedValue = float.NegativeInfinity; if (VERBOSE) { Console.WriteLine("TEST: " + numRange + " ranges"); } for (int rangeID = 0; rangeID < numRange; rangeID++) { double min; if (rangeID > 0 && Random.Next(10) == 7) { // Use an existing boundary: DoubleRange prevRange = ranges[Random.Next(rangeID)]; if (Random.NextBoolean()) { min = prevRange.Min; } else { min = prevRange.Max; } } else { min = Random.NextDouble(); } double max; if (rangeID > 0 && Random.Next(10) == 7) { // Use an existing boundary: DoubleRange prevRange = ranges[Random.Next(rangeID)]; if (Random.NextBoolean()) { max = prevRange.Min; } else { max = prevRange.Max; } } else { max = Random.NextDouble(); } if (min > max) { double x = min; min = max; max = x; } // Must truncate to float precision so that the // drill-down counts (which use NRQ.newFloatRange) // are correct: min = (float)min; max = (float)max; bool minIncl; bool maxIncl; if (min == max) { minIncl = true; maxIncl = true; } else { minIncl = Random.NextBoolean(); maxIncl = Random.NextBoolean(); } ranges[rangeID] = new DoubleRange("r" + rangeID, min, minIncl, max, maxIncl); if (VERBOSE) { Console.WriteLine("TEST: range " + rangeID + ": " + ranges[rangeID]); } // Do "slow but hopefully correct" computation of // expected count: for (int i = 0; i < numDocs; i++) { bool accept = true; if (minIncl) { accept &= values[i] >= min; } else { accept &= values[i] > min; } if (maxIncl) { accept &= values[i] <= max; } else { accept &= values[i] < max; } if (VERBOSE) { Console.WriteLine("TEST: check doc=" + i + " val=" + values[i] + " accept=" + accept); } if (accept) { expectedCounts[rangeID]++; minAcceptedValue = Math.Min(minAcceptedValue, values[i]); maxAcceptedValue = Math.Max(maxAcceptedValue, values[i]); } } } FacetsCollector sfc = new FacetsCollector(); s.Search(new MatchAllDocsQuery(), sfc); Filter fastMatchFilter; if (Random.NextBoolean()) { if (Random.NextBoolean()) { fastMatchFilter = NumericRangeFilter.NewSingleRange("field", minValue, maxValue, true, true); } else { fastMatchFilter = NumericRangeFilter.NewSingleRange("field", minAcceptedValue, maxAcceptedValue, true, true); } } else { fastMatchFilter = null; } ValueSource vs = new SingleFieldSource("field"); Facets facets = new DoubleRangeFacetCounts("field", vs, sfc, fastMatchFilter, ranges); FacetResult result = facets.GetTopChildren(10, "field"); Assert.AreEqual(numRange, result.LabelValues.Length); for (int rangeID = 0; rangeID < numRange; rangeID++) { if (VERBOSE) { Console.WriteLine("TEST: verify range " + rangeID + " expectedCount=" + expectedCounts[rangeID]); } LabelAndValue subNode = result.LabelValues[rangeID]; Assert.AreEqual("r" + rangeID, subNode.Label); Assert.AreEqual(expectedCounts[rangeID], (int)subNode.Value); DoubleRange range = ranges[rangeID]; // Test drill-down: DrillDownQuery ddq = new DrillDownQuery(config); if (Random.NextBoolean()) { if (Random.NextBoolean()) { ddq.Add("field", NumericRangeFilter.NewSingleRange("field", (float)range.Min, (float)range.Max, range.MinInclusive, range.MaxInclusive)); } else { ddq.Add("field", NumericRangeQuery.NewSingleRange("field", (float)range.Min, (float)range.Max, range.MinInclusive, range.MaxInclusive)); } } else { ddq.Add("field", range.GetFilter(fastMatchFilter, vs)); } Assert.AreEqual(expectedCounts[rangeID], s.Search(ddq, 10).TotalHits); } } IOUtils.Dispose(w, r, dir); }
public virtual void TestCustomDoublesValueSource() { Directory dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir); Document doc = new Document(); writer.AddDocument(doc); writer.AddDocument(doc); writer.AddDocument(doc); // Test wants 3 docs in one segment: writer.ForceMerge(1); var vs = new ValueSourceAnonymousInnerClassHelper(this, doc); FacetsConfig config = new FacetsConfig(); FacetsCollector fc = new FacetsCollector(); IndexReader r = writer.Reader; IndexSearcher s = NewSearcher(r); s.Search(new MatchAllDocsQuery(), fc); DoubleRange[] ranges = new DoubleRange[] { new DoubleRange("< 1", 0.0, true, 1.0, false), new DoubleRange("< 2", 0.0, true, 2.0, false), new DoubleRange("< 5", 0.0, true, 5.0, false), new DoubleRange("< 10", 0.0, true, 10.0, false), new DoubleRange("< 20", 0.0, true, 20.0, false), new DoubleRange("< 50", 0.0, true, 50.0, false) }; Filter fastMatchFilter; AtomicBoolean filterWasUsed = new AtomicBoolean(); if (Random().NextBoolean()) { // Sort of silly: fastMatchFilter = new CachingWrapperFilterAnonymousInnerClassHelper(this, new QueryWrapperFilter(new MatchAllDocsQuery()), filterWasUsed); } else { fastMatchFilter = null; } if (VERBOSE) { Console.WriteLine("TEST: fastMatchFilter=" + fastMatchFilter); } Facets facets = new DoubleRangeFacetCounts("field", vs, fc, fastMatchFilter, ranges); Assert.AreEqual("dim=field path=[] value=3 childCount=6\n < 1 (0)\n < 2 (1)\n < 5 (3)\n < 10 (3)\n < 20 (3)\n < 50 (3)\n", facets.GetTopChildren(10, "field").ToString()); Assert.True(fastMatchFilter == null || filterWasUsed.Get()); DrillDownQuery ddq = new DrillDownQuery(config); ddq.Add("field", ranges[1].GetFilter(fastMatchFilter, vs)); // Test simple drill-down: Assert.AreEqual(1, s.Search(ddq, 10).TotalHits); // Test drill-sideways after drill-down DrillSideways ds = new DrillSidewaysAnonymousInnerClassHelper2(this, s, config, (TaxonomyReader)null, vs, ranges, fastMatchFilter); DrillSidewaysResult dsr = ds.Search(ddq, 10); Assert.AreEqual(1, dsr.Hits.TotalHits); Assert.AreEqual("dim=field path=[] value=3 childCount=6\n < 1 (0)\n < 2 (1)\n < 5 (3)\n < 10 (3)\n < 20 (3)\n < 50 (3)\n", dsr.Facets.GetTopChildren(10, "field").ToString()); IOUtils.Close(r, writer, dir); }
public virtual void TestCustomDoublesValueSource() { Directory dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, dir); Document doc = new Document(); writer.AddDocument(doc); writer.AddDocument(doc); writer.AddDocument(doc); // Test wants 3 docs in one segment: writer.ForceMerge(1); var vs = new ValueSourceAnonymousInnerClassHelper(this, doc); FacetsConfig config = new FacetsConfig(); FacetsCollector fc = new FacetsCollector(); IndexReader r = writer.GetReader(); IndexSearcher s = NewSearcher(r); s.Search(new MatchAllDocsQuery(), fc); DoubleRange[] ranges = new DoubleRange[] { new DoubleRange("< 1", 0.0, true, 1.0, false), new DoubleRange("< 2", 0.0, true, 2.0, false), new DoubleRange("< 5", 0.0, true, 5.0, false), new DoubleRange("< 10", 0.0, true, 10.0, false), new DoubleRange("< 20", 0.0, true, 20.0, false), new DoubleRange("< 50", 0.0, true, 50.0, false) }; Filter fastMatchFilter; AtomicBoolean filterWasUsed = new AtomicBoolean(); if (Random.NextBoolean()) { // Sort of silly: fastMatchFilter = new CachingWrapperFilterAnonymousInnerClassHelper(this, new QueryWrapperFilter(new MatchAllDocsQuery()), filterWasUsed); } else { fastMatchFilter = null; } if (VERBOSE) { Console.WriteLine("TEST: fastMatchFilter=" + fastMatchFilter); } Facets facets = new DoubleRangeFacetCounts("field", vs, fc, fastMatchFilter, ranges); Assert.AreEqual("dim=field path=[] value=3 childCount=6\n < 1 (0)\n < 2 (1)\n < 5 (3)\n < 10 (3)\n < 20 (3)\n < 50 (3)\n", facets.GetTopChildren(10, "field").ToString()); Assert.True(fastMatchFilter == null || filterWasUsed.Get()); DrillDownQuery ddq = new DrillDownQuery(config); ddq.Add("field", ranges[1].GetFilter(fastMatchFilter, vs)); // Test simple drill-down: Assert.AreEqual(1, s.Search(ddq, 10).TotalHits); // Test drill-sideways after drill-down DrillSideways ds = new DrillSidewaysAnonymousInnerClassHelper2(this, s, config, (TaxonomyReader)null, vs, ranges, fastMatchFilter); DrillSidewaysResult dsr = ds.Search(ddq, 10); Assert.AreEqual(1, dsr.Hits.TotalHits); Assert.AreEqual("dim=field path=[] value=3 childCount=6\n < 1 (0)\n < 2 (1)\n < 5 (3)\n < 10 (3)\n < 20 (3)\n < 50 (3)\n", dsr.Facets.GetTopChildren(10, "field").ToString()); IOUtils.Dispose(r, writer, dir); }
public virtual void TestIntSumAssociationDrillDown() { FacetsCollector fc = new FacetsCollector(); IndexSearcher searcher = NewSearcher(reader); DrillDownQuery q = new DrillDownQuery(config); q.Add("int", "b"); searcher.Search(q, fc); Facets facets = new TaxonomyFacetSumIntAssociations("$facets.int", taxoReader, config, fc); Assert.AreEqual("dim=int path=[] value=-1 childCount=2\n b (150)\n a (100)\n", facets.GetTopChildren(10, "int").ToString()); Assert.AreEqual(100, (int)facets.GetSpecificValue("int", "a"), "Wrong count for category 'a'!"); Assert.AreEqual(150, (int)facets.GetSpecificValue("int", "b"), "Wrong count for category 'b'!"); }
public virtual void TestBasic() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); // Writes facet ords to a separate directory from the // main index: var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE); FacetsConfig config = new FacetsConfig(); config.SetHierarchical("Publish Date", true); RandomIndexWriter writer = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, dir); Document doc = new Document(); doc.Add(new FacetField("Author", "Bob")); doc.Add(new FacetField("Publish Date", "2010", "10", "15")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new FacetField("Author", "Lisa")); doc.Add(new FacetField("Publish Date", "2010", "10", "20")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new FacetField("Author", "Lisa")); doc.Add(new FacetField("Publish Date", "2012", "1", "1")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new FacetField("Author", "Susan")); doc.Add(new FacetField("Publish Date", "2012", "1", "7")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new FacetField("Author", "Frank")); doc.Add(new FacetField("Publish Date", "1999", "5", "5")); writer.AddDocument(config.Build(taxoWriter, doc)); // NRT open IndexSearcher searcher = NewSearcher(writer.GetReader()); // NRT open var taxoReader = new DirectoryTaxonomyReader(taxoWriter); // Aggregate the facet counts: FacetsCollector c = new FacetsCollector(); // MatchAllDocsQuery is for "browsing" (counts facets // for all non-deleted docs in the index); normally // you'd use a "normal" query, and use MultiCollector to // wrap collecting the "normal" hits and also facets: searcher.Search(new MatchAllDocsQuery(), c); Facets facets = new FastTaxonomyFacetCounts(taxoReader, config, c); // Retrieve & verify results: Assert.AreEqual("dim=Publish Date path=[] value=5 childCount=3\n 2010 (2)\n 2012 (2)\n 1999 (1)\n", facets.GetTopChildren(10, "Publish Date").ToString()); Assert.AreEqual("dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Bob (1)\n Susan (1)\n Frank (1)\n", facets.GetTopChildren(10, "Author").ToString()); // Now user drills down on Publish Date/2010: DrillDownQuery q2 = new DrillDownQuery(config); q2.Add("Publish Date", "2010"); c = new FacetsCollector(); searcher.Search(q2, c); facets = new FastTaxonomyFacetCounts(taxoReader, config, c); Assert.AreEqual("dim=Author path=[] value=2 childCount=2\n Bob (1)\n Lisa (1)\n", facets.GetTopChildren(10, "Author").ToString()); Assert.AreEqual(1, facets.GetSpecificValue("Author", "Lisa")); Assert.Null(facets.GetTopChildren(10, "Non exitent dim")); // Smoke test PrintTaxonomyStats: string result; using (ByteArrayOutputStream bos = new ByteArrayOutputStream()) { using (StreamWriter w = new StreamWriter(bos, Encoding.UTF8, 2048, true) { AutoFlush = true }) { PrintTaxonomyStats.PrintStats(taxoReader, w, true); } result = bos.ToString(); } Assert.True(result.IndexOf("/Author: 4 immediate children; 5 total categories", StringComparison.Ordinal) != -1); Assert.True(result.IndexOf("/Publish Date: 3 immediate children; 12 total categories", StringComparison.Ordinal) != -1); // Make sure at least a few nodes of the tree came out: Assert.True(result.IndexOf(" /1999", StringComparison.Ordinal) != -1); Assert.True(result.IndexOf(" /2012", StringComparison.Ordinal) != -1); Assert.True(result.IndexOf(" /20", StringComparison.Ordinal) != -1); IOUtils.Dispose(writer, taxoWriter, searcher.IndexReader, taxoReader, taxoDir, dir); }
public virtual void TestRandomDoubles() { Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); int numDocs = AtLeast(1000); double[] values = new double[numDocs]; double minValue = double.PositiveInfinity; double maxValue = double.NegativeInfinity; for (int i = 0; i < numDocs; i++) { Document doc = new Document(); double v = Random().NextDouble(); values[i] = v; doc.Add(new DoubleDocValuesField("field", v)); doc.Add(new DoubleField("field", v, Field.Store.NO)); w.AddDocument(doc); minValue = Math.Min(minValue, v); maxValue = Math.Max(maxValue, v); } IndexReader r = w.Reader; IndexSearcher s = NewSearcher(r); FacetsConfig config = new FacetsConfig(); int numIters = AtLeast(10); for (int iter = 0; iter < numIters; iter++) { if (VERBOSE) { Console.WriteLine("TEST: iter=" + iter); } int numRange = TestUtil.NextInt(Random(), 1, 5); DoubleRange[] ranges = new DoubleRange[numRange]; int[] expectedCounts = new int[numRange]; double minAcceptedValue = double.PositiveInfinity; double maxAcceptedValue = double.NegativeInfinity; for (int rangeID = 0; rangeID < numRange; rangeID++) { double min; if (rangeID > 0 && Random().Next(10) == 7) { // Use an existing boundary: DoubleRange prevRange = ranges[Random().Next(rangeID)]; if (Random().NextBoolean()) { min = prevRange.Min; } else { min = prevRange.Max; } } else { min = Random().NextDouble(); } double max; if (rangeID > 0 && Random().Next(10) == 7) { // Use an existing boundary: DoubleRange prevRange = ranges[Random().Next(rangeID)]; if (Random().NextBoolean()) { max = prevRange.Min; } else { max = prevRange.Max; } } else { max = Random().NextDouble(); } if (min > max) { double x = min; min = max; max = x; } bool minIncl; bool maxIncl; if (min == max) { minIncl = true; maxIncl = true; } else { minIncl = Random().NextBoolean(); maxIncl = Random().NextBoolean(); } ranges[rangeID] = new DoubleRange("r" + rangeID, min, minIncl, max, maxIncl); // Do "slow but hopefully correct" computation of // expected count: for (int i = 0; i < numDocs; i++) { bool accept = true; if (minIncl) { accept &= values[i] >= min; } else { accept &= values[i] > min; } if (maxIncl) { accept &= values[i] <= max; } else { accept &= values[i] < max; } if (accept) { expectedCounts[rangeID]++; minAcceptedValue = Math.Min(minAcceptedValue, values[i]); maxAcceptedValue = Math.Max(maxAcceptedValue, values[i]); } } } FacetsCollector sfc = new FacetsCollector(); s.Search(new MatchAllDocsQuery(), sfc); Filter fastMatchFilter; if (Random().NextBoolean()) { if (Random().NextBoolean()) { fastMatchFilter = NumericRangeFilter.NewDoubleRange("field", minValue, maxValue, true, true); } else { fastMatchFilter = NumericRangeFilter.NewDoubleRange("field", minAcceptedValue, maxAcceptedValue, true, true); } } else { fastMatchFilter = null; } ValueSource vs = new DoubleFieldSource("field"); Facets facets = new DoubleRangeFacetCounts("field", vs, sfc, fastMatchFilter, ranges); FacetResult result = facets.GetTopChildren(10, "field"); Assert.AreEqual(numRange, result.LabelValues.Length); for (int rangeID = 0; rangeID < numRange; rangeID++) { if (VERBOSE) { Console.WriteLine(" range " + rangeID + " expectedCount=" + expectedCounts[rangeID]); } LabelAndValue subNode = result.LabelValues[rangeID]; Assert.AreEqual("r" + rangeID, subNode.label); Assert.AreEqual(expectedCounts[rangeID], (int)subNode.value); DoubleRange range = ranges[rangeID]; // Test drill-down: DrillDownQuery ddq = new DrillDownQuery(config); if (Random().NextBoolean()) { if (Random().NextBoolean()) { ddq.Add("field", NumericRangeFilter.NewDoubleRange("field", range.Min, range.Max, range.MinInclusive, range.MaxInclusive)); } else { ddq.Add("field", NumericRangeQuery.NewDoubleRange("field", range.Min, range.Max, range.MinInclusive, range.MaxInclusive)); } } else { ddq.Add("field", range.GetFilter(fastMatchFilter, vs)); } Assert.AreEqual(expectedCounts[rangeID], s.Search(ddq, 10).TotalHits); } } IOUtils.Close(w, r, dir); }
private DrillDownQuery ToQuery(SearchOptions searchOptions, CategoryOptions categoryOptions) { var query = new DrillDownQuery(Facets.FacetsConfig, ToQuery(searchOptions.Query)); if (categoryOptions != null && categoryOptions.DrillDown.Any()) { foreach (var kvp in categoryOptions.DrillDown) query.add(kvp.Key, kvp.Value.ToArray()); } return query; }
public virtual void TestBasic() { AssumeTrue("Test requires SortedSetDV support", DefaultCodecSupportsSortedSet()); Directory dir = NewDirectory(); FacetsConfig config = new FacetsConfig(); config.SetMultiValued("a", true); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); Document doc = new Document(); doc.Add(new SortedSetDocValuesFacetField("a", "foo")); doc.Add(new SortedSetDocValuesFacetField("a", "bar")); doc.Add(new SortedSetDocValuesFacetField("a", "zoo")); doc.Add(new SortedSetDocValuesFacetField("b", "baz")); writer.AddDocument(config.Build(doc)); if (Random().NextBoolean()) { writer.Commit(); } doc = new Document(); doc.Add(new SortedSetDocValuesFacetField("a", "foo")); writer.AddDocument(config.Build(doc)); // NRT open IndexSearcher searcher = NewSearcher(writer.Reader); // Per-top-reader state: SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(searcher.IndexReader); FacetsCollector c = new FacetsCollector(); searcher.Search(new MatchAllDocsQuery(), c); SortedSetDocValuesFacetCounts facets = new SortedSetDocValuesFacetCounts(state, c); Assert.AreEqual("dim=a path=[] value=4 childCount=3\n foo (2)\n bar (1)\n zoo (1)\n", facets.GetTopChildren(10, "a").ToString()); Assert.AreEqual("dim=b path=[] value=1 childCount=1\n baz (1)\n", facets.GetTopChildren(10, "b").ToString()); // DrillDown: DrillDownQuery q = new DrillDownQuery(config); q.Add("a", "foo"); q.Add("b", "baz"); TopDocs hits = searcher.Search(q, 1); Assert.AreEqual(1, hits.TotalHits); IOUtils.Close(writer, searcher.IndexReader, dir); }
public virtual void TestRandomLongs() { Directory dir = NewDirectory(); var w = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, dir); int numDocs = AtLeast(1000); if (Verbose) { Console.WriteLine("TEST: numDocs=" + numDocs); } long[] values = new long[numDocs]; long minValue = long.MaxValue; long maxValue = long.MinValue; for (int i = 0; i < numDocs; i++) { Document doc = new Document(); long v = Random.NextInt64(); values[i] = v; doc.Add(new NumericDocValuesField("field", v)); doc.Add(new Int64Field("field", v, Field.Store.NO)); w.AddDocument(doc); minValue = Math.Min(minValue, v); maxValue = Math.Max(maxValue, v); } IndexReader r = w.GetReader(); IndexSearcher s = NewSearcher(r); FacetsConfig config = new FacetsConfig(); int numIters = AtLeast(10); for (int iter = 0; iter < numIters; iter++) { if (Verbose) { Console.WriteLine("TEST: iter=" + iter); } int numRange = TestUtil.NextInt32(Random, 1, 100); Int64Range[] ranges = new Int64Range[numRange]; int[] expectedCounts = new int[numRange]; long minAcceptedValue = long.MaxValue; long maxAcceptedValue = long.MinValue; for (int rangeID = 0; rangeID < numRange; rangeID++) { long min; if (rangeID > 0 && Random.Next(10) == 7) { // Use an existing boundary: Int64Range prevRange = ranges[Random.Next(rangeID)]; if (Random.NextBoolean()) { min = prevRange.Min; } else { min = prevRange.Max; } } else { min = Random.NextInt64(); } long max; if (rangeID > 0 && Random.Next(10) == 7) { // Use an existing boundary: Int64Range prevRange = ranges[Random.Next(rangeID)]; if (Random.NextBoolean()) { max = prevRange.Min; } else { max = prevRange.Max; } } else { max = Random.NextInt64(); } if (min > max) { long x = min; min = max; max = x; } bool minIncl; bool maxIncl; if (min == max) { minIncl = true; maxIncl = true; } else { minIncl = Random.NextBoolean(); maxIncl = Random.NextBoolean(); } ranges[rangeID] = new Int64Range("r" + rangeID, min, minIncl, max, maxIncl); if (Verbose) { Console.WriteLine(" range " + rangeID + ": " + ranges[rangeID]); } // Do "slow but hopefully correct" computation of // expected count: for (int i = 0; i < numDocs; i++) { bool accept = true; if (minIncl) { accept &= values[i] >= min; } else { accept &= values[i] > min; } if (maxIncl) { accept &= values[i] <= max; } else { accept &= values[i] < max; } if (accept) { expectedCounts[rangeID]++; minAcceptedValue = Math.Min(minAcceptedValue, values[i]); maxAcceptedValue = Math.Max(maxAcceptedValue, values[i]); } } } FacetsCollector sfc = new FacetsCollector(); s.Search(new MatchAllDocsQuery(), sfc); Filter fastMatchFilter; if (Random.NextBoolean()) { if (Random.NextBoolean()) { fastMatchFilter = NumericRangeFilter.NewInt64Range("field", minValue, maxValue, true, true); } else { fastMatchFilter = NumericRangeFilter.NewInt64Range("field", minAcceptedValue, maxAcceptedValue, true, true); } } else { fastMatchFilter = null; } ValueSource vs = new Int64FieldSource("field"); Facets facets = new Int64RangeFacetCounts("field", vs, sfc, fastMatchFilter, ranges); FacetResult result = facets.GetTopChildren(10, "field"); Assert.AreEqual(numRange, result.LabelValues.Length); for (int rangeID = 0; rangeID < numRange; rangeID++) { if (Verbose) { Console.WriteLine(" range " + rangeID + " expectedCount=" + expectedCounts[rangeID]); } LabelAndValue subNode = result.LabelValues[rangeID]; Assert.AreEqual("r" + rangeID, subNode.Label); Assert.AreEqual(expectedCounts[rangeID], (int)subNode.Value); Int64Range range = ranges[rangeID]; // Test drill-down: DrillDownQuery ddq = new DrillDownQuery(config); if (Random.NextBoolean()) { if (Random.NextBoolean()) { ddq.Add("field", NumericRangeFilter.NewInt64Range("field", range.Min, range.Max, range.MinInclusive, range.MaxInclusive)); } else { ddq.Add("field", NumericRangeQuery.NewInt64Range("field", range.Min, range.Max, range.MinInclusive, range.MaxInclusive)); } } else { ddq.Add("field", range.GetFilter(fastMatchFilter, vs)); } Assert.AreEqual(expectedCounts[rangeID], s.Search(ddq, 10).TotalHits); } } IOUtils.Dispose(w, r, dir); }
public virtual void TestHugeLabel() { Directory indexDir = NewDirectory(), taxoDir = NewDirectory(); IndexWriter indexWriter = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE, new Cl2oTaxonomyWriterCache(2, 1f, 1)); FacetsConfig config = new FacetsConfig(); // Add one huge label: string bigs = null; int ordinal = -1; int len = FacetLabel.MAX_CATEGORY_PATH_LENGTH - 4; // for the dimension and separator bigs = TestUtil.RandomSimpleString(Random(), len, len); FacetField ff = new FacetField("dim", bigs); FacetLabel cp = new FacetLabel("dim", bigs); ordinal = taxoWriter.AddCategory(cp); Document doc = new Document(); doc.Add(ff); indexWriter.AddDocument(config.Build(taxoWriter, doc)); // Add tiny ones to cause a re-hash for (int i = 0; i < 3; i++) { string s = TestUtil.RandomSimpleString(Random(), 1, 10); taxoWriter.AddCategory(new FacetLabel("dim", s)); doc = new Document(); doc.Add(new FacetField("dim", s)); indexWriter.AddDocument(config.Build(taxoWriter, doc)); } // when too large components were allowed to be added, this resulted in a new added category Assert.AreEqual(ordinal, taxoWriter.AddCategory(cp)); IOUtils.Close(indexWriter, taxoWriter); DirectoryReader indexReader = DirectoryReader.Open(indexDir); var taxoReader = new DirectoryTaxonomyReader(taxoDir); IndexSearcher searcher = new IndexSearcher(indexReader); DrillDownQuery ddq = new DrillDownQuery(new FacetsConfig()); ddq.Add("dim", bigs); Assert.AreEqual(1, searcher.Search(ddq, 10).TotalHits); IOUtils.Close(indexReader, taxoReader, indexDir, taxoDir); }
public virtual void TestMixedRangeAndNonRangeTaxonomy() { Directory d = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, d); Directory td = NewDirectory(); DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(td, OpenMode.CREATE); FacetsConfig config = new FacetsConfig(); for (long l = 0; l < 100; l++) { Document doc = new Document(); // For computing range facet counts: doc.Add(new NumericDocValuesField("field", l)); // For drill down by numeric range: doc.Add(new Int64Field("field", l, Field.Store.NO)); if ((l & 3) == 0) { doc.Add(new FacetField("dim", "a")); } else { doc.Add(new FacetField("dim", "b")); } w.AddDocument(config.Build(tw, doc)); } IndexReader r = w.GetReader(); var tr = new DirectoryTaxonomyReader(tw); IndexSearcher s = NewSearcher(r); if (VERBOSE) { Console.WriteLine("TEST: searcher=" + s); } DrillSideways ds = new DrillSidewaysAnonymousInnerClassHelper(this, s, config, tr); // First search, no drill downs: DrillDownQuery ddq = new DrillDownQuery(config); DrillSidewaysResult dsr = ds.Search(null, ddq, 10); Assert.AreEqual(100, dsr.Hits.TotalHits); Assert.AreEqual("dim=dim path=[] value=100 childCount=2\n b (75)\n a (25)\n", dsr.Facets.GetTopChildren(10, "dim").ToString()); Assert.AreEqual("dim=field path=[] value=21 childCount=5\n less than 10 (10)\n less than or equal to 10 (11)\n over 90 (9)\n 90 or above (10)\n over 1000 (0)\n", dsr.Facets.GetTopChildren(10, "field").ToString()); // Second search, drill down on dim=b: ddq = new DrillDownQuery(config); ddq.Add("dim", "b"); dsr = ds.Search(null, ddq, 10); Assert.AreEqual(75, dsr.Hits.TotalHits); Assert.AreEqual("dim=dim path=[] value=100 childCount=2\n b (75)\n a (25)\n", dsr.Facets.GetTopChildren(10, "dim").ToString()); Assert.AreEqual("dim=field path=[] value=16 childCount=5\n less than 10 (7)\n less than or equal to 10 (8)\n over 90 (7)\n 90 or above (8)\n over 1000 (0)\n", dsr.Facets.GetTopChildren(10, "field").ToString()); // Third search, drill down on "less than or equal to 10": ddq = new DrillDownQuery(config); ddq.Add("field", NumericRangeQuery.NewInt64Range("field", 0L, 10L, true, true)); dsr = ds.Search(null, ddq, 10); Assert.AreEqual(11, dsr.Hits.TotalHits); Assert.AreEqual("dim=dim path=[] value=11 childCount=2\n b (8)\n a (3)\n", dsr.Facets.GetTopChildren(10, "dim").ToString()); Assert.AreEqual("dim=field path=[] value=21 childCount=5\n less than 10 (10)\n less than or equal to 10 (11)\n over 90 (9)\n 90 or above (10)\n over 1000 (0)\n", dsr.Facets.GetTopChildren(10, "field").ToString()); IOUtils.Dispose(tw, tr, td, w, r, d); }
/// <summary> /// Searches the Lucene index for Documents that match the specified search criteria. /// </summary> /// <param name="criteria">The search criteria.</param> /// <returns></returns> /// <exception cref="ArgumentNullException"></exception> /// <exception cref="System.ArgumentNullException"></exception> public SearchResult<Guid> Search(SearchCriteria criteria) { if (criteria == null) throw new ArgumentNullException(nameof(criteria)); criteria.Query = String.IsNullOrWhiteSpace(criteria.Query) ? ALL_DOCS_QUERY : criteria.Query; criteria.TopN = criteria.TopN > 0 ? criteria.TopN : SearchCriteria.DEFAULT_TOP_N; criteria.ItemsPerPage = criteria.ItemsPerPage > 0 ? criteria.ItemsPerPage : SearchCriteria.DEFAULT_ITEMS_PER_PAGE; criteria.PageNumber = criteria.PageNumber > 0 ? criteria.PageNumber : 1; criteria.Validate(); var result = new SearchResult<Guid>(criteria); var queryParser = new LuceneQueryParser(Schema.StandardField.FULL_TEXT, _compositeAnalyzer, Schema); var query = queryParser.Parse(criteria.Query); var instance = _searcherTaxonomyManager.Acquire() as SearcherTaxonomyManagerSearcherAndTaxonomy; if (instance != null) { var searcher = instance.Searcher; var taxonomyReader = instance.TaxonomyReader; try { var sort = GetSortCriteria(criteria.SortByField); var selectedFacets = criteria.SelectCategories.ToFacetFields(); var topDocs = (TopDocs)null; var categories = (IEnumerable<Category>)null; if (selectedFacets.Count() == 0) { // We are not going to do a drill-down on specific facets. // Instead we will just take the top N facets from the matching Documents. var facetsCollector = new FacetsCollector(); // Get the matching Documents topDocs = FacetsCollector.Search(searcher, query, criteria.TopN, sort, facetsCollector); // Get the Facet counts from the matching Documents var facetCounts = new FastTaxonomyFacetCounts(taxonomyReader, _facetBuilder.FacetsConfig, facetsCollector); categories = facetCounts.GetCategories(criteria.TopNCategories); } else { // Perform a drill-sideways query var drillDownQuery = new DrillDownQuery(_facetBuilder.FacetsConfig, query); foreach (var facetField in selectedFacets) drillDownQuery.Add(facetField.Dim, facetField.Path); var drillSideways = new DrillSideways(searcher, _facetBuilder.FacetsConfig, taxonomyReader); var drillSidewaysResult = drillSideways.Search(drillDownQuery, null, null, criteria.TopN, sort, false, false); // Get the matching documents topDocs = drillSidewaysResult.Hits; // Get the Facet counts from the matching Documents categories = drillSidewaysResult.Facets.GetCategories(criteria.TopNCategories, selectedFacets); } // TODO: Don't pass TopDocs; pass an IEnumerable<Guid> result.PopulateWith(topDocs, categories, id => searcher.Doc(id)); } finally { _searcherTaxonomyManager.Release(instance); searcher = null; taxonomyReader = null; } } return result; }
/// <summary> /// Search facet content in the existing index. /// </summary> /// <param name="text">The text to search for.</param> /// <param name="indexFields">The array of index fields to search in.</param> /// <param name="facetPaths">The array of facet paths to perform a drill down search on.</param> /// <param name="numberToReturn">The maximum number of documents to return.</param> /// <returns>The facet document.</returns> /// <remarks>Use wildcard chars ('*', '?', '\'), logical ('AND', 'OR'), Quoted exact phrase ("search this").</remarks> public Nequeo.Search.Engine.FacetDocument SearchFacetDocument(string text, FacetData.IndexField[] indexFields, FacetPath[] facetPaths, int numberToReturn = Int32.MaxValue) { Nequeo.Search.Engine.FacetDocument documents = new FacetDocument(); documents.TotalHits = 0; try { // If text exists. if (!String.IsNullOrEmpty(text)) { // Load the searcher. Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(_reader); string searchFieldName = "facetcontent"; Query query = null; DrillDownQuery queryFacet = null; TopDocs results = null; // Build the facet configuration information. FacetsConfig config = new FacetsConfig(); // Add the config. foreach (FacetData.IndexField item in indexFields) { config.SetIndexFieldName(item.DimensionName, item.IndexFieldName); } // Get bytes char[] textArray = text.ToCharArray(); // Search logical. if (text.Contains("AND") || text.Contains("OR")) { // Create the query. query = CreateLogicalQuery(text, searchFieldName); } else if (textArray[0].Equals('"') && textArray[textArray.Length - 1].Equals('"')) { // Create the query. query = CreateQuotedQuery(new string(textArray, 1, textArray.Length - 2), searchFieldName); } else { // Create the query. query = CreateBoolenQuery(text, BooleanClause.Occur.SHOULD, searchFieldName); } // Create the facet query. queryFacet = new DrillDownQuery(config, query); foreach (FacetPath facetPath in facetPaths) { // Add the path. queryFacet.Add(facetPath.DimensionName, facetPath.Path); } // The collector. FacetsCollector collector = new FacetsCollector(); // Search. if (queryFacet != null) { results = FacetsCollector.Search(searcher, queryFacet, numberToReturn, collector); } // Get the total number of results that was asked for. int totalResult = ((results.ScoreDocs != null && results.ScoreDocs.Length > 0) ? results.ScoreDocs.Length : 0); // If result found. if (results != null && results.TotalHits > 0) { List <TextDataResult> textDataResults = new List <TextDataResult>(); List <FileDocumentResult> fileDocResults = new List <FileDocumentResult>(); List <FacetPathResult> facetPathResults = new List <FacetPathResult>(); IDictionary <string, Facets> facetsMap = new Dictionary <string, Facets>(); // Add the facet count. foreach (FacetData.IndexField item in indexFields) { // Add the facet for each index field. facetsMap[item.DimensionName] = GetTaxonomyFacetCounts(_facetReader, config, collector, item.IndexFieldName); } // Create the multi facet list. foreach (FacetPath facetPath in facetPaths) { try { // Add the facets. Facets facets = facetsMap.First(u => u.Key.ToLower().Contains(facetPath.DimensionName.ToLower())).Value; float number = facets.GetSpecificValue(facetPath.DimensionName, facetPath.Path); // Add the path. facetPathResults.Add(new FacetPathResult(facetPath.DimensionName, number, facetPath.Path)); } catch { } } // For each document found. for (int i = 0; i < totalResult; i++) { FileDocumentResult fileDocument = null; TextDataResult textData = null; int docID = results.ScoreDocs[i].Doc; Lucene.Net.Documents.Document doc = searcher.Doc(docID); try { // Get the data for each field. IndexableField[] textNameFields = doc.GetFields("textname"); // If this field exists then text data. if (textNameFields.Length > 0) { // Assign the data to the text document. textData = new TextDataResult(); textData.Name = textNameFields.Length > 0 ? textNameFields[0].StringValue : null; textData.Score = results.ScoreDocs[i].Score; textData.Doc = docID; // Do not know if the text was stored. IndexableField[] textValueFields = doc.GetFields("textcomplete"); textData.Text = textValueFields.Length > 0 ? textValueFields[0].StringValue : null; } } catch { } // If text data exists then add. if (textData != null) { textDataResults.Add(textData); } try { // Get the data for each field. IndexableField[] pathNameFields = doc.GetFields("path"); IndexableField[] modifiedNameFields = doc.GetFields("modified"); // If this field exists then file document. if (pathNameFields.Length > 0) { // Assign the data to the path document. fileDocument = new FileDocumentResult(); fileDocument.Path = pathNameFields.Length > 0 ? pathNameFields[0].StringValue : null; fileDocument.Modified = modifiedNameFields.Length > 0 ? modifiedNameFields[0].StringValue : null; fileDocument.Score = results.ScoreDocs[i].Score; fileDocument.Doc = docID; } } catch { } // If file data exists then add. if (fileDocument != null) { fileDocResults.Add(fileDocument); } } // Assign the facet document values. documents.MaxScore = results.MaxScore; documents.TotalHits = results.TotalHits; documents.FacetPathResults = facetPathResults.ToArray(); documents.TextDataResults = textDataResults.ToArray(); documents.FileDocumentResults = fileDocResults.ToArray(); } } // Return the documents. return(documents); } catch (Exception) { throw; } }