Collects hits for subsequent faceting. Once you've run a search and collected hits into this, instantiate one of the Facets subclasses to do the facet counting. Use the Search utility methods to perform an "ordinary" search but also collect into a Collector.
Inheritance: Lucene.Net.Search.Collector
        /// <summary>
        /// Regression test for LUCENE-5367: indexing the very same built document
        /// twice must succeed, and the facet label must then be counted twice.
        /// </summary>
        public virtual void TestAddSameDocTwice()
        {
            Directory indexDir = NewDirectory();
            Directory taxoDir = NewDirectory();

            var analyzer = new MockAnalyzer(Random());
            var writerConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
            var indexWriter = new IndexWriter(indexDir, writerConfig);
            var taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
            var facetsConfig = new FacetsConfig();

            // Build a single document carrying the facet a/b.
            var unbuilt = new Document();
            unbuilt.Add(new FacetField("a", "b"));
            Document doc = facetsConfig.Build(taxoWriter, unbuilt);

            // Adding the same instance a second time used to fail with the previous code.
            for (int attempt = 0; attempt < 2; attempt++)
            {
                indexWriter.AddDocument(doc);
            }
            IOUtils.Close(indexWriter, taxoWriter);

            DirectoryReader indexReader = DirectoryReader.Open(indexDir);
            var taxoReader = new DirectoryTaxonomyReader(taxoDir);
            IndexSearcher searcher = NewSearcher(indexReader);
            var collector = new FacetsCollector();
            searcher.Search(new MatchAllDocsQuery(), collector);

            Facets facets = GetTaxonomyFacetCounts(taxoReader, facetsConfig, collector);
            FacetResult topChildren = facets.GetTopChildren(10, "a");
            var children = topChildren.LabelValues;

            // One label ("b") under dimension "a", hit by both copies of the document.
            Assert.AreEqual(1, children.Length);
            Assert.AreEqual(2, children[0].value);

            IOUtils.Close(indexReader, taxoReader);
            IOUtils.Close(indexDir, taxoDir);
        }
 /// <summary>
 /// Creates a <c>TaxonomyFacetCounts</c>, which also counts all facet labels.
 /// Use this for a non-default <see cref="OrdinalsReader"/>; otherwise use
 /// <c>FastTaxonomyFacetCounts</c>.
 /// </summary>
 /// <param name="ordinalsReader">Reader used for ordinals; its <c>IndexFieldName</c> is passed to the base constructor.</param>
 /// <param name="taxoReader">Taxonomy reader forwarded to the base constructor.</param>
 /// <param name="config">Facets configuration forwarded to the base constructor.</param>
 /// <param name="fc">Collector whose matching docs are counted.</param>
 public TaxonomyFacetCounts(OrdinalsReader ordinalsReader, TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc)
     : base(ordinalsReader.IndexFieldName, taxoReader, config)
 {
     this.ordinalsReader = ordinalsReader;

     // Counting happens eagerly, at construction time.
     var matchingDocs = fc.GetMatchingDocs;
     Count(matchingDocs);
 }
 /// <summary>
 /// Creates a <c>TaxonomyFacetSumFloatAssociations</c> against the specified
 /// index field.
 /// </summary>
 /// <param name="indexFieldName">Index field name forwarded to the base constructor.</param>
 /// <param name="taxoReader">Taxonomy reader forwarded to the base constructor.</param>
 /// <param name="config">Facets configuration forwarded to the base constructor.</param>
 /// <param name="fc">Collector whose matching docs are passed to <c>SumValues</c>.</param>
 public TaxonomyFacetSumFloatAssociations(string indexFieldName, TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc)
     : base(indexFieldName, taxoReader, config)
 {
     // Aggregation happens eagerly, at construction time.
     var matchingDocs = fc.GetMatchingDocs;
     SumValues(matchingDocs);
 }
 /// <summary>
 /// Creates a <c>TaxonomyFacetSumFloatAssociations</c> against the default
 /// index field, <c>FacetsConfig.DEFAULT_INDEX_FIELD_NAME</c>.
 /// </summary>
 /// <remarks>
 /// Delegates to the overload that takes an explicit index field name.
 /// </remarks>
 /// <param name="taxoReader">Taxonomy reader, forwarded unchanged.</param>
 /// <param name="config">Facets configuration, forwarded unchanged.</param>
 /// <param name="fc">Collector holding the hits, forwarded unchanged.</param>
 public TaxonomyFacetSumFloatAssociations(TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc)
     : this(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, taxoReader, config, fc)
 {
 }
 /// <summary>
 /// Runs a match-all query through <c>FacetsCollector.Search</c>, asking for
 /// 10 top hits, and returns the collector that gathered the hits.
 /// </summary>
 /// <param name="tr">Not used by this method.</param>
 /// <param name="ir">Not used by this method.</param>
 /// <param name="searcher">Searcher the query is executed against.</param>
 /// <returns>The freshly created collector after the search has run.</returns>
 private static FacetsCollector PerformSearch(TaxonomyReader tr, IndexReader ir, IndexSearcher searcher)
 {
     var collector = new FacetsCollector();
     var matchAll = new MatchAllDocsQuery();
     FacetsCollector.Search(searcher, matchAll, 10, collector);
     return collector;
 }
 /// <summary>
 /// Creates range facet counts using the provided <see cref="ValueSource"/>,
 /// with the provided <see cref="Filter"/> acting as a fast-match: only
 /// documents passing the filter are checked for the matching ranges.
 /// The filter must be random access (its <c>DocIdSet</c> must provide bits).
 /// </summary>
 /// <param name="field">Field name forwarded to the base constructor.</param>
 /// <param name="valueSource">Source of the per-document values that are counted.</param>
 /// <param name="hits">Collector whose matching docs are counted.</param>
 /// <param name="fastMatchFilter">Fast-match filter forwarded to the base constructor; another overload in this class passes <c>null</c>.</param>
 /// <param name="ranges">Ranges forwarded to the base constructor.</param>
 public DoubleRangeFacetCounts(string field, ValueSource valueSource, FacetsCollector hits, Filter fastMatchFilter, DoubleRange[] ranges)
     : base(field, ranges, fastMatchFilter)
 {
     // Counting happens eagerly, at construction time.
     var matchingDocs = hits.GetMatchingDocs;
     Count(valueSource, matchingDocs);
 }
 /// <summary>
 /// Creates range facet counts using a <c>DoubleFieldSource</c> built from
 /// the specified field.
 /// </summary>
 /// <remarks>
 /// Delegates to the overload that takes an explicit <c>ValueSource</c>.
 /// </remarks>
 /// <param name="field">Field name; also used to construct the <c>DoubleFieldSource</c>.</param>
 /// <param name="hits">Collector holding the hits, forwarded unchanged.</param>
 /// <param name="ranges">Ranges to count, forwarded unchanged.</param>
 public DoubleRangeFacetCounts(string field, FacetsCollector hits, params DoubleRange[] ranges)
     : this(field, new DoubleFieldSource(field), hits, ranges)
 {
 }
 /// <summary>
 /// Creates a <c>FastTaxonomyFacetCounts</c> using the specified
 /// <paramref name="indexFieldName"/> for ordinals. Use this if you changed
 /// the index field name for certain dimensions on the <c>FacetsConfig</c>.
 /// </summary>
 /// <param name="indexFieldName">Index field name forwarded to the base constructor.</param>
 /// <param name="taxoReader">Taxonomy reader forwarded to the base constructor.</param>
 /// <param name="config">Facets configuration forwarded to the base constructor.</param>
 /// <param name="fc">Collector whose matching docs are counted.</param>
 public FastTaxonomyFacetCounts(string indexFieldName, TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc)
     : base(indexFieldName, taxoReader, config)
 {
     // Counting happens eagerly, at construction time.
     var matchingDocs = fc.GetMatchingDocs;
     Count(matchingDocs);
 }
Beispiel #9
0
        /// <summary>
        /// Builds the <see cref="Facets"/> for a drill-sideways search. Subclasses
        /// can override this to customize the per-dimension facets implementation.
        /// </summary>
        /// <remarks>
        /// When <c>taxoReader</c> is non-null, taxonomy-based counts are created;
        /// otherwise sorted-set doc-values counts are created from <c>state</c>.
        /// </remarks>
        /// <param name="drillDowns">Collector holding the drill-down hits.</param>
        /// <param name="drillSideways">One collector per drill-sideways dimension, or <c>null</c> when there is none.</param>
        /// <param name="drillSidewaysDims">Dimension names, indexed in parallel with <paramref name="drillSideways"/>.</param>
        /// <returns>
        /// The drill-down facets alone when no per-dimension facets were built,
        /// otherwise a <c>MultiFacets</c> combining both.
        /// </returns>
        protected virtual Facets BuildFacetsResult(FacetsCollector drillDowns, FacetsCollector[] drillSideways, string[] drillSidewaysDims)
        {
            bool useTaxonomy = taxoReader != null;

            Facets drillDownFacets = useTaxonomy
                ? (Facets)new FastTaxonomyFacetCounts(taxoReader, config, drillDowns)
                : new SortedSetDocValuesFacetCounts(state, drillDowns);

            var facetsByDim = new Dictionary<string, Facets>();
            if (drillSideways != null)
            {
                for (int dim = 0; dim < drillSideways.Length; dim++)
                {
                    FacetsCollector sideways = drillSideways[dim];
                    Facets dimFacets;
                    if (useTaxonomy)
                    {
                        dimFacets = new FastTaxonomyFacetCounts(taxoReader, config, sideways);
                    }
                    else
                    {
                        dimFacets = new SortedSetDocValuesFacetCounts(state, sideways);
                    }
                    facetsByDim[drillSidewaysDims[dim]] = dimFacets;
                }
            }

            // Nothing to combine: hand back the plain drill-down facets.
            if (facetsByDim.Count == 0)
            {
                return drillDownFacets;
            }

            return new MultiFacets(facetsByDim, drillDownFacets);
        }
Beispiel #10
0
        /// <summary>
        /// Runs the search, sending hits to the given <see cref="Collector"/>, and
        /// computes drill-down and drill-sideways facet counts.
        /// </summary>
        /// <param name="query">Drill-down query; its <c>Dims</c> and <c>BooleanQuery</c> are inspected.</param>
        /// <param name="hitCollector">Collector that receives the hits.</param>
        /// <returns>
        /// A <see cref="DrillSidewaysResult"/> wrapping the facets produced by
        /// <c>BuildFacetsResult</c>; its second constructor argument is always <c>null</c> here.
        /// </returns>
        public virtual DrillSidewaysResult Search(DrillDownQuery query, Collector hitCollector)
        {
            IDictionary<string, int?> dims = query.Dims;
            int dimCount = dims.Count;

            var drillDownCollector = new FacetsCollector();

            if (dimCount == 0)
            {
                // Without drill-down dims there is nothing to compute sideways:
                // run the query once, feeding both collectors.
                var combined = MultiCollector.Wrap(hitCollector, drillDownCollector);
                searcher.Search(query, combined);
                Facets drillDownOnly = BuildFacetsResult(drillDownCollector, null, null);
                return new DrillSidewaysResult(drillDownOnly, null);
            }

            BooleanClause[] clauses = query.BooleanQuery.Clauses;

            Query baseQuery;
            int firstDimClause;
            if (clauses.Length != dimCount)
            {
                // The first clause is the base query; the remaining ones are the dims.
                Debug.Assert(clauses.Length == 1 + dimCount);
                baseQuery = clauses[0].Query;
                firstDimClause = 1;
            }
            else
            {
                // TODO: we could optimize this pure-browse case by
                // making a custom scorer instead:
                baseQuery = new MatchAllDocsQuery();
                firstDimClause = 0;
            }

            // One sideways collector per drill-down dimension.
            var sidewaysCollectors = new FacetsCollector[dimCount];
            for (int dim = 0; dim < sidewaysCollectors.Length; dim++)
            {
                sidewaysCollectors[dim] = new FacetsCollector();
            }

            // Copy out the per-dimension queries, skipping the base clause if present.
            var dimQueries = new Query[clauses.Length - firstDimClause];
            for (int offset = 0; offset < dimQueries.Length; offset++)
            {
                dimQueries[offset] = clauses[firstDimClause + offset].Query;
            }

            var sidewaysQuery = new DrillSidewaysQuery(baseQuery, drillDownCollector, sidewaysCollectors, dimQueries, ScoreSubDocsAtOnce());
            searcher.Search(sidewaysQuery, hitCollector);

            string[] dimNames = dims.Keys.ToArray();
            Facets facets = BuildFacetsResult(drillDownCollector, sidewaysCollectors, dimNames);
            return new DrillSidewaysResult(facets, null);
        }
 /// <summary>
 /// Creates a <c>LongRangeFacetCounts</c> using a <c>LongFieldSource</c>
 /// built from the specified field.
 /// </summary>
 /// <remarks>
 /// Delegates to the overload that takes an explicit value source.
 /// </remarks>
 /// <param name="field">Field name; also used to construct the <c>LongFieldSource</c>.</param>
 /// <param name="hits">Collector holding the hits, forwarded unchanged.</param>
 /// <param name="ranges">Ranges to count, forwarded unchanged.</param>
 public LongRangeFacetCounts(string field, FacetsCollector hits, params LongRange[] ranges)
     : this(field, new LongFieldSource(field), hits, ranges)
 {
 }
 /// <summary>
 /// Aggregates float facet values from the provided <see cref="ValueSource"/>,
 /// pulling ordinals from the provided <see cref="OrdinalsReader"/>.
 /// </summary>
 /// <param name="ordinalsReader">Reader used for ordinals; its <c>IndexFieldName</c> is passed to the base constructor.</param>
 /// <param name="taxoReader">Taxonomy reader forwarded to the base constructor.</param>
 /// <param name="config">Facets configuration forwarded to the base constructor.</param>
 /// <param name="fc">Collector supplying the matching docs and the keep-scores flag.</param>
 /// <param name="valueSource">Source of the values that are summed.</param>
 public TaxonomyFacetSumValueSource(OrdinalsReader ordinalsReader, TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc, ValueSource valueSource)
     : base(ordinalsReader.IndexFieldName, taxoReader, config)
 {
     this.ordinalsReader = ordinalsReader;

     // Aggregation happens eagerly, at construction time.
     var matchingDocs = fc.GetMatchingDocs;
     var keepScores = fc.KeepScores;
     SumValues(matchingDocs, keepScores, valueSource);
 }
 /// <summary>
 /// Aggregates float facet values from the provided <see cref="ValueSource"/>,
 /// pulling ordinals using a <c>DocValuesOrdinalsReader</c> against the
 /// default indexed facet field, <c>FacetsConfig.DEFAULT_INDEX_FIELD_NAME</c>.
 /// </summary>
 /// <remarks>
 /// Delegates to the overload that takes an explicit <c>OrdinalsReader</c>.
 /// </remarks>
 /// <param name="taxoReader">Taxonomy reader, forwarded unchanged.</param>
 /// <param name="config">Facets configuration, forwarded unchanged.</param>
 /// <param name="fc">Collector holding the hits, forwarded unchanged.</param>
 /// <param name="valueSource">Source of the values that are summed, forwarded unchanged.</param>
 public TaxonomyFacetSumValueSource(TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc, ValueSource valueSource)
     : this(new DocValuesOrdinalsReader(FacetsConfig.DEFAULT_INDEX_FIELD_NAME), taxoReader, config, fc, valueSource)
 {
 }
 /// <summary>
 /// Stores the enclosing <see cref="FacetsCollector"/> and the document count,
 /// and allocates a <c>FixedBitSet</c> sized to <paramref name="maxDoc"/>.
 /// </summary>
 /// <param name="outerInstance">The enclosing collector instance.</param>
 /// <param name="maxDoc">Size passed to the <c>FixedBitSet</c> constructor.</param>
 public DocsAnonymousInnerClassHelper(FacetsCollector outerInstance, int maxDoc)
 {
     bits = new FixedBitSet(maxDoc);
     this.maxDoc = maxDoc;
     this.outerInstance = outerInstance;
 }
 /// <summary>
 /// Creates range facet counts using the provided <see cref="ValueSource"/>,
 /// without a fast-match filter.
 /// </summary>
 /// <remarks>
 /// Delegates to the overload that takes a filter, passing <c>null</c> for it.
 /// </remarks>
 /// <param name="field">Field name, forwarded unchanged.</param>
 /// <param name="valueSource">Source of the per-document values, forwarded unchanged.</param>
 /// <param name="hits">Collector holding the hits, forwarded unchanged.</param>
 /// <param name="ranges">Ranges to count, forwarded unchanged.</param>
 public DoubleRangeFacetCounts(string field, ValueSource valueSource, FacetsCollector hits, params DoubleRange[] ranges)
     : this(field, valueSource, hits, null, ranges)
 {
 }
        /// <summary>
        /// Exercises <c>RandomSamplingFacetsCollector</c>: first with a query that
        /// matches nothing, then with a query matching half of the documents,
        /// comparing an unsampled collector against exact counts and sanity-checking
        /// the statistics of a 10% sample.
        /// </summary>
        public virtual void TestRandomSampling()
        {
            Directory dir = NewDirectory();
            Directory taxoDir = NewDirectory();

            DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir);

            FacetsConfig config = new FacetsConfig();

            // Index documents tagged even/odd, with a facet holding i modulo 10.
            int numDocs = AtLeast(10000);
            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();
                doc.Add(new StringField("EvenOdd", (i % 2 == 0) ? "even" : "odd", Store.NO));
                doc.Add(new FacetField("iMod10", Convert.ToString(i % 10)));
                writer.AddDocument(config.Build(taxoWriter, doc));
            }
            Random random = Random();

            // NRT open: the searcher and taxonomy reader are obtained from the
            // writers before the writers are closed.
            IndexSearcher searcher = NewSearcher(writer.Reader);
            var taxoReader = new DirectoryTaxonomyReader(taxoWriter);
            IOUtils.Close(writer, taxoWriter);

            // Test empty results
            RandomSamplingFacetsCollector collectRandomZeroResults = new RandomSamplingFacetsCollector(numDocs / 10, random.NextLong());

            // There should be no divisions by zero
            searcher.Search(new TermQuery(new Term("EvenOdd", "NeverMatches")), collectRandomZeroResults);

            // There should be no divisions by zero and no null result
            Assert.NotNull(collectRandomZeroResults.GetMatchingDocs);

            // There should be no results at all
            foreach (MatchingDocs doc in collectRandomZeroResults.GetMatchingDocs)
            {
                Assert.AreEqual(0, doc.totalHits);
            }

            // Now start searching and retrieve results.

            // Use a query to select half of the documents.
            TermQuery query = new TermQuery(new Term("EvenOdd", "even"));

            // There will be 5 facet values (0, 2, 4, 6 and 8), as only the even
            // (i % 10) are hits.
            // There is a really small chance that one of the 5 values will be
            // missed when sampling, but that is
            // 0.8 (chance not to take a value) ^ 2000 * 5 (any can be missing) ~ 10^-193,
            // so that is probably not going to happen.
            int maxNumChildren = 5;

            RandomSamplingFacetsCollector random100Percent = new RandomSamplingFacetsCollector(numDocs, random.NextLong()); // no sampling
            RandomSamplingFacetsCollector random10Percent = new RandomSamplingFacetsCollector(numDocs / 10, random.NextLong()); // 10 % of total docs, 20% of the hits

            FacetsCollector fc = new FacetsCollector();

            // A single search pass feeds the exact collector and both sampling collectors.
            searcher.Search(query, MultiCollector.Wrap(fc, random100Percent, random10Percent));

            FastTaxonomyFacetCounts random10FacetCounts = new FastTaxonomyFacetCounts(taxoReader, config, random10Percent);
            FastTaxonomyFacetCounts random100FacetCounts = new FastTaxonomyFacetCounts(taxoReader, config, random100Percent);
            FastTaxonomyFacetCounts exactFacetCounts = new FastTaxonomyFacetCounts(taxoReader, config, fc);

            // Only the 10% sample is passed through AmortizeFacetCounts.
            FacetResult random10Result = random10Percent.AmortizeFacetCounts(random10FacetCounts.GetTopChildren(10, "iMod10"), config, searcher);
            FacetResult random100Result = random100FacetCounts.GetTopChildren(10, "iMod10");
            FacetResult exactResult = exactFacetCounts.GetTopChildren(10, "iMod10");

            // With the sample size equal to numDocs, the result must equal the exact counts.
            Assert.AreEqual(random100Result, exactResult);

            // we should have five children, but there is a small chance we have less.
            // (see above).
            Assert.True(random10Result.ChildCount <= maxNumChildren);
            // there should be one child at least.
            Assert.True(random10Result.ChildCount >= 1);

            // now calculate some statistics to determine if the sampled result is 'ok'.
            // because random sampling is used, the results will vary each time.
            int sum = 0;
            foreach (LabelAndValue lav in random10Result.LabelValues)
            {
                sum += (int)lav.value;
            }
            // Note: the mean divides by maxNumChildren, not by the number of
            // children actually returned.
            float mu = (float)sum / (float)maxNumChildren;

            float variance = 0;
            foreach (LabelAndValue lav in random10Result.LabelValues)
            {
                variance += (float)Math.Pow((mu - (int)lav.value), 2);
            }
            variance = variance / maxNumChildren;
            float sigma = (float)Math.Sqrt(variance);

            // we query only half the documents and have 5 categories. The average
            // number of docs in a category will thus be the total divided by 5*2
            float targetMu = numDocs / (5.0f * 2.0f);

            // the average should be in the range and the standard deviation should not
            // be too great
            Assert.True(sigma < 200);
            Assert.True(targetMu - 3 * sigma < mu && mu < targetMu + 3 * sigma);

            IOUtils.Close(searcher.IndexReader, taxoReader, dir, taxoDir);
        }
 /// <summary>
 /// Creates a <c>FastTaxonomyFacetCounts</c>, which also counts all facet
 /// labels, against the default index field
 /// <c>FacetsConfig.DEFAULT_INDEX_FIELD_NAME</c>.
 /// </summary>
 /// <remarks>
 /// Delegates to the overload that takes an explicit index field name.
 /// </remarks>
 /// <param name="taxoReader">Taxonomy reader, forwarded unchanged.</param>
 /// <param name="config">Facets configuration, forwarded unchanged.</param>
 /// <param name="fc">Collector holding the hits, forwarded unchanged.</param>
 public FastTaxonomyFacetCounts(TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc)
     : this(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, taxoReader, config, fc)
 {
 }