public virtual void TestAddSameDocTwice()
{
    // LUCENE-5367: regression test — adding the same facet-built document
    // twice used to fail with the previous implementation; verify the new
    // code handles it.
    Directory indexDir = NewDirectory();
    Directory taxoDir = NewDirectory();

    var writer = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
    var taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
    var config = new FacetsConfig();

    var doc = new Document();
    doc.Add(new FacetField("a", "b"));
    doc = config.Build(taxoWriter, doc);

    // These two AddDocument() calls used to fail.
    writer.AddDocument(doc);
    writer.AddDocument(doc);
    IOUtils.Close(writer, taxoWriter);

    DirectoryReader reader = DirectoryReader.Open(indexDir);
    var taxoReader = new DirectoryTaxonomyReader(taxoDir);
    IndexSearcher searcher = NewSearcher(reader);

    var collector = new FacetsCollector();
    searcher.Search(new MatchAllDocsQuery(), collector);

    // One label ("b") with a count of 2, since the doc was added twice.
    Facets facets = GetTaxonomyFacetCounts(taxoReader, config, collector);
    FacetResult result = facets.GetTopChildren(10, "a");
    Assert.AreEqual(1, result.LabelValues.Length);
    Assert.AreEqual(2, result.LabelValues[0].value);

    IOUtils.Close(reader, taxoReader);
    IOUtils.Close(indexDir, taxoDir);
}
/// <summary>
/// Create <c>TaxonomyFacetSumFloatAssociations</c> against
/// the default index field
/// (<see cref="FacetsConfig.DEFAULT_INDEX_FIELD_NAME"/>).
/// Delegates to the overload that takes an explicit index field name.
/// </summary>
/// <param name="taxoReader">Taxonomy reader used to resolve ordinals.</param>
/// <param name="config">Facets configuration.</param>
/// <param name="fc">Collector holding the matching documents.</param>
public TaxonomyFacetSumFloatAssociations(TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc)
    : this(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, taxoReader, config, fc)
{
}
/// <summary>
/// Runs a <c>MatchAllDocsQuery</c> (top 10 hits) through the given searcher
/// and returns the populated collector.
/// NOTE(review): <paramref name="tr"/> and <paramref name="ir"/> are not
/// used by this helper — presumably kept for signature symmetry with callers.
/// </summary>
private static FacetsCollector PerformSearch(TaxonomyReader tr, IndexReader ir, IndexSearcher searcher)
{
    var collector = new FacetsCollector();
    FacetsCollector.Search(searcher, new MatchAllDocsQuery(), 10, collector);
    return collector;
}
/// <summary>
/// Create <c>DoubleRangeFacetCounts</c>, using the provided
/// <see cref="ValueSource"/>. No fast-match filter is applied
/// (delegates to the full overload with a null filter, so every
/// matching document is checked against the ranges).
/// </summary>
public DoubleRangeFacetCounts(string field, ValueSource valueSource, FacetsCollector hits, params DoubleRange[] ranges)
    : this(field, valueSource, hits, null, ranges)
{
}
/// <summary>
/// Create <c>DoubleRangeFacetCounts</c>, using the provided
/// <see cref="ValueSource"/>, and using the provided <see cref="Filter"/> as
/// a fastmatch: only documents passing the filter are
/// checked for the matching ranges. The filter must be
/// random access (implement <c>DocIdSet.Bits</c>).
/// </summary>
/// <param name="field">Dimension name the counts are reported under.</param>
/// <param name="valueSource">Source of the per-document double value.</param>
/// <param name="hits">Collector holding the matching documents.</param>
/// <param name="fastMatchFilter">Optional random-access pre-filter; may be null.</param>
/// <param name="ranges">The ranges to count.</param>
// FIX: added the `params` modifier so this overload is callable with a
// variable argument list, consistent with the sibling overloads
// (backward-compatible; explicit-array callers are unaffected).
public DoubleRangeFacetCounts(string field, ValueSource valueSource, FacetsCollector hits, Filter fastMatchFilter, params DoubleRange[] ranges)
    : base(field, ranges, fastMatchFilter)
{
    // Tally each matching document's value into its range bucket.
    Count(valueSource, hits.GetMatchingDocs);
}
/// <summary>
/// Builds the anonymous-class state: remembers the owning
/// <see cref="FacetsCollector"/> and allocates a <c>FixedBitSet</c> sized to
/// <paramref name="maxDoc"/> for recording which documents matched.
/// </summary>
public DocsAnonymousInnerClassHelper(FacetsCollector outerInstance, int maxDoc) { this.outerInstance = outerInstance; this.maxDoc = maxDoc; bits = new FixedBitSet(maxDoc); }
/// <summary>
/// Randomly exercises the equivalent taxonomy facet-count implementations:
/// either <c>FastTaxonomyFacetCounts</c>, or <c>TaxonomyFacetCounts</c> over a
/// <c>DocValuesOrdinalsReader</c> (randomly wrapped in a
/// <c>CachedOrdinalsReader</c>), so tests cover all code paths.
/// </summary>
public virtual Facets GetTaxonomyFacetCounts(TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector c, string indexFieldName)
{
    if (Random().NextBoolean())
    {
        return new FastTaxonomyFacetCounts(indexFieldName, taxoReader, config, c);
    }

    OrdinalsReader ordinals = new DocValuesOrdinalsReader(indexFieldName);
    if (Random().NextBoolean())
    {
        ordinals = new CachedOrdinalsReader(ordinals);
    }
    return new TaxonomyFacetCounts(ordinals, taxoReader, config, c);
}
/// <summary>
/// Create <c>LongRangeFacetCounts</c>, reading per-document long values
/// via a <c>LongFieldSource</c> built from the specified field.
/// </summary>
public LongRangeFacetCounts(string field, FacetsCollector hits, params LongRange[] ranges)
    : this(field, new LongFieldSource(field), hits, ranges)
{
}
/// <summary>
/// Subclass can override to customize the per-dimension <c>Facets</c>
/// implementation. Builds drill-down counts plus one sideways <c>Facets</c>
/// per dimension, using the taxonomy when a taxonomy reader is available
/// and sorted-set doc values otherwise.
/// </summary>
protected virtual Facets BuildFacetsResult(FacetsCollector drillDowns, FacetsCollector[] drillSideways, string[] drillSidewaysDims)
{
    Facets drillDownFacets;
    var sidewaysByDim = new Dictionary<string, Facets>();

    if (taxoReader != null)
    {
        drillDownFacets = new FastTaxonomyFacetCounts(taxoReader, config, drillDowns);
        if (drillSideways != null)
        {
            for (int dim = 0; dim < drillSideways.Length; dim++)
            {
                sidewaysByDim[drillSidewaysDims[dim]] = new FastTaxonomyFacetCounts(taxoReader, config, drillSideways[dim]);
            }
        }
    }
    else
    {
        drillDownFacets = new SortedSetDocValuesFacetCounts(state, drillDowns);
        if (drillSideways != null)
        {
            for (int dim = 0; dim < drillSideways.Length; dim++)
            {
                sidewaysByDim[drillSidewaysDims[dim]] = new SortedSetDocValuesFacetCounts(state, drillSideways[dim]);
            }
        }
    }

    // With no sideways dimensions the drill-down facets stand alone;
    // otherwise merge them under a MultiFacets.
    return sidewaysByDim.Count == 0
        ? drillDownFacets
        : new MultiFacets(sidewaysByDim, drillDownFacets);
}
/// <summary>
/// Aggregates float facet values from the provided
/// <see cref="ValueSource"/>, pulling ordinals using a
/// <c>DocValuesOrdinalsReader</c> against the default indexed
/// facet field <see cref="FacetsConfig.DEFAULT_INDEX_FIELD_NAME"/>.
/// Delegates to the overload that takes an explicit <c>OrdinalsReader</c>.
/// </summary>
public TaxonomyFacetSumValueSource(TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc, ValueSource valueSource)
    : this(new DocValuesOrdinalsReader(FacetsConfig.DEFAULT_INDEX_FIELD_NAME), taxoReader, config, fc, valueSource)
{
}
/// <summary>
/// Aggregates float facet values from the provided
/// <see cref="ValueSource"/>, and pulls ordinals from the
/// provided <see cref="OrdinalsReader"/>.
/// </summary>
public TaxonomyFacetSumValueSource(OrdinalsReader ordinalsReader, TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc, ValueSource valueSource)
    : base(ordinalsReader.IndexFieldName, taxoReader, config)
{
    this.ordinalsReader = ordinalsReader;
    // Sum the value-source values per matching document into each ordinal,
    // passing along whether the collector kept scores.
    SumValues(fc.GetMatchingDocs, fc.KeepScores, valueSource);
}
/// <summary>
/// Builds the anonymous-class state: remembers the owning
/// <see cref="FacetsCollector"/> and allocates a <c>FixedBitSet</c> sized to
/// <paramref name="maxDoc"/> for recording which documents matched.
/// </summary>
public DocsAnonymousInnerClassHelper(FacetsCollector outerInstance, int maxDoc) { this.outerInstance = outerInstance; this.maxDoc = maxDoc; bits = new FixedBitSet(maxDoc); }
/// <summary>
/// Create <c>FastTaxonomyFacetCounts</c>, using the
/// specified <paramref name="indexFieldName"/> for ordinals. Use
/// this if you had set <c>FacetsConfig.SetIndexFieldName</c> to change
/// the index field name for certain dimensions.
/// </summary>
public FastTaxonomyFacetCounts(string indexFieldName, TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc)
    : base(indexFieldName, taxoReader, config)
{
    // Count ordinals across all matching documents.
    Count(fc.GetMatchingDocs);
}
/// <summary>
/// Create <c>FastTaxonomyFacetCounts</c>, which also
/// counts all facet labels, against the default index field
/// (<see cref="FacetsConfig.DEFAULT_INDEX_FIELD_NAME"/>).
/// </summary>
public FastTaxonomyFacetCounts(TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc)
    : this(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, taxoReader, config, fc)
{
}
/// <summary>
/// Create <c>TaxonomyFacetSumFloatAssociations</c> against
/// the specified index field.
/// </summary>
public TaxonomyFacetSumFloatAssociations(string indexFieldName, TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc)
    : base(indexFieldName, taxoReader, config)
{
    // Sum the float association values across the matching documents.
    SumValues(fc.GetMatchingDocs);
}
/// <summary>
/// Search, collecting hits with a <see cref="Collector"/>, and
/// computing drill down and sideways counts.
/// </summary>
public virtual DrillSidewaysResult Search(DrillDownQuery query, Collector hitCollector)
{
    IDictionary<string, int?> dims = query.Dims;
    var drillDownCollector = new FacetsCollector();

    if (dims.Count == 0)
    {
        // No drill-down dims means there is no drill-sideways to compute:
        // gather hits and drill-down counts in a single pass and return.
        searcher.Search(query, MultiCollector.Wrap(hitCollector, drillDownCollector));
        return new DrillSidewaysResult(BuildFacetsResult(drillDownCollector, null, null), null);
    }

    BooleanQuery ddq = query.BooleanQuery;
    BooleanClause[] clauses = ddq.Clauses;

    // Either every clause is a drill-down (pure browse) or the first
    // clause is the base query followed by one clause per dimension.
    Query baseQuery;
    int startClause;
    if (clauses.Length == dims.Count)
    {
        // TODO: we could optimize this pure-browse case by
        // making a custom scorer instead:
        baseQuery = new MatchAllDocsQuery();
        startClause = 0;
    }
    else
    {
        Debug.Assert(clauses.Length == 1 + dims.Count);
        baseQuery = clauses[0].Query;
        startClause = 1;
    }

    // One sideways collector per drill-down dimension.
    var sidewaysCollectors = new FacetsCollector[dims.Count];
    for (int i = 0; i < sidewaysCollectors.Length; i++)
    {
        sidewaysCollectors[i] = new FacetsCollector();
    }

    var drillDownQueries = new Query[clauses.Length - startClause];
    for (int i = startClause; i < clauses.Length; i++)
    {
        drillDownQueries[i - startClause] = clauses[i].Query;
    }

    var dsq = new DrillSidewaysQuery(baseQuery, drillDownCollector, sidewaysCollectors, drillDownQueries, ScoreSubDocsAtOnce());
    searcher.Search(dsq, hitCollector);

    return new DrillSidewaysResult(BuildFacetsResult(drillDownCollector, sidewaysCollectors, dims.Keys.ToArray()), null);
}
/// <summary>
/// Create <c>TaxonomyFacetCounts</c>, which also
/// counts all facet labels. Use this for a non-default
/// <see cref="OrdinalsReader"/>; otherwise use
/// <c>FastTaxonomyFacetCounts</c>.
/// </summary>
public TaxonomyFacetCounts(OrdinalsReader ordinalsReader, TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc)
    : base(ordinalsReader.IndexFieldName, taxoReader, config)
{
    this.ordinalsReader = ordinalsReader;
    // Count ordinals across all matching documents.
    Count(fc.GetMatchingDocs);
}
/// <summary>
/// Create <c>DoubleRangeFacetCounts</c>, reading per-document double values
/// via a <c>DoubleFieldSource</c> built from the specified field.
/// </summary>
public DoubleRangeFacetCounts(string field, FacetsCollector hits, params DoubleRange[] ranges)
    : this(field, new DoubleFieldSource(field), hits, ranges)
{
}
public virtual void TestRandomSampling()
{
    Directory dir = NewDirectory();
    Directory taxoDir = NewDirectory();

    var taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
    var writer = new RandomIndexWriter(Random(), dir);
    var config = new FacetsConfig();

    // Index at least 10000 docs: an even/odd marker plus an iMod10 facet.
    int numDocs = AtLeast(10000);
    for (int i = 0; i < numDocs; i++)
    {
        var doc = new Document();
        doc.Add(new StringField("EvenOdd", (i % 2 == 0) ? "even" : "odd", Store.NO));
        doc.Add(new FacetField("iMod10", Convert.ToString(i % 10)));
        writer.AddDocument(config.Build(taxoWriter, doc));
    }
    Random random = Random();

    // NRT open
    IndexSearcher searcher = NewSearcher(writer.Reader);
    var taxoReader = new DirectoryTaxonomyReader(taxoWriter);
    IOUtils.Close(writer, taxoWriter);

    // Test empty results: a query that never matches must not cause
    // divisions by zero and must not yield a null result.
    var collectRandomZeroResults = new RandomSamplingFacetsCollector(numDocs / 10, random.NextLong());
    searcher.Search(new TermQuery(new Term("EvenOdd", "NeverMatches")), collectRandomZeroResults);
    Assert.NotNull(collectRandomZeroResults.GetMatchingDocs);

    // There should be no results at all.
    foreach (MatchingDocs matching in collectRandomZeroResults.GetMatchingDocs)
    {
        Assert.AreEqual(0, matching.totalHits);
    }

    // Now start searching and retrieve results.
    // Use a query to select half of the documents.
    TermQuery query = new TermQuery(new Term("EvenOdd", "even"));

    // There will be 5 facet values (0, 2, 4, 6 and 8), as only the even
    // (i % 10) are hits. There is a REAL small chance that one of the 5
    // values will be missed when sampling, but that is
    // 0.8 (chance not to take a value) ^ 2000 * 5 (any can be missing)
    // ~ 10^-193, so it is probably not going to happen.
    int maxNumChildren = 5;
    var random100Percent = new RandomSamplingFacetsCollector(numDocs, random.NextLong()); // no sampling
    var random10Percent = new RandomSamplingFacetsCollector(numDocs / 10, random.NextLong()); // 10% of total docs, 20% of the hits
    var fc = new FacetsCollector();
    searcher.Search(query, MultiCollector.Wrap(fc, random100Percent, random10Percent));

    var random10FacetCounts = new FastTaxonomyFacetCounts(taxoReader, config, random10Percent);
    var random100FacetCounts = new FastTaxonomyFacetCounts(taxoReader, config, random100Percent);
    var exactFacetCounts = new FastTaxonomyFacetCounts(taxoReader, config, fc);

    FacetResult random10Result = random10Percent.AmortizeFacetCounts(random10FacetCounts.GetTopChildren(10, "iMod10"), config, searcher);
    FacetResult random100Result = random100FacetCounts.GetTopChildren(10, "iMod10");
    FacetResult exactResult = exactFacetCounts.GetTopChildren(10, "iMod10");

    // A "sampling" collector that samples everything must agree exactly.
    Assert.AreEqual(random100Result, exactResult);

    // We should have five children, but there is a small chance we have
    // fewer (see above); there must be at least one.
    Assert.True(random10Result.ChildCount <= maxNumChildren);
    Assert.True(random10Result.ChildCount >= 1);

    // Now calculate some statistics to determine if the sampled result is
    // 'ok'. Because random sampling is used, the results vary each run.
    int sum = 0;
    foreach (LabelAndValue lav in random10Result.LabelValues)
    {
        sum += (int)lav.value;
    }
    float mu = (float)sum / (float)maxNumChildren;

    float variance = 0;
    foreach (LabelAndValue lav in random10Result.LabelValues)
    {
        variance += (float)Math.Pow((mu - (int)lav.value), 2);
    }
    variance = variance / maxNumChildren;
    float sigma = (float)Math.Sqrt(variance);

    // We query only half the documents and have 5 categories; the average
    // number of docs in a category will thus be the total divided by 5*2.
    float targetMu = numDocs / (5.0f * 2.0f);

    // The average should be in range and the standard deviation should
    // not be too great.
    Assert.True(sigma < 200);
    Assert.True(targetMu - 3 * sigma < mu && mu < targetMu + 3 * sigma);

    IOUtils.Close(searcher.IndexReader, taxoReader, dir, taxoDir);
}
/// <summary>
/// Convenience overload: builds taxonomy facet counts against the default
/// index field by delegating to the four-argument overload with
/// <c>FacetsConfig.DEFAULT_INDEX_FIELD_NAME</c>.
/// </summary>
public virtual Facets GetTaxonomyFacetCounts(TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector c)
{
    return GetTaxonomyFacetCounts(taxoReader, config, c, FacetsConfig.DEFAULT_INDEX_FIELD_NAME);
}