Exemplo n.º 1
0
        /// <summary>User runs a query and counts facets.</summary>
        private IList <FacetResult> FacetsWithSearch()
        {
            using (DirectoryReader indexReader = DirectoryReader.Open(indexDir))
                using (TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir))
                {
                    IndexSearcher searcher = new IndexSearcher(indexReader);

                    FacetsCollector fc = new FacetsCollector();

                    // MatchAllDocsQuery is for "browsing" (counts facets
                    // for all non-deleted docs in the index); normally
                    // you'd use a "normal" query:
                    FacetsCollector.Search(searcher, new MatchAllDocsQuery(), 10, fc);

                    // Retrieve results
                    IList <FacetResult> results = new List <FacetResult>();

                    // Count both "Publish Date" and "Author" dimensions
                    Facets facets = new FastTaxonomyFacetCounts(taxoReader, config, fc);
                    results.Add(facets.GetTopChildren(10, "Author"));
                    results.Add(facets.GetTopChildren(10, "Publish Date"));

                    return(results);
                }// Disposes indexReader and taxoReader
        }
Exemplo n.º 2
0
        /// <summary>User runs a query and counts facets only without collecting the matching documents.</summary>
        private IList <FacetResult> FacetsOnly()
        {
            using DirectoryReader indexReader = DirectoryReader.Open(indexDir);
            using TaxonomyReader taxoReader   = new DirectoryTaxonomyReader(taxoDir);
            IndexSearcher searcher = new IndexSearcher(indexReader);

            FacetsCollector fc = new FacetsCollector();

            // MatchAllDocsQuery is for "browsing" (counts facets
            // for all non-deleted docs in the index); normally
            // you'd use a "normal" query:
            searcher.Search(new MatchAllDocsQuery(), null /*Filter */, fc);

            Facets facets = new FastTaxonomyFacetCounts(taxoReader, config, fc);

            // Retrieve results
            IList <FacetResult> results = new List <FacetResult>
            {
                // Count both "Publish Date" and "Author" dimensions
                facets.GetTopChildren(10, "Author"),
                facets.GetTopChildren(10, "Publish Date")
            };

            return(results);
        }
            public bool? Call()
            {
                if (indexReader == null)
                {
                    indexReader = DirectoryReader.Open(indexDir);
                    lastIndexGeneration = indexReader.IndexCommit.Generation;
                    taxoReader = new DirectoryTaxonomyReader(taxoDir);
                }
                else
                {
                    // verify search index
                    DirectoryReader newReader = DirectoryReader.OpenIfChanged(indexReader);
                    assertNotNull("should not have reached here if no changes were made to the index", newReader);
                    long newGeneration = newReader.IndexCommit.Generation;
                    assertTrue("expected newer generation; current=" + lastIndexGeneration + " new=" + newGeneration, newGeneration > lastIndexGeneration);
                    indexReader.Dispose();
                    indexReader = newReader;
                    lastIndexGeneration = newGeneration;
                    TestUtil.CheckIndex(indexDir);

                    // verify taxonomy index
                    DirectoryTaxonomyReader newTaxoReader = TaxonomyReader.OpenIfChanged(taxoReader);
                    if (newTaxoReader != null)
                    {
                        taxoReader.Dispose();
                        taxoReader = newTaxoReader;
                    }
                    TestUtil.CheckIndex(taxoDir);

                    // verify faceted search
                    int id = int.Parse(indexReader.IndexCommit.UserData[VERSION_ID], NumberStyles.HexNumber);
                    IndexSearcher searcher = new IndexSearcher(indexReader);
                    FacetsCollector fc = new FacetsCollector();
                    searcher.Search(new MatchAllDocsQuery(), fc);
                    Facets facets = new FastTaxonomyFacetCounts(taxoReader, config, fc);
                    assertEquals(1, (int)facets.GetSpecificValue("A", id.ToString("X")));

                    DrillDownQuery drillDown = new DrillDownQuery(config);
                    drillDown.Add("A", id.ToString("X"));
                    TopDocs docs = searcher.Search(drillDown, 10);
                    assertEquals(1, docs.TotalHits);
                }
                return null;
            }
Exemplo n.º 4
0
        public virtual Facets GetTaxonomyFacetCounts(TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector c, string indexFieldName)
        {
            Facets facets;

            if (Random.NextBoolean())
            {
                facets = new FastTaxonomyFacetCounts(indexFieldName, taxoReader, config, c);
            }
            else
            {
                OrdinalsReader ordsReader = new DocValuesOrdinalsReader(indexFieldName);
                if (Random.NextBoolean())
                {
                    ordsReader = new CachedOrdinalsReader(ordsReader);
                }
                facets = new TaxonomyFacetCounts(ordsReader, taxoReader, config, c);
            }

            return(facets);
        }
Exemplo n.º 5
0
        /// <summary>
        /// User drills down on 'Publish Date/2010', and we
        /// return facets for 'Author'
        /// </summary>
        private FacetResult DrillDown()
        {
            using DirectoryReader indexReader = DirectoryReader.Open(indexDir);
            using TaxonomyReader taxoReader   = new DirectoryTaxonomyReader(taxoDir);
            IndexSearcher searcher = new IndexSearcher(indexReader);

            // Passing no baseQuery means we drill down on all
            // documents ("browse only"):
            DrillDownQuery q = new DrillDownQuery(config);

            // Now user drills down on Publish Date/2010:
            q.Add("Publish Date", "2010");
            FacetsCollector fc = new FacetsCollector();

            FacetsCollector.Search(searcher, q, 10, fc);

            // Retrieve results
            Facets      facets = new FastTaxonomyFacetCounts(taxoReader, config, fc);
            FacetResult result = facets.GetTopChildren(10, "Author");

            return(result);
        }
Exemplo n.º 6
0
        /// <summary>
        /// Subclass can override to customize per-dim Facets
        /// impl.
        /// </summary>
        protected virtual Facets BuildFacetsResult(FacetsCollector drillDowns, FacetsCollector[] drillSideways, string[] drillSidewaysDims)
        {
            Facets drillDownFacets;
            var    drillSidewaysFacets = new Dictionary <string, Facets>();

            if (m_taxoReader != null)
            {
                drillDownFacets = new FastTaxonomyFacetCounts(m_taxoReader, m_config, drillDowns);
                if (drillSideways != null)
                {
                    for (int i = 0; i < drillSideways.Length; i++)
                    {
                        drillSidewaysFacets[drillSidewaysDims[i]] = new FastTaxonomyFacetCounts(m_taxoReader, m_config, drillSideways[i]);
                    }
                }
            }
            else
            {
                drillDownFacets = new SortedSetDocValuesFacetCounts(m_state, drillDowns);
                if (drillSideways != null)
                {
                    for (int i = 0; i < drillSideways.Length; i++)
                    {
                        drillSidewaysFacets[drillSidewaysDims[i]] = new SortedSetDocValuesFacetCounts(m_state, drillSideways[i]);
                    }
                }
            }

            if (drillSidewaysFacets.Count == 0)
            {
                return(drillDownFacets);
            }
            else
            {
                return(new MultiFacets(drillSidewaysFacets, drillDownFacets));
            }
        }
        public virtual void TestRandomSampling()
        {
            Directory dir     = NewDirectory();
            Directory taxoDir = NewDirectory();

            DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
            RandomIndexWriter       writer     = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);

            FacetsConfig config = new FacetsConfig();

            int numDocs = AtLeast(10000);

            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();
                doc.Add(new StringField("EvenOdd", (i % 2 == 0) ? "even" : "odd", Store.NO));
                doc.Add(new FacetField("iMod10", Convert.ToString(i % 10, CultureInfo.InvariantCulture)));
                writer.AddDocument(config.Build(taxoWriter, doc));
            }
            Random random = Random;

            // NRT open
            IndexSearcher searcher   = NewSearcher(writer.GetReader());
            var           taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            IOUtils.Dispose(writer, taxoWriter);

            // Test empty results
            RandomSamplingFacetsCollector collectRandomZeroResults = new RandomSamplingFacetsCollector(numDocs / 10, random.NextInt64());

            // There should be no divisions by zero
            searcher.Search(new TermQuery(new Term("EvenOdd", "NeverMatches")), collectRandomZeroResults);

            // There should be no divisions by zero and no null result
            Assert.IsNotNull(collectRandomZeroResults.GetMatchingDocs());

            // There should be no results at all
            foreach (MatchingDocs doc in collectRandomZeroResults.GetMatchingDocs())
            {
                Assert.AreEqual(0, doc.TotalHits);
            }

            // Now start searching and retrieve results.

            // Use a query to select half of the documents.
            TermQuery query = new TermQuery(new Term("EvenOdd", "even"));

            // there will be 5 facet values (0, 2, 4, 6 and 8), as only the even (i %
            // 10) are hits.
            // there is a REAL small chance that one of the 5 values will be missed when
            // sampling.
            // but is that 0.8 (chance not to take a value) ^ 2000 * 5 (any can be
            // missing) ~ 10^-193
            // so that is probably not going to happen.
            int maxNumChildren = 5;

            RandomSamplingFacetsCollector random100Percent = new RandomSamplingFacetsCollector(numDocs, random.NextInt64());      // no sampling
            RandomSamplingFacetsCollector random10Percent  = new RandomSamplingFacetsCollector(numDocs / 10, random.NextInt64()); // 10 % of total docs, 20% of the hits

            FacetsCollector fc = new FacetsCollector();

            searcher.Search(query, MultiCollector.Wrap(fc, random100Percent, random10Percent));

            FastTaxonomyFacetCounts random10FacetCounts  = new FastTaxonomyFacetCounts(taxoReader, config, random10Percent);
            FastTaxonomyFacetCounts random100FacetCounts = new FastTaxonomyFacetCounts(taxoReader, config, random100Percent);
            FastTaxonomyFacetCounts exactFacetCounts     = new FastTaxonomyFacetCounts(taxoReader, config, fc);

            FacetResult random10Result  = random10Percent.AmortizeFacetCounts(random10FacetCounts.GetTopChildren(10, "iMod10"), config, searcher);
            FacetResult random100Result = random100FacetCounts.GetTopChildren(10, "iMod10");
            FacetResult exactResult     = exactFacetCounts.GetTopChildren(10, "iMod10");

            Assert.AreEqual(random100Result, exactResult);

            // we should have five children, but there is a small chance we have less.
            // (see above).
            Assert.IsTrue(random10Result.ChildCount <= maxNumChildren);
            // there should be one child at least.
            Assert.IsTrue(random10Result.ChildCount >= 1);

            // now calculate some statistics to determine if the sampled result is 'ok'.
            // because random sampling is used, the results will vary each time.
            int sum = 0;

            foreach (LabelAndValue lav in random10Result.LabelValues)
            {
                sum += (int)lav.Value;
            }
            float mu = (float)sum / (float)maxNumChildren;

            float variance = 0;

            foreach (LabelAndValue lav in random10Result.LabelValues)
            {
                variance += (float)Math.Pow((mu - (int)lav.Value), 2);
            }
            variance = variance / maxNumChildren;
            float sigma = (float)Math.Sqrt(variance);

            // we query only half the documents and have 5 categories. The average
            // number of docs in a category will thus be the total divided by 5*2
            float targetMu = numDocs / (5.0f * 2.0f);

            // the average should be in the range and the standard deviation should not
            // be too great
            Assert.IsTrue(sigma < 200);
            Assert.IsTrue(targetMu - 3 * sigma < mu && mu < targetMu + 3 * sigma);

            IOUtils.Dispose(searcher.IndexReader, taxoReader, dir, taxoDir);
        }
Exemplo n.º 8
0
        /// <summary>
        /// Searches the Lucene index for Documents that match the specified search criteria.
        /// </summary>
        /// <param name="criteria">The search criteria.</param>
        /// <returns></returns>
        /// <exception cref="ArgumentNullException"></exception>
        /// <exception cref="System.ArgumentNullException"></exception>
        public SearchResult<Guid> Search(SearchCriteria criteria)
        {
            if (criteria == null)
                throw new ArgumentNullException(nameof(criteria));

            criteria.Query = String.IsNullOrWhiteSpace(criteria.Query) ? ALL_DOCS_QUERY : criteria.Query;
            criteria.TopN = criteria.TopN > 0 ? criteria.TopN : SearchCriteria.DEFAULT_TOP_N;
            criteria.ItemsPerPage = criteria.ItemsPerPage > 0 ? criteria.ItemsPerPage : SearchCriteria.DEFAULT_ITEMS_PER_PAGE;
            criteria.PageNumber = criteria.PageNumber > 0 ? criteria.PageNumber : 1;
            criteria.Validate();

            var result = new SearchResult<Guid>(criteria);
            var queryParser = new LuceneQueryParser(Schema.StandardField.FULL_TEXT, _compositeAnalyzer, Schema);
            var query = queryParser.Parse(criteria.Query);

            var instance = _searcherTaxonomyManager.Acquire() as SearcherTaxonomyManagerSearcherAndTaxonomy;
            if (instance != null)
            {
                var searcher = instance.Searcher;
                var taxonomyReader = instance.TaxonomyReader;

                try
                {
                    var sort = GetSortCriteria(criteria.SortByField);
                    var selectedFacets = criteria.SelectCategories.ToFacetFields();
                    var topDocs = (TopDocs)null;                                        
                    var categories = (IEnumerable<Category>)null;

                    if (selectedFacets.Count() == 0)
                    {
                        // We are not going to do a drill-down on specific facets.
                        // Instead we will just take the top N facets from the matching Documents.
                        var facetsCollector = new FacetsCollector();

                        // Get the matching Documents
                        topDocs = FacetsCollector.Search(searcher, query, criteria.TopN, sort, facetsCollector);

                        // Get the Facet counts from the matching Documents
                        var facetCounts = new FastTaxonomyFacetCounts(taxonomyReader, _facetBuilder.FacetsConfig, facetsCollector);                        
                        categories = facetCounts.GetCategories(criteria.TopNCategories);
                    }
                    else
                    {
                        // Perform a drill-sideways query
                        var drillDownQuery = new DrillDownQuery(_facetBuilder.FacetsConfig, query);
                        foreach (var facetField in selectedFacets)
                            drillDownQuery.Add(facetField.Dim, facetField.Path);                        

                        var drillSideways = new DrillSideways(searcher, _facetBuilder.FacetsConfig, taxonomyReader);
                        var drillSidewaysResult = drillSideways.Search(drillDownQuery, null, null, criteria.TopN, sort, false, false);

                        // Get the matching documents
                        topDocs = drillSidewaysResult.Hits;                        

                        // Get the Facet counts from the matching Documents
                        categories = drillSidewaysResult.Facets.GetCategories(criteria.TopNCategories, selectedFacets);
                    }

                    // TODO: Don't pass TopDocs; pass an IEnumerable<Guid>
                    result.PopulateWith(topDocs, categories, id => searcher.Doc(id));                    
                }
                finally
                {
                    _searcherTaxonomyManager.Release(instance); 
                    searcher = null;
                    taxonomyReader = null;
                }
            }

            return result;
        }
Exemplo n.º 9
0
        public async Task <SearchResult <T> > SearchAsync(SearchQuery queryDefinition, CancellationToken cancellationToken = default)
        {
            using (await writerLock.ReaderLockAsync(cancellationToken))
            {
                var      result = new SearchResult <T>();
                List <T> hits   = new List <T>();

                using (var writer = getWriter())
                {
                    Query query = new MatchAllDocsQuery();

                    // Term queries
                    if (queryDefinition.TermQueries.Any())
                    {
                        var phraseQuery = new MultiPhraseQuery();
                        foreach (var termQuery in queryDefinition.TermQueries)
                        {
                            phraseQuery.Add(
                                termQuery.value
                                .Split(" ".ToCharArray(), StringSplitOptions.RemoveEmptyEntries)
                                .Select(phrase => new Term(termQuery.field, phrase.ToLower()))
                                .ToArray()
                                );
                        }
                        query = phraseQuery;
                    }

                    var reader       = writer.DocsWriter.GetReader(applyAllDeletes: true);
                    var searcher     = new IndexSearcher(reader);
                    var luceneResult = searcher.Search(query, queryDefinition.Limit);

                    foreach (var doc in luceneResult.ScoreDocs)
                    {
                        var foundDoc = searcher.Doc(doc.Doc);
                        hits.Add(await inflateDocument(foundDoc));
                    }

                    result.TotalHits = luceneResult.TotalHits;
                    result.Hits      = hits;

                    // Facets
                    if (queryDefinition.Facets.Any())
                    {
                        FacetsConfig    facetsConfig = new FacetsConfig();
                        FacetsCollector fc           = new FacetsCollector();
                        FacetsCollector.Search(searcher, query, queryDefinition.FacetMax, fc);
                        using (var taxonomyReader = new DirectoryTaxonomyReader(FSDirectory.Open(Path.Combine(options.IndexPath, indexType, "taxonomy"))))
                        {
                            var facets = new FastTaxonomyFacetCounts(taxonomyReader, facetsConfig, fc);
                            foreach (var facet in queryDefinition.Facets)
                            {
                                var facetGroup = new FacetGroup {
                                    Field = facet
                                };
                                facetGroup.Facets =
                                    facets.GetTopChildren(queryDefinition.FacetMax, facet).LabelValues
                                    .Select(x => new Facet {
                                    Key = x.Label, Count = (long)x.Value
                                })
                                    .ToArray();
                                result.FacetGroups.Add(facetGroup);
                            }
                        }
                    }
                }

                return(result);
            }
        }
Exemplo n.º 10
0
        /// <summary>
        /// Get the taxonomy facet caounts
        /// </summary>
        /// <param name="taxoReader">The taxonomy reader.</param>
        /// <param name="config">The facet configuration.</param>
        /// <param name="collector">The result collector.</param>
        /// <param name="indexFieldName">The index field name.</param>
        /// <returns>The facets.</returns>
        private Facets GetTaxonomyFacetCounts(TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector collector, string indexFieldName)
        {
            Facets facets = new FastTaxonomyFacetCounts(indexFieldName, taxoReader, config, collector);

            return(facets);
        }
        public virtual void TestRandomSampling()
        {
            Directory dir = NewDirectory();
            Directory taxoDir = NewDirectory();

            DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

            FacetsConfig config = new FacetsConfig();

            int numDocs = AtLeast(10000);
            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();
                doc.Add(new StringField("EvenOdd", (i % 2 == 0) ? "even" : "odd", Store.NO));
                doc.Add(new FacetField("iMod10", Convert.ToString(i % 10)));
                writer.AddDocument(config.Build(taxoWriter, doc));
            }
            Random random = Random();

            // NRT open
            IndexSearcher searcher = NewSearcher(writer.Reader);
            var taxoReader = new DirectoryTaxonomyReader(taxoWriter);
            IOUtils.Close(writer, taxoWriter);

            // Test empty results
            RandomSamplingFacetsCollector collectRandomZeroResults = new RandomSamplingFacetsCollector(numDocs / 10, random.NextLong());

            // There should be no divisions by zero
            searcher.Search(new TermQuery(new Term("EvenOdd", "NeverMatches")), collectRandomZeroResults);

            // There should be no divisions by zero and no null result
            Assert.NotNull(collectRandomZeroResults.GetMatchingDocs());

            // There should be no results at all
            foreach (MatchingDocs doc in collectRandomZeroResults.GetMatchingDocs())
            {
                Assert.AreEqual(0, doc.TotalHits);
            }

            // Now start searching and retrieve results.

            // Use a query to select half of the documents.
            TermQuery query = new TermQuery(new Term("EvenOdd", "even"));

            // there will be 5 facet values (0, 2, 4, 6 and 8), as only the even (i %
            // 10) are hits.
            // there is a REAL small chance that one of the 5 values will be missed when
            // sampling.
            // but is that 0.8 (chance not to take a value) ^ 2000 * 5 (any can be
            // missing) ~ 10^-193
            // so that is probably not going to happen.
            int maxNumChildren = 5;

            RandomSamplingFacetsCollector random100Percent = new RandomSamplingFacetsCollector(numDocs, random.NextLong()); // no sampling
            RandomSamplingFacetsCollector random10Percent = new RandomSamplingFacetsCollector(numDocs / 10, random.NextLong()); // 10 % of total docs, 20% of the hits

            FacetsCollector fc = new FacetsCollector();

            searcher.Search(query, MultiCollector.Wrap(fc, random100Percent, random10Percent));

            FastTaxonomyFacetCounts random10FacetCounts = new FastTaxonomyFacetCounts(taxoReader, config, random10Percent);
            FastTaxonomyFacetCounts random100FacetCounts = new FastTaxonomyFacetCounts(taxoReader, config, random100Percent);
            FastTaxonomyFacetCounts exactFacetCounts = new FastTaxonomyFacetCounts(taxoReader, config, fc);

            FacetResult random10Result = random10Percent.AmortizeFacetCounts(random10FacetCounts.GetTopChildren(10, "iMod10"), config, searcher);
            FacetResult random100Result = random100FacetCounts.GetTopChildren(10, "iMod10");
            FacetResult exactResult = exactFacetCounts.GetTopChildren(10, "iMod10");

            Assert.AreEqual(random100Result, exactResult);

            // we should have five children, but there is a small chance we have less.
            // (see above).
            Assert.True(random10Result.ChildCount <= maxNumChildren);
            // there should be one child at least.
            Assert.True(random10Result.ChildCount >= 1);

            // now calculate some statistics to determine if the sampled result is 'ok'.
            // because random sampling is used, the results will vary each time.
            int sum = 0;
            foreach (LabelAndValue lav in random10Result.LabelValues)
            {
                sum += (int)lav.Value;
            }
            float mu = (float)sum / (float)maxNumChildren;

            float variance = 0;
            foreach (LabelAndValue lav in random10Result.LabelValues)
            {
                variance += (float)Math.Pow((mu - (int)lav.Value), 2);
            }
            variance = variance / maxNumChildren;
            float sigma = (float)Math.Sqrt(variance);

            // we query only half the documents and have 5 categories. The average
            // number of docs in a category will thus be the total divided by 5*2
            float targetMu = numDocs / (5.0f * 2.0f);

            // the average should be in the range and the standard deviation should not
            // be too great
            Assert.True(sigma < 200);
            Assert.True(targetMu - 3 * sigma < mu && mu < targetMu + 3 * sigma);

            IOUtils.Close(searcher.IndexReader, taxoReader, dir, taxoDir);
        }