/// <summary>
 /// Aggreggates float facet values from the provided
 /// <see cref="ValueSource"/>, and pulls ordinals from the
 /// provided <see cref="OrdinalsReader"/>. 
 /// </summary>
 public TaxonomyFacetSumValueSource(OrdinalsReader ordinalsReader, TaxonomyReader taxoReader,
     FacetsConfig config, FacetsCollector fc, ValueSource valueSource)
     : base(ordinalsReader.IndexFieldName, taxoReader, config)
 {
     this.ordinalsReader = ordinalsReader;
     SumValues(fc.GetMatchingDocs(), fc.KeepScores, valueSource);
 }
Beispiel #2
0
        static private void Configure(FacetsConfig fc, string facetName, bool isHierarchical, bool isMultiValued)
        {
            if (isHierarchical)
                fc.setHierarchical(facetName, true);

            if (isMultiValued)
                fc.setMultiValued(facetName, true);
        }
        /// <summary>
        /// Initializes a new instance of the <see cref="LuceneFacetBuilder"/> class.
        /// </summary>
        /// <param name="taxonomyWriter">The taxonomy writer.</param>
        /// <exception cref="ArgumentNullException"></exception>
        public LuceneFacetBuilder(TaxonomyWriter taxonomyWriter)
        {
            if (taxonomyWriter == null)
                throw new ArgumentNullException(nameof(taxonomyWriter));

            _taxonomyWriter = taxonomyWriter;
            FacetsConfig = new FacetsConfig();
        }
 public IndexerThread(IndexWriter w, FacetsConfig config, TaxonomyWriter tw, ReferenceManager<SearcherAndTaxonomy> mgr, int ordLimit, AtomicBoolean stop)
 {
     this.w = w;
     this.config = config;
     this.tw = tw;
     this.mgr = mgr;
     this.ordLimit = ordLimit;
     this.stop = stop;
 }
Beispiel #5
0
        static private FacetsConfig CreateFacetsConfig()
        {
            var facetsConfig = new FacetsConfig();

            Configure(facetsConfig, FacetNames.Folder, true, false);
            Configure(facetsConfig, FacetNames.PlaceName, true, false);
            Configure(facetsConfig, FacetNames.Keywords, false, true);
            Configure(facetsConfig, FacetNames.Date, true, false);

            return facetsConfig;
        }
        public virtual void TestBasic()
        {

            AssumeTrue("Test requires SortedSetDV support", DefaultCodecSupportsSortedSet());
            Directory dir = NewDirectory();

            FacetsConfig config = new FacetsConfig();
            config.SetMultiValued("a", true);
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

            Document doc = new Document();
            doc.Add(new SortedSetDocValuesFacetField("a", "foo"));
            doc.Add(new SortedSetDocValuesFacetField("a", "bar"));
            doc.Add(new SortedSetDocValuesFacetField("a", "zoo"));
            doc.Add(new SortedSetDocValuesFacetField("b", "baz"));
            writer.AddDocument(config.Build(doc));
            if (Random().NextBoolean())
            {
                writer.Commit();
            }

            doc = new Document();
            doc.Add(new SortedSetDocValuesFacetField("a", "foo"));
            writer.AddDocument(config.Build(doc));

            // NRT open
            IndexSearcher searcher = NewSearcher(writer.Reader);

            // Per-top-reader state:
            SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(searcher.IndexReader);

            FacetsCollector c = new FacetsCollector();

            searcher.Search(new MatchAllDocsQuery(), c);

            SortedSetDocValuesFacetCounts facets = new SortedSetDocValuesFacetCounts(state, c);

            Assert.AreEqual("dim=a path=[] value=4 childCount=3\n  foo (2)\n  bar (1)\n  zoo (1)\n", facets.GetTopChildren(10, "a").ToString());
            Assert.AreEqual("dim=b path=[] value=1 childCount=1\n  baz (1)\n", facets.GetTopChildren(10, "b").ToString());

            // DrillDown:
            DrillDownQuery q = new DrillDownQuery(config);
            q.Add("a", "foo");
            q.Add("b", "baz");
            TopDocs hits = searcher.Search(q, 1);
            Assert.AreEqual(1, hits.TotalHits);

            IOUtils.Close(writer, searcher.IndexReader, dir);
        }
        public virtual void TestWithThreads()
        {
            // LUCENE-5303: OrdinalsCache used the ThreadLocal BinaryDV instead of reader.getCoreCacheKey().
            Store.Directory indexDir = NewDirectory();
            Store.Directory taxoDir = NewDirectory();
            IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            IndexWriter writer = new IndexWriter(indexDir, conf);
            var taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
            FacetsConfig config = new FacetsConfig();

            Document doc = new Document();
            doc.Add(new FacetField("A", "1"));
            writer.AddDocument(config.Build(taxoWriter, doc));
            doc = new Document();
            doc.Add(new FacetField("A", "2"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            var reader = DirectoryReader.Open(writer, true);
            CachedOrdinalsReader ordsReader = new CachedOrdinalsReader(new DocValuesOrdinalsReader(FacetsConfig.DEFAULT_INDEX_FIELD_NAME));
            ThreadClass[] threads = new ThreadClass[3];
            for (int i = 0; i < threads.Length; i++)
            {
                threads[i] = new ThreadAnonymousInnerClassHelper(this, "CachedOrdsThread-" + i, reader, ordsReader);
            }

            long ramBytesUsed = 0;
            foreach (ThreadClass t in threads)
            {
                t.Start();
                t.Join();
                if (ramBytesUsed == 0)
                {
                    ramBytesUsed = ordsReader.RamBytesUsed();
                }
                else
                {
                    Assert.AreEqual(ramBytesUsed, ordsReader.RamBytesUsed());
                }
            }

            IOUtils.Close(writer, taxoWriter, reader, indexDir, taxoDir);
        }
        public static void BeforeClass()
        {
            dir = NewDirectory();
            taxoDir = NewDirectory();
            // preparations - index, taxonomy, content

            var taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

            // Cannot mix ints & floats in the same indexed field:
            config = new FacetsConfig();
            config.SetIndexFieldName("int", "$facets.int");
            config.SetMultiValued("int", true);
            config.SetIndexFieldName("float", "$facets.float");
            config.SetMultiValued("float", true);

            var writer = new RandomIndexWriter(Random(), dir);

            // index documents, 50% have only 'b' and all have 'a'
            for (int i = 0; i < 110; i++)
            {
                Document doc = new Document();
                // every 11th document is added empty, this used to cause the association
                // aggregators to go into an infinite loop
                if (i % 11 != 0)
                {
                    doc.Add(new IntAssociationFacetField(2, "int", "a"));
                    doc.Add(new FloatAssociationFacetField(0.5f, "float", "a"));
                    if (i % 2 == 0) // 50
                    {
                        doc.Add(new IntAssociationFacetField(3, "int", "b"));
                        doc.Add(new FloatAssociationFacetField(0.2f, "float", "b"));
                    }
                }
                writer.AddDocument(config.Build(taxoWriter, doc));
            }

            taxoWriter.Dispose();
            reader = writer.Reader;
            writer.Dispose();
            taxoReader = new DirectoryTaxonomyReader(taxoDir);
        }
Beispiel #9
0
        public virtual void TestCustomDoublesValueSource()
        {
            Directory         dir    = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

            Document doc = new Document();

            writer.AddDocument(doc);
            writer.AddDocument(doc);
            writer.AddDocument(doc);

            // Test wants 3 docs in one segment:
            writer.ForceMerge(1);

            var vs = new ValueSourceAnonymousInnerClassHelper(this, doc);

            FacetsConfig config = new FacetsConfig();

            FacetsCollector fc = new FacetsCollector();

            IndexReader   r = writer.Reader;
            IndexSearcher s = NewSearcher(r);

            s.Search(new MatchAllDocsQuery(), fc);

            DoubleRange[] ranges = new DoubleRange[] { new DoubleRange("< 1", 0.0, true, 1.0, false), new DoubleRange("< 2", 0.0, true, 2.0, false), new DoubleRange("< 5", 0.0, true, 5.0, false), new DoubleRange("< 10", 0.0, true, 10.0, false), new DoubleRange("< 20", 0.0, true, 20.0, false), new DoubleRange("< 50", 0.0, true, 50.0, false) };

            Filter        fastMatchFilter;
            AtomicBoolean filterWasUsed = new AtomicBoolean();

            if (Random().NextBoolean())
            {
                // Sort of silly:
                fastMatchFilter = new CachingWrapperFilterAnonymousInnerClassHelper(this, new QueryWrapperFilter(new MatchAllDocsQuery()), filterWasUsed);
            }
            else
            {
                fastMatchFilter = null;
            }

            if (VERBOSE)
            {
                Console.WriteLine("TEST: fastMatchFilter=" + fastMatchFilter);
            }

            Facets facets = new DoubleRangeFacetCounts("field", vs, fc, fastMatchFilter, ranges);

            Assert.AreEqual("dim=field path=[] value=3 childCount=6\n  < 1 (0)\n  < 2 (1)\n  < 5 (3)\n  < 10 (3)\n  < 20 (3)\n  < 50 (3)\n", facets.GetTopChildren(10, "field").ToString());
            Assert.True(fastMatchFilter == null || filterWasUsed.Get());

            DrillDownQuery ddq = new DrillDownQuery(config);

            ddq.Add("field", ranges[1].GetFilter(fastMatchFilter, vs));

            // Test simple drill-down:
            Assert.AreEqual(1, s.Search(ddq, 10).TotalHits);

            // Test drill-sideways after drill-down
            DrillSideways ds = new DrillSidewaysAnonymousInnerClassHelper2(this, s, config, (TaxonomyReader)null, vs, ranges, fastMatchFilter);


            DrillSidewaysResult dsr = ds.Search(ddq, 10);

            Assert.AreEqual(1, dsr.Hits.TotalHits);
            Assert.AreEqual("dim=field path=[] value=3 childCount=6\n  < 1 (0)\n  < 2 (1)\n  < 5 (3)\n  < 10 (3)\n  < 20 (3)\n  < 50 (3)\n", dsr.Facets.GetTopChildren(10, "field").ToString());

            IOUtils.Dispose(r, writer, dir);
        }
        public virtual void TestBasic()
        {
            Store.Directory dir     = NewDirectory();
            Store.Directory taxoDir = NewDirectory();

            // Writes facet ords to a separate directory from the
            // main index:
            DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE);

            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
            FacetsConfig      config = new FacetsConfig();

            // Reused across documents, to add the necessary facet
            // fields:
            Document doc = new Document();

            doc.Add(new IntField("num", 10, Field.Store.NO));
            doc.Add(new FacetField("Author", "Bob"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            doc = new Document();
            doc.Add(new IntField("num", 20, Field.Store.NO));
            doc.Add(new FacetField("Author", "Lisa"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            doc = new Document();
            doc.Add(new IntField("num", 30, Field.Store.NO));
            doc.Add(new FacetField("Author", "Lisa"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            doc = new Document();
            doc.Add(new IntField("num", 40, Field.Store.NO));
            doc.Add(new FacetField("Author", "Susan"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            doc = new Document();
            doc.Add(new IntField("num", 45, Field.Store.NO));
            doc.Add(new FacetField("Author", "Frank"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            // NRT open
            IndexSearcher searcher = NewSearcher(writer.Reader);

            writer.Dispose();

            // NRT open
            var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            taxoWriter.Dispose();

            // Aggregate the facet counts:
            FacetsCollector c = new FacetsCollector();

            // MatchAllDocsQuery is for "browsing" (counts facets
            // for all non-deleted docs in the index); normally
            // you'd use a "normal" query and one of the
            // Facets.search utility methods:
            searcher.Search(new MatchAllDocsQuery(), c);

            TaxonomyFacetSumValueSource facets = new TaxonomyFacetSumValueSource(taxoReader, new FacetsConfig(), c, new IntFieldSource("num"));

            // Retrieve & verify results:
            Assert.AreEqual("dim=Author path=[] value=145.0 childCount=4\n  Lisa (50.0)\n  Frank (45.0)\n  Susan (40.0)\n  Bob (10.0)\n", facets.GetTopChildren(10, "Author").ToString());

            taxoReader.Dispose();
            searcher.IndexReader.Dispose();
            dir.Dispose();
            taxoDir.Dispose();
        }
Beispiel #11
0
 /// <summary>
 /// Create <see cref="FastTaxonomyFacetCounts"/>, using the
 /// specified <paramref name="indexFieldName"/> for ordinals.  Use
 /// this if you had set <see cref="FacetsConfig.SetIndexFieldName"/>
 /// to change the index
 /// field name for certain dimensions.
 /// </summary>
 public FastTaxonomyFacetCounts(string indexFieldName, TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc)
     : base(indexFieldName, taxoReader, config)
 {
     Count(fc.GetMatchingDocs());
 }
Beispiel #12
0
 /// <summary>
 /// Create <see cref="TaxonomyFacetSumIntAssociations"/> against
 /// the specified index field.
 /// </summary>
 public TaxonomyFacetSumIntAssociations(string indexFieldName, TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc)
     : base(indexFieldName, taxoReader, config)
 {
     SumValues(fc.GetMatchingDocs());
 }
 /// <summary>
 /// Sole constructor.
 /// </summary>
 protected internal FloatTaxonomyFacets(string indexFieldName, TaxonomyReader taxoReader, FacetsConfig config)
     : base(indexFieldName, taxoReader, config)
 {
     values = new float[taxoReader.Count];
 }
        public virtual void TestRandom()
        {
            AssumeTrue("Test requires SortedSetDV support", DefaultCodecSupportsSortedSet);
            string[]  tokens   = GetRandomTokens(10);
            Directory indexDir = NewDirectory();
            Directory taxoDir  = NewDirectory();

            RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, indexDir);
            FacetsConfig    config   = new FacetsConfig();
            int             numDocs  = AtLeast(1000);
            int             numDims  = TestUtil.NextInt32(Random, 1, 7);
            IList <TestDoc> testDocs = GetRandomDocs(tokens, numDocs, numDims);

            foreach (TestDoc testDoc in testDocs)
            {
                Document doc = new Document();
                doc.Add(NewStringField("content", testDoc.content, Field.Store.NO));
                for (int j = 0; j < numDims; j++)
                {
                    if (testDoc.dims[j] != null)
                    {
                        doc.Add(new SortedSetDocValuesFacetField("dim" + j, testDoc.dims[j]));
                    }
                }
                w.AddDocument(config.Build(doc));
            }

            // NRT open
            IndexSearcher searcher = NewSearcher(w.GetReader());

            // Per-top-reader state:
            SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(searcher.IndexReader);

            int iters = AtLeast(100);

            for (int iter = 0; iter < iters; iter++)
            {
                string searchToken = tokens[Random.Next(tokens.Length)];
                if (Verbose)
                {
                    Console.WriteLine("\nTEST: iter content=" + searchToken);
                }
                FacetsCollector fc = new FacetsCollector();
                FacetsCollector.Search(searcher, new TermQuery(new Term("content", searchToken)), 10, fc);
                Facets facets = new SortedSetDocValuesFacetCounts(state, fc);

                // Slow, yet hopefully bug-free, faceting:
                var expectedCounts = new List <Dictionary <string, int?> >();
                for (int i = 0; i < numDims; i++)
                {
                    expectedCounts.Add(new Dictionary <string, int?>());
                }

                foreach (TestDoc doc in testDocs)
                {
                    if (doc.content.Equals(searchToken, StringComparison.Ordinal))
                    {
                        for (int j = 0; j < numDims; j++)
                        {
                            if (doc.dims[j] != null)
                            {
                                if (!expectedCounts[j].TryGetValue(doc.dims[j], out int?v))
                                {
                                    expectedCounts[j][doc.dims[j]] = 1;
                                }
                                else
                                {
                                    expectedCounts[j][doc.dims[j]] = (int)v + 1;
                                }
                            }
                        }
                    }
                }

                List <FacetResult> expected = new List <FacetResult>();
                for (int i = 0; i < numDims; i++)
                {
                    List <LabelAndValue> labelValues = new List <LabelAndValue>();
                    int totCount = 0;
                    foreach (KeyValuePair <string, int?> ent in expectedCounts[i])
                    {
                        labelValues.Add(new LabelAndValue(ent.Key, ent.Value.Value));
                        totCount += ent.Value.Value;
                    }
                    SortLabelValues(labelValues);
                    if (totCount > 0)
                    {
                        expected.Add(new FacetResult("dim" + i, new string[0], totCount, labelValues.ToArray(), labelValues.Count));
                    }
                }

                // Sort by highest value, tie break by value:
                SortFacetResults(expected);

                IList <FacetResult> actual = facets.GetAllDims(10);

                // Messy: fixup ties
                //sortTies(actual);

                CollectionAssert.AreEqual(expected, actual);
            }

            IOUtils.Dispose(w, searcher.IndexReader, indexDir, taxoDir);
        }
        public virtual void TestWrongIndexFieldName()
        {
            Store.Directory dir     = NewDirectory();
            Store.Directory taxoDir = NewDirectory();

            // Writes facet ords to a separate directory from the
            // main index:
            DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE);

            FacetsConfig config = new FacetsConfig();

            config.SetIndexFieldName("a", "$facets2");
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

            Document doc = new Document();

            doc.Add(new FacetField("a", "foo1"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            // NRT open
            IndexSearcher searcher = NewSearcher(writer.Reader);

            // NRT open
            var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            FacetsCollector c = new FacetsCollector();

            searcher.Search(new MatchAllDocsQuery(), c);

            // Uses default $facets field:
            Facets facets;

            if (Random().NextBoolean())
            {
                facets = new FastTaxonomyFacetCounts(taxoReader, config, c);
            }
            else
            {
                OrdinalsReader ordsReader = new DocValuesOrdinalsReader();
                if (Random().NextBoolean())
                {
                    ordsReader = new CachedOrdinalsReader(ordsReader);
                }
                facets = new TaxonomyFacetCounts(ordsReader, taxoReader, config, c);
            }

            // Ask for top 10 labels for any dims that have counts:
            IList <FacetResult> results = facets.GetAllDims(10);

            Assert.True(results.Count == 0);

            try
            {
                facets.GetSpecificValue("a");
                Fail("should have hit exc");
            }
            catch (System.ArgumentException)
            {
                // expected
            }

            try
            {
                facets.GetTopChildren(10, "a");
                Fail("should have hit exc");
            }
            catch (System.ArgumentException)
            {
                // expected
            }

            IOUtils.Close(writer, taxoWriter, searcher.IndexReader, taxoReader, taxoDir, dir);
        }
        public virtual void TestRandom()
        {
            string[] tokens = GetRandomTokens(10);
            Store.Directory indexDir = NewDirectory();
            Store.Directory taxoDir = NewDirectory();

            RandomIndexWriter w = new RandomIndexWriter(Random(), indexDir);
            var tw = new DirectoryTaxonomyWriter(taxoDir);
            FacetsConfig config = new FacetsConfig();
            int numDocs = AtLeast(1000);
            int numDims = TestUtil.NextInt(Random(), 1, 7);
            IList<TestDoc> testDocs = GetRandomDocs(tokens, numDocs, numDims);
            foreach (TestDoc testDoc in testDocs)
            {
                Document doc = new Document();
                doc.Add(NewStringField("content", testDoc.content, Field.Store.NO));
                for (int j = 0; j < numDims; j++)
                {
                    if (testDoc.dims[j] != null)
                    {
                        doc.Add(new FacetField("dim" + j, testDoc.dims[j]));
                    }
                }
                w.AddDocument(config.Build(tw, doc));
            }

            // NRT open
            IndexSearcher searcher = NewSearcher(w.Reader);

            // NRT open
            var tr = new DirectoryTaxonomyReader(tw);

            int iters = AtLeast(100);
            for (int iter = 0; iter < iters; iter++)
            {
                string searchToken = tokens[Random().Next(tokens.Length)];
                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: iter content=" + searchToken);
                }
                FacetsCollector fc = new FacetsCollector();
                FacetsCollector.Search(searcher, new TermQuery(new Term("content", searchToken)), 10, fc);
                Facets facets = GetTaxonomyFacetCounts(tr, config, fc);

                // Slow, yet hopefully bug-free, faceting:
                var expectedCounts = new List<Dictionary<string, int?>>();
                for (int i = 0; i < numDims; i++)
                {
                    expectedCounts[i] = new Dictionary<string, int?>();
                }

                foreach (TestDoc doc in testDocs)
                {
                    if (doc.content.Equals(searchToken))
                    {
                        for (int j = 0; j < numDims; j++)
                        {
                            if (doc.dims[j] != null)
                            {
                                int? v = expectedCounts[j][doc.dims[j]];
                                if (v == null)
                                {
                                    expectedCounts[j][doc.dims[j]] = 1;
                                }
                                else
                                {
                                    expectedCounts[j][doc.dims[j]] = (int)v + 1;
                                }
                            }
                        }
                    }
                }

                IList<FacetResult> expected = new List<FacetResult>();
                for (int i = 0; i < numDims; i++)
                {
                    IList<LabelAndValue> labelValues = new List<LabelAndValue>();
                    int totCount = 0;
                    foreach (KeyValuePair<string, int?> ent in expectedCounts[i])
                    {
                        labelValues.Add(new LabelAndValue(ent.Key, ent.Value.Value));
                        totCount += ent.Value.Value;
                    }
                    SortLabelValues(labelValues);
                    if (totCount > 0)
                    {
                        expected.Add(new FacetResult("dim" + i, new string[0], totCount, labelValues.ToArray(), labelValues.Count));
                    }
                }

                // Sort by highest value, tie break by value:
                SortFacetResults(expected);

                IList<FacetResult> actual = facets.GetAllDims(10);

                // Messy: fixup ties
                SortTies(actual);

                Assert.AreEqual(expected, actual);
            }

            IOUtils.Close(w, tw, searcher.IndexReader, tr, indexDir, taxoDir);
        }
        public virtual void TestReallyNoNormsForDrillDown()
        {
            Store.Directory dir = NewDirectory();
            Store.Directory taxoDir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            iwc.SetSimilarity(new PerFieldSimilarityWrapperAnonymousInnerClassHelper(this));
            TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE);
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, iwc);
            FacetsConfig config = new FacetsConfig();

            Document doc = new Document();
            doc.Add(NewTextField("field", "text", Field.Store.NO));
            doc.Add(new FacetField("a", "path"));
            writer.AddDocument(config.Build(taxoWriter, doc));
            IOUtils.Close(writer, taxoWriter, dir, taxoDir);
        }
        public virtual void TestManyFacetsInOneDocument()
        {
            AssumeTrue("default Codec doesn't support huge BinaryDocValues", TestUtil.FieldSupportsHugeBinaryDocValues(FacetsConfig.DEFAULT_INDEX_FIELD_NAME));
            Store.Directory dir = NewDirectory();
            Store.Directory taxoDir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, iwc);
            var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE);

            FacetsConfig config = new FacetsConfig();
            config.SetMultiValued("dim", true);

            int numLabels = TestUtil.NextInt(Random(), 40000, 100000);

            Document doc = new Document();
            doc.Add(NewTextField("field", "text", Field.Store.NO));
            for (int i = 0; i < numLabels; i++)
            {
                doc.Add(new FacetField("dim", "" + i));
            }
            writer.AddDocument(config.Build(taxoWriter, doc));

            // NRT open
            IndexSearcher searcher = NewSearcher(writer.Reader);

            // NRT open
            var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            // Aggregate the facet counts:
            FacetsCollector c = new FacetsCollector();

            // MatchAllDocsQuery is for "browsing" (counts facets
            // for all non-deleted docs in the index); normally
            // you'd use a "normal" query, and use MultiCollector to
            // wrap collecting the "normal" hits and also facets:
            searcher.Search(new MatchAllDocsQuery(), c);
            Facets facets = GetTaxonomyFacetCounts(taxoReader, config, c);

            FacetResult result = facets.GetTopChildren(int.MaxValue, "dim");
            Assert.AreEqual(numLabels, result.LabelValues.Length);
            var allLabels = new HashSet<string>();
            foreach (LabelAndValue labelValue in result.LabelValues)
            {
                allLabels.Add(labelValue.label);
                Assert.AreEqual(1, (int)labelValue.value);
            }
            Assert.AreEqual(numLabels, allLabels.Count);

            IOUtils.Close(searcher.IndexReader, taxoWriter, writer, taxoReader, dir, taxoDir);
        }
        public virtual void TestMultiValuedHierarchy()
        {
            Store.Directory dir = NewDirectory();
            Store.Directory taxoDir = NewDirectory();
            DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE);
            FacetsConfig config = new FacetsConfig();
            config.SetHierarchical("a", true);
            config.SetMultiValued("a", true);
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir);

            Document doc = new Document();
            doc.Add(NewTextField("field", "text", Field.Store.NO));
            doc.Add(new FacetField("a", "path", "x"));
            doc.Add(new FacetField("a", "path", "y"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            // NRT open
            IndexSearcher searcher = NewSearcher(writer.Reader);

            // NRT open
            var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            // Aggregate the facet counts:
            FacetsCollector c = new FacetsCollector();

            // MatchAllDocsQuery is for "browsing" (counts facets
            // for all non-deleted docs in the index); normally
            // you'd use a "normal" query, and use MultiCollector to
            // wrap collecting the "normal" hits and also facets:
            searcher.Search(new MatchAllDocsQuery(), c);
            Facets facets = GetTaxonomyFacetCounts(taxoReader, config, c);

            try
            {
                facets.GetSpecificValue("a");
                Fail("didn't hit expected exception");
            }
            catch (System.ArgumentException)
            {
                // expected
            }

            FacetResult result = facets.GetTopChildren(10, "a");
            Assert.AreEqual(1, result.LabelValues.Length);
            Assert.AreEqual(1, (int)result.LabelValues[0].value);

            IOUtils.Close(writer, taxoWriter, searcher.IndexReader, taxoReader, dir, taxoDir);
        }
        public virtual void TestLabelWithDelimiter()
        {
            Store.Directory dir = NewDirectory();
            Store.Directory taxoDir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir);
            var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE);

            FacetsConfig config = new FacetsConfig();
            config.SetMultiValued("dim", true);

            Document doc = new Document();
            doc.Add(NewTextField("field", "text", Field.Store.NO));
            doc.Add(new FacetField("dim", "test\u001Fone"));
            doc.Add(new FacetField("dim", "test\u001Etwo"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            // NRT open
            IndexSearcher searcher = NewSearcher(writer.Reader);

            // NRT open
            var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            FacetsCollector c = new FacetsCollector();
            searcher.Search(new MatchAllDocsQuery(), c);

            Facets facets = GetTaxonomyFacetCounts(taxoReader, config, c);
            Assert.AreEqual(1, facets.GetSpecificValue("dim", "test\u001Fone"));
            Assert.AreEqual(1, facets.GetSpecificValue("dim", "test\u001Etwo"));

            FacetResult result = facets.GetTopChildren(10, "dim");
            Assert.AreEqual("dim=dim path=[] value=-1 childCount=2\n  test\u001Fone (1)\n  test\u001Etwo (1)\n", result.ToString());
            IOUtils.Close(writer, taxoWriter, searcher.IndexReader, taxoReader, dir, taxoDir);
        }
        public virtual void TestGetFacetResultsTwice()
        {
            // LUCENE-4893: counts were multiplied as many times as getFacetResults was called.
            Store.Directory indexDir = NewDirectory();
            Store.Directory taxoDir = NewDirectory();

            var taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
            IndexWriter iw = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
            FacetsConfig config = new FacetsConfig();

            Document doc = new Document();
            doc.Add(new FacetField("a", "1"));
            doc.Add(new FacetField("b", "1"));
            iw.AddDocument(config.Build(taxoWriter, doc));

            DirectoryReader r = DirectoryReader.Open(iw, true);
            var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            FacetsCollector sfc = new FacetsCollector();
            NewSearcher(r).Search(new MatchAllDocsQuery(), sfc);

            Facets facets = GetTaxonomyFacetCounts(taxoReader, config, sfc);
            IList<FacetResult> res1 = facets.GetAllDims(10);
            IList<FacetResult> res2 = facets.GetAllDims(10);
            Assert.AreEqual(res1, res2, "calling getFacetResults twice should return the .equals()=true result");

            IOUtils.Close(taxoWriter, iw, taxoReader, taxoDir, r, indexDir);
        }
        public virtual void TestDetectMultiValuedField()
        {
            Store.Directory dir = NewDirectory();
            Store.Directory taxoDir = NewDirectory();
            TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE);
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir);
            FacetsConfig config = new FacetsConfig();

            Document doc = new Document();
            doc.Add(NewTextField("field", "text", Field.Store.NO));
            doc.Add(new FacetField("a", "path"));
            doc.Add(new FacetField("a", "path2"));
            try
            {
                config.Build(taxoWriter, doc);
                Fail("did not hit expected exception");
            }
            catch (System.ArgumentException)
            {
                // expected
            }
            IOUtils.Close(writer, taxoWriter, dir, taxoDir);
        }
Beispiel #23
0
        public virtual void TestMixedRangeAndNonRangeTaxonomy()
        {
            Directory               d  = NewDirectory();
            RandomIndexWriter       w  = new RandomIndexWriter(Random(), d, Similarity, TimeZone);
            Directory               td = NewDirectory();
            DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(td, OpenMode.CREATE);

            FacetsConfig config = new FacetsConfig();

            for (long l = 0; l < 100; l++)
            {
                Document doc = new Document();
                // For computing range facet counts:
                doc.Add(new NumericDocValuesField("field", l));
                // For drill down by numeric range:
                doc.Add(new Int64Field("field", l, Field.Store.NO));

                if ((l & 3) == 0)
                {
                    doc.Add(new FacetField("dim", "a"));
                }
                else
                {
                    doc.Add(new FacetField("dim", "b"));
                }
                w.AddDocument(config.Build(tw, doc));
            }

            IndexReader r = w.Reader;

            var tr = new DirectoryTaxonomyReader(tw);

            IndexSearcher s = NewSearcher(r);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: searcher=" + s);
            }

            DrillSideways ds = new DrillSidewaysAnonymousInnerClassHelper(this, s, config, tr);

            // First search, no drill downs:
            DrillDownQuery      ddq = new DrillDownQuery(config);
            DrillSidewaysResult dsr = ds.Search(null, ddq, 10);

            Assert.AreEqual(100, dsr.Hits.TotalHits);
            Assert.AreEqual("dim=dim path=[] value=100 childCount=2\n  b (75)\n  a (25)\n", dsr.Facets.GetTopChildren(10, "dim").ToString());
            Assert.AreEqual("dim=field path=[] value=21 childCount=5\n  less than 10 (10)\n  less than or equal to 10 (11)\n  over 90 (9)\n  90 or above (10)\n  over 1000 (0)\n", dsr.Facets.GetTopChildren(10, "field").ToString());

            // Second search, drill down on dim=b:
            ddq = new DrillDownQuery(config);
            ddq.Add("dim", "b");
            dsr = ds.Search(null, ddq, 10);

            Assert.AreEqual(75, dsr.Hits.TotalHits);
            Assert.AreEqual("dim=dim path=[] value=100 childCount=2\n  b (75)\n  a (25)\n", dsr.Facets.GetTopChildren(10, "dim").ToString());
            Assert.AreEqual("dim=field path=[] value=16 childCount=5\n  less than 10 (7)\n  less than or equal to 10 (8)\n  over 90 (7)\n  90 or above (8)\n  over 1000 (0)\n", dsr.Facets.GetTopChildren(10, "field").ToString());

            // Third search, drill down on "less than or equal to 10":
            ddq = new DrillDownQuery(config);
            ddq.Add("field", NumericRangeQuery.NewInt64Range("field", 0L, 10L, true, true));
            dsr = ds.Search(null, ddq, 10);

            Assert.AreEqual(11, dsr.Hits.TotalHits);
            Assert.AreEqual("dim=dim path=[] value=11 childCount=2\n  b (8)\n  a (3)\n", dsr.Facets.GetTopChildren(10, "dim").ToString());
            Assert.AreEqual("dim=field path=[] value=21 childCount=5\n  less than 10 (10)\n  less than or equal to 10 (11)\n  over 90 (9)\n  90 or above (10)\n  over 1000 (0)\n", dsr.Facets.GetTopChildren(10, "field").ToString());
            IOUtils.Dispose(tw, tr, td, w, r, d);
        }
        public virtual void TestRequireDimCount()
        {
            Store.Directory dir = NewDirectory();
            Store.Directory taxoDir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir);
            DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE);

            FacetsConfig config = new FacetsConfig();
            config.SetRequireDimCount("dim", true);

            config.SetMultiValued("dim2", true);
            config.SetRequireDimCount("dim2", true);

            config.SetMultiValued("dim3", true);
            config.SetHierarchical("dim3", true);
            config.SetRequireDimCount("dim3", true);

            Document doc = new Document();
            doc.Add(NewTextField("field", "text", Field.Store.NO));
            doc.Add(new FacetField("dim", "a"));
            doc.Add(new FacetField("dim2", "a"));
            doc.Add(new FacetField("dim2", "b"));
            doc.Add(new FacetField("dim3", "a", "b"));
            doc.Add(new FacetField("dim3", "a", "c"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            // NRT open
            IndexSearcher searcher = NewSearcher(writer.Reader);

            // NRT open
            var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            FacetsCollector c = new FacetsCollector();
            searcher.Search(new MatchAllDocsQuery(), c);

            Facets facets = GetTaxonomyFacetCounts(taxoReader, config, c);
            Assert.AreEqual(1, facets.GetTopChildren(10, "dim").Value);
            Assert.AreEqual(1, facets.GetTopChildren(10, "dim2").Value);
            Assert.AreEqual(1, facets.GetTopChildren(10, "dim3").Value);
            try
            {
                Assert.AreEqual(1, facets.GetSpecificValue("dim"));
                Fail("didn't hit expected exception");
            }
            catch (System.ArgumentException)
            {
                // expected
            }
            Assert.AreEqual(1, facets.GetSpecificValue("dim2"));
            Assert.AreEqual(1, facets.GetSpecificValue("dim3"));
            IOUtils.Close(writer, taxoWriter, searcher.IndexReader, taxoReader, dir, taxoDir);
        }
Beispiel #25
0
        public virtual void TestRandomDoubles()
        {
            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

            int numDocs = AtLeast(1000);

            double[] values   = new double[numDocs];
            double   minValue = double.PositiveInfinity;
            double   maxValue = double.NegativeInfinity;

            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();
                double   v   = Random().NextDouble();
                values[i] = v;
                doc.Add(new DoubleDocValuesField("field", v));
                doc.Add(new DoubleField("field", v, Field.Store.NO));
                w.AddDocument(doc);
                minValue = Math.Min(minValue, v);
                maxValue = Math.Max(maxValue, v);
            }
            IndexReader r = w.Reader;

            IndexSearcher s      = NewSearcher(r);
            FacetsConfig  config = new FacetsConfig();

            int numIters = AtLeast(10);

            for (int iter = 0; iter < numIters; iter++)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: iter=" + iter);
                }
                int           numRange         = TestUtil.NextInt(Random(), 1, 5);
                DoubleRange[] ranges           = new DoubleRange[numRange];
                int[]         expectedCounts   = new int[numRange];
                double        minAcceptedValue = double.PositiveInfinity;
                double        maxAcceptedValue = double.NegativeInfinity;
                for (int rangeID = 0; rangeID < numRange; rangeID++)
                {
                    double min;
                    if (rangeID > 0 && Random().Next(10) == 7)
                    {
                        // Use an existing boundary:
                        DoubleRange prevRange = ranges[Random().Next(rangeID)];
                        if (Random().NextBoolean())
                        {
                            min = prevRange.Min;
                        }
                        else
                        {
                            min = prevRange.Max;
                        }
                    }
                    else
                    {
                        min = Random().NextDouble();
                    }
                    double max;
                    if (rangeID > 0 && Random().Next(10) == 7)
                    {
                        // Use an existing boundary:
                        DoubleRange prevRange = ranges[Random().Next(rangeID)];
                        if (Random().NextBoolean())
                        {
                            max = prevRange.Min;
                        }
                        else
                        {
                            max = prevRange.Max;
                        }
                    }
                    else
                    {
                        max = Random().NextDouble();
                    }

                    if (min > max)
                    {
                        double x = min;
                        min = max;
                        max = x;
                    }

                    bool minIncl;
                    bool maxIncl;
                    if (min == max)
                    {
                        minIncl = true;
                        maxIncl = true;
                    }
                    else
                    {
                        minIncl = Random().NextBoolean();
                        maxIncl = Random().NextBoolean();
                    }
                    ranges[rangeID] = new DoubleRange("r" + rangeID, min, minIncl, max, maxIncl);

                    // Do "slow but hopefully correct" computation of
                    // expected count:
                    for (int i = 0; i < numDocs; i++)
                    {
                        bool accept = true;
                        if (minIncl)
                        {
                            accept &= values[i] >= min;
                        }
                        else
                        {
                            accept &= values[i] > min;
                        }
                        if (maxIncl)
                        {
                            accept &= values[i] <= max;
                        }
                        else
                        {
                            accept &= values[i] < max;
                        }
                        if (accept)
                        {
                            expectedCounts[rangeID]++;
                            minAcceptedValue = Math.Min(minAcceptedValue, values[i]);
                            maxAcceptedValue = Math.Max(maxAcceptedValue, values[i]);
                        }
                    }
                }

                FacetsCollector sfc = new FacetsCollector();
                s.Search(new MatchAllDocsQuery(), sfc);
                Filter fastMatchFilter;
                if (Random().NextBoolean())
                {
                    if (Random().NextBoolean())
                    {
                        fastMatchFilter = NumericRangeFilter.NewDoubleRange("field", minValue, maxValue, true, true);
                    }
                    else
                    {
                        fastMatchFilter = NumericRangeFilter.NewDoubleRange("field", minAcceptedValue, maxAcceptedValue, true, true);
                    }
                }
                else
                {
                    fastMatchFilter = null;
                }
                ValueSource vs     = new DoubleFieldSource("field");
                Facets      facets = new DoubleRangeFacetCounts("field", vs, sfc, fastMatchFilter, ranges);
                FacetResult result = facets.GetTopChildren(10, "field");
                Assert.AreEqual(numRange, result.LabelValues.Length);
                for (int rangeID = 0; rangeID < numRange; rangeID++)
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("  range " + rangeID + " expectedCount=" + expectedCounts[rangeID]);
                    }
                    LabelAndValue subNode = result.LabelValues[rangeID];
                    Assert.AreEqual("r" + rangeID, subNode.Label);
                    Assert.AreEqual(expectedCounts[rangeID], (int)subNode.Value);

                    DoubleRange range = ranges[rangeID];

                    // Test drill-down:
                    DrillDownQuery ddq = new DrillDownQuery(config);
                    if (Random().NextBoolean())
                    {
                        if (Random().NextBoolean())
                        {
                            ddq.Add("field", NumericRangeFilter.NewDoubleRange("field", range.Min, range.Max, range.MinInclusive, range.MaxInclusive));
                        }
                        else
                        {
                            ddq.Add("field", NumericRangeQuery.NewDoubleRange("field", range.Min, range.Max, range.MinInclusive, range.MaxInclusive));
                        }
                    }
                    else
                    {
                        ddq.Add("field", range.GetFilter(fastMatchFilter, vs));
                    }

                    Assert.AreEqual(expectedCounts[rangeID], s.Search(ddq, 10).TotalHits);
                }
            }

            IOUtils.Dispose(w, r, dir);
        }
        public virtual void TestSegmentsWithoutCategoriesOrResults()
        {
            // tests the accumulator when there are segments with no results
            var indexDir = NewDirectory();
            var taxoDir = NewDirectory();

            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            //iwc.MergePolicy = NoMergePolicy.INSTANCE; // prevent merges
            IndexWriter indexWriter = new IndexWriter(indexDir, iwc);

            var taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
            FacetsConfig config = new FacetsConfig();
            indexTwoDocs(taxoWriter, indexWriter, config, false); // 1st segment, no content, with categories
            indexTwoDocs(taxoWriter, indexWriter, null, true); // 2nd segment, with content, no categories
            indexTwoDocs(taxoWriter, indexWriter, config, true); // 3rd segment ok
            indexTwoDocs(taxoWriter, indexWriter, null, false); // 4th segment, no content, or categories
            indexTwoDocs(taxoWriter, indexWriter, null, true); // 5th segment, with content, no categories
            indexTwoDocs(taxoWriter, indexWriter, config, true); // 6th segment, with content, with categories
            indexTwoDocs(taxoWriter, indexWriter, null, true); // 7th segment, with content, no categories
            IOUtils.Close(indexWriter, taxoWriter);

            DirectoryReader indexReader = DirectoryReader.Open(indexDir);
            var taxoReader = new DirectoryTaxonomyReader(taxoDir);
            IndexSearcher indexSearcher = NewSearcher(indexReader);

            // search for "f:a", only segments 1 and 3 should match results
            Query q = new TermQuery(new Term("f", "a"));
            FacetsCollector sfc = new FacetsCollector();
            indexSearcher.Search(q, sfc);
            Facets facets = GetTaxonomyFacetCounts(taxoReader, config, sfc);
            FacetResult result = facets.GetTopChildren(10, "A");
            Assert.AreEqual(2, result.LabelValues.Length, "wrong number of children");
            foreach (LabelAndValue labelValue in result.LabelValues)
            {
                Assert.AreEqual(2, (int)labelValue.value, "wrong weight for child " + labelValue.label);
            }

            IOUtils.Close(indexReader, taxoReader, indexDir, taxoDir);
        }
        public virtual void TestNrt()
        {
            Store.Directory   dir     = NewDirectory();
            Store.Directory   taxoDir = NewDirectory();
            IndexWriterConfig iwc     = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            // Don't allow tiny maxBufferedDocs; it can make this
            // test too slow:
            iwc.SetMaxBufferedDocs(Math.Max(500, iwc.MaxBufferedDocs));

            // MockRandom/AlcololicMergePolicy are too slow:
            TieredMergePolicy tmp = new TieredMergePolicy();

            tmp.FloorSegmentMB = .001;
            iwc.SetMergePolicy(tmp);
            IndexWriter  w      = new IndexWriter(dir, iwc);
            var          tw     = new DirectoryTaxonomyWriter(taxoDir);
            FacetsConfig config = new FacetsConfig();

            config.SetMultiValued("field", true);
            AtomicBoolean stop = new AtomicBoolean();

            // How many unique facets to index before stopping:
            int ordLimit = TEST_NIGHTLY ? 100000 : 6000;

            var indexer = new IndexerThread(w, config, tw, null, ordLimit, stop);

            var mgr = new SearcherTaxonomyManager(w, true, null, tw);

            var reopener = new ThreadAnonymousInnerClassHelper(this, stop, mgr);

            reopener.Name = "reopener";
            reopener.Start();

            indexer.Name = "indexer";
            indexer.Start();

            try
            {
                while (!stop.Get())
                {
                    SearcherAndTaxonomy pair = mgr.Acquire();
                    try
                    {
                        //System.out.println("search maxOrd=" + pair.taxonomyReader.getSize());
                        FacetsCollector sfc = new FacetsCollector();
                        pair.Searcher.Search(new MatchAllDocsQuery(), sfc);
                        Facets      facets = GetTaxonomyFacetCounts(pair.TaxonomyReader, config, sfc);
                        FacetResult result = facets.GetTopChildren(10, "field");
                        if (pair.Searcher.IndexReader.NumDocs > 0)
                        {
                            //System.out.println(pair.taxonomyReader.getSize());
                            Assert.True(result.ChildCount > 0);
                            Assert.True(result.LabelValues.Length > 0);
                        }

                        //if (VERBOSE) {
                        //System.out.println("TEST: facets=" + FacetTestUtils.toString(results.get(0)));
                        //}
                    }
                    finally
                    {
                        mgr.Release(pair);
                    }
                }
            }
            finally
            {
                indexer.Join();
                reopener.Join();
            }

            if (VERBOSE)
            {
                Console.WriteLine("TEST: now stop");
            }

            IOUtils.Close(mgr, tw, w, taxoDir, dir);
        }
        public virtual void TestSeparateIndexedFields()
        {
            Store.Directory indexDir = NewDirectory();
            Store.Directory taxoDir = NewDirectory();

            var taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
            IndexWriter iw = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
            FacetsConfig config = new FacetsConfig();
            config.SetIndexFieldName("b", "$b");

            for (int i = AtLeast(30); i > 0; --i)
            {
                Document doc = new Document();
                doc.Add(new StringField("f", "v", Field.Store.NO));
                doc.Add(new FacetField("a", "1"));
                doc.Add(new FacetField("b", "1"));
                iw.AddDocument(config.Build(taxoWriter, doc));
            }

            DirectoryReader r = DirectoryReader.Open(iw, true);
            var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            FacetsCollector sfc = new FacetsCollector();
            NewSearcher(r).Search(new MatchAllDocsQuery(), sfc);
            Facets facets1 = GetTaxonomyFacetCounts(taxoReader, config, sfc);
            Facets facets2 = GetTaxonomyFacetCounts(taxoReader, config, sfc, "$b");
            Assert.AreEqual(r.MaxDoc, (int)facets1.GetTopChildren(10, "a").Value);
            Assert.AreEqual(r.MaxDoc, (int)facets2.GetTopChildren(10, "b").Value);
            IOUtils.Close(taxoWriter, iw, taxoReader, taxoDir, r, indexDir);
        }
        private void indexTwoDocs(ITaxonomyWriter taxoWriter, IndexWriter indexWriter, FacetsConfig config, bool withContent)
        {
            for (int i = 0; i < 2; i++)
            {
                Document doc = new Document();
                if (withContent)
                {
                    doc.Add(new StringField("f", "a", Field.Store.NO));
                }
                if (config != null)
                {
                    doc.Add(new FacetField("A", Convert.ToString(i)));
                    indexWriter.AddDocument(config.Build(taxoWriter, doc));
                }
                else
                {
                    indexWriter.AddDocument(doc);
                }
            }

            indexWriter.Commit();
        }
        public virtual void TestSparseFacets()
        {
            Store.Directory dir = NewDirectory();
            Store.Directory taxoDir = NewDirectory();

            // Writes facet ords to a separate directory from the
            // main index:
            var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE);

            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir);
            FacetsConfig config = new FacetsConfig();

            Document doc = new Document();
            doc.Add(new FacetField("a", "foo1"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            if (Random().NextBoolean())
            {
                writer.Commit();
            }

            doc = new Document();
            doc.Add(new FacetField("a", "foo2"));
            doc.Add(new FacetField("b", "bar1"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            if (Random().NextBoolean())
            {
                writer.Commit();
            }

            doc = new Document();
            doc.Add(new FacetField("a", "foo3"));
            doc.Add(new FacetField("b", "bar2"));
            doc.Add(new FacetField("c", "baz1"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            // NRT open
            IndexSearcher searcher = NewSearcher(writer.Reader);

            // NRT open
            var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            FacetsCollector c = new FacetsCollector();
            searcher.Search(new MatchAllDocsQuery(), c);

            Facets facets = GetTaxonomyFacetCounts(taxoReader, new FacetsConfig(), c);

            // Ask for top 10 labels for any dims that have counts:
            IList<FacetResult> results = facets.GetAllDims(10);

            Assert.AreEqual(3, results.Count);
            Assert.AreEqual("dim=a path=[] value=3 childCount=3\n  foo1 (1)\n  foo2 (1)\n  foo3 (1)\n", results[0].ToString());
            Assert.AreEqual("dim=b path=[] value=2 childCount=2\n  bar1 (1)\n  bar2 (1)\n", results[1].ToString());
            Assert.AreEqual("dim=c path=[] value=1 childCount=1\n  baz1 (1)\n", results[2].ToString());

            IOUtils.Close(writer, taxoWriter, searcher.IndexReader, taxoReader, taxoDir, dir);
        }
Beispiel #31
0
 /// <summary>
 /// Create <see cref="TaxonomyFacetSumSingleAssociations"/> against
 /// the default index field.
 /// </summary>
 public TaxonomyFacetSumSingleAssociations(TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc)
     : this(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, taxoReader, config, fc)
 {
 }
        public virtual void TestWrongIndexFieldName()
        {
            Store.Directory dir = NewDirectory();
            Store.Directory taxoDir = NewDirectory();

            // Writes facet ords to a separate directory from the
            // main index:
            DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE);

            FacetsConfig config = new FacetsConfig();
            config.SetIndexFieldName("a", "$facets2");
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir);

            Document doc = new Document();
            doc.Add(new FacetField("a", "foo1"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            // NRT open
            IndexSearcher searcher = NewSearcher(writer.Reader);

            // NRT open
            var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            FacetsCollector c = new FacetsCollector();
            searcher.Search(new MatchAllDocsQuery(), c);

            // Uses default $facets field:
            Facets facets;
            if (Random().NextBoolean())
            {
                facets = new FastTaxonomyFacetCounts(taxoReader, config, c);
            }
            else
            {
                OrdinalsReader ordsReader = new DocValuesOrdinalsReader();
                if (Random().NextBoolean())
                {
                    ordsReader = new CachedOrdinalsReader(ordsReader);
                }
                facets = new TaxonomyFacetCounts(ordsReader, taxoReader, config, c);
            }

            // Ask for top 10 labels for any dims that have counts:
            IList<FacetResult> results = facets.GetAllDims(10);
            Assert.True(results.Count == 0);

            try
            {
                facets.GetSpecificValue("a");
                Fail("should have hit exc");
            }
            catch (System.ArgumentException)
            {
                // expected
            }

            try
            {
                facets.GetTopChildren(10, "a");
                Fail("should have hit exc");
            }
            catch (System.ArgumentException)
            {
                // expected
            }

            IOUtils.Close(writer, taxoWriter, searcher.IndexReader, taxoReader, taxoDir, dir);
        }
        public virtual void TestCountRoot()
        {
            // LUCENE-4882: FacetsAccumulator threw NPE if a FacetRequest was defined on CP.EMPTY
            Store.Directory indexDir = NewDirectory();
            Store.Directory taxoDir = NewDirectory();

            var taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
            IndexWriter iw = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
            FacetsConfig config = new FacetsConfig();
            for (int i = AtLeast(30); i > 0; --i)
            {
                Document doc = new Document();
                doc.Add(new FacetField("a", "1"));
                doc.Add(new FacetField("b", "1"));
                iw.AddDocument(config.Build(taxoWriter, doc));
            }

            DirectoryReader r = DirectoryReader.Open(iw, true);
            DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            FacetsCollector sfc = new FacetsCollector();
            NewSearcher(r).Search(new MatchAllDocsQuery(), sfc);
            Facets facets = GetTaxonomyFacetCounts(taxoReader, config, sfc);
            foreach (FacetResult result in facets.GetAllDims(10))
            {
                Assert.AreEqual(r.NumDocs, (int)result.Value);
            }

            IOUtils.Close(taxoWriter, iw, taxoReader, taxoDir, r, indexDir);
        }
        public virtual void TestBasic()
        {
            Store.Directory dir = NewDirectory();
            Store.Directory taxoDir = NewDirectory();

            // Writes facet ords to a separate directory from the
            // main index:
            var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE);

            FacetsConfig config = new FacetsConfig();
            config.SetHierarchical("Publish Date", true);

            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir);

            Document doc = new Document();
            doc.Add(new FacetField("Author", "Bob"));
            doc.Add(new FacetField("Publish Date", "2010", "10", "15"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            doc = new Document();
            doc.Add(new FacetField("Author", "Lisa"));
            doc.Add(new FacetField("Publish Date", "2010", "10", "20"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            doc = new Document();
            doc.Add(new FacetField("Author", "Lisa"));
            doc.Add(new FacetField("Publish Date", "2012", "1", "1"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            doc = new Document();
            doc.Add(new FacetField("Author", "Susan"));
            doc.Add(new FacetField("Publish Date", "2012", "1", "7"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            doc = new Document();
            doc.Add(new FacetField("Author", "Frank"));
            doc.Add(new FacetField("Publish Date", "1999", "5", "5"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            // NRT open
            IndexSearcher searcher = NewSearcher(writer.Reader);

            // NRT open
            var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            // Aggregate the facet counts:
            FacetsCollector c = new FacetsCollector();

            // MatchAllDocsQuery is for "browsing" (counts facets
            // for all non-deleted docs in the index); normally
            // you'd use a "normal" query, and use MultiCollector to
            // wrap collecting the "normal" hits and also facets:
            searcher.Search(new MatchAllDocsQuery(), c);

            Facets facets = new FastTaxonomyFacetCounts(taxoReader, config, c);

            // Retrieve & verify results:
            Assert.AreEqual("dim=Publish Date path=[] value=5 childCount=3\n  2010 (2)\n  2012 (2)\n  1999 (1)\n", facets.GetTopChildren(10, "Publish Date").ToString());
            Assert.AreEqual("dim=Author path=[] value=5 childCount=4\n  Lisa (2)\n  Bob (1)\n  Susan (1)\n  Frank (1)\n", facets.GetTopChildren(10, "Author").ToString());

            // Now user drills down on Publish Date/2010:
            DrillDownQuery q2 = new DrillDownQuery(config);
            q2.Add("Publish Date", "2010");
            c = new FacetsCollector();
            searcher.Search(q2, c);
            facets = new FastTaxonomyFacetCounts(taxoReader, config, c);
            Assert.AreEqual("dim=Author path=[] value=2 childCount=2\n  Bob (1)\n  Lisa (1)\n", facets.GetTopChildren(10, "Author").ToString());

            Assert.AreEqual(1, facets.GetSpecificValue("Author", "Lisa"));

            Assert.Null(facets.GetTopChildren(10, "Non exitent dim"));

            // Smoke test PrintTaxonomyStats:
            ByteArrayOutputStream bos = new ByteArrayOutputStream();
            PrintTaxonomyStats.PrintStats(taxoReader, Console.Out, true);
            string result = bos.ToString();
            Assert.True(result.IndexOf("/Author: 4 immediate children; 5 total categories", StringComparison.Ordinal) != -1);
            Assert.True(result.IndexOf("/Publish Date: 3 immediate children; 12 total categories", StringComparison.Ordinal) != -1);
            // Make sure at least a few nodes of the tree came out:
            Assert.True(result.IndexOf("  /1999", StringComparison.Ordinal) != -1);
            Assert.True(result.IndexOf("  /2012", StringComparison.Ordinal) != -1);
            Assert.True(result.IndexOf("      /20", StringComparison.Ordinal) != -1);

            IOUtils.Close(writer, taxoWriter, searcher.IndexReader, taxoReader, taxoDir, dir);
        }
Beispiel #35
0
 /// <summary>
 /// Create <see cref="FastTaxonomyFacetCounts"/>, which also
 /// counts all facet labels.
 /// </summary>
 public FastTaxonomyFacetCounts(TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc)
     : this(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, taxoReader, config, fc)
 {
 }
        private void indexTwoDocs(TaxonomyWriter taxoWriter, IndexWriter indexWriter, FacetsConfig config, bool withContent)
        {
            for (int i = 0; i < 2; i++)
            {
                Document doc = new Document();
                if (withContent)
                {
                    doc.Add(new StringField("f", "a", Field.Store.NO));
                }
                if (config != null)
                {
                    doc.Add(new FacetField("A", Convert.ToString(i)));
                    indexWriter.AddDocument(config.Build(taxoWriter, doc));
                }
                else
                {
                    indexWriter.AddDocument(doc);
                }
            }

            indexWriter.Commit();
        }
Beispiel #37
0
        /// <summary>
        /// Sole constructor.
        /// </summary>
        protected internal TaxonomyFacets(string indexFieldName, TaxonomyReader taxoReader, FacetsConfig config)
        {
            this.m_indexFieldName = indexFieldName;
            this.m_taxoReader     = taxoReader;
            this.m_config         = config;
            ParallelTaxonomyArrays pta = taxoReader.ParallelTaxonomyArrays;

            m_children = pta.Children;
            m_siblings = pta.Siblings;
        }
        public virtual void TestSparseFacets()
        {
            Store.Directory dir     = NewDirectory();
            Store.Directory taxoDir = NewDirectory();

            // Writes facet ords to a separate directory from the
            // main index:
            var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);

            RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);
            FacetsConfig config = new FacetsConfig();

            Document doc = new Document();

            doc.Add(new FacetField("a", "foo1"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            if (Random.NextBoolean())
            {
                writer.Commit();
            }

            doc = new Document();
            doc.Add(new FacetField("a", "foo2"));
            doc.Add(new FacetField("b", "bar1"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            if (Random.NextBoolean())
            {
                writer.Commit();
            }

            doc = new Document();
            doc.Add(new FacetField("a", "foo3"));
            doc.Add(new FacetField("b", "bar2"));
            doc.Add(new FacetField("c", "baz1"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            // NRT open
            IndexSearcher searcher = NewSearcher(writer.GetReader());

            // NRT open
            var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            FacetsCollector c = new FacetsCollector();

            searcher.Search(new MatchAllDocsQuery(), c);

            Facets facets = GetTaxonomyFacetCounts(taxoReader, new FacetsConfig(), c);

            // Ask for top 10 labels for any dims that have counts:
            IList <FacetResult> results = facets.GetAllDims(10);

            Assert.AreEqual(3, results.Count);
            Assert.AreEqual("dim=a path=[] value=3 childCount=3\n  foo1 (1)\n  foo2 (1)\n  foo3 (1)\n", results[0].ToString());
            Assert.AreEqual("dim=b path=[] value=2 childCount=2\n  bar1 (1)\n  bar2 (1)\n", results[1].ToString());
            Assert.AreEqual("dim=c path=[] value=1 childCount=1\n  baz1 (1)\n", results[2].ToString());

            IOUtils.Dispose(writer, taxoWriter, searcher.IndexReader, taxoReader, taxoDir, dir);
        }
Beispiel #39
0
 public DrillSidewaysAnonymousInnerClassHelper2(TestRangeFacetCounts outerInstance, IndexSearcher indexSearcher, FacetsConfig facetsConfig, TaxonomyReader org, ValueSource valueSource, DoubleRange[] doubleRanges, Filter filter)
     : base(indexSearcher, facetsConfig, org)
 {
     this.outerInstance   = outerInstance;
     this.vs              = valueSource;
     this.ranges          = doubleRanges;
     this.fastMatchFilter = filter;
 }
Beispiel #40
0
        public virtual void TestWrongIndexFieldName()
        {
            Store.Directory dir     = NewDirectory();
            Store.Directory taxoDir = NewDirectory();

            // Writes facet ords to a separate directory from the
            // main index:
            var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);

            FacetsConfig config = new FacetsConfig();

            config.SetIndexFieldName("a", "$facets2");

            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

            Document doc = new Document();

            doc.Add(new Int32Field("num", 10, Field.Store.NO));
            doc.Add(new FacetField("a", "foo1"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            // NRT open
            IndexSearcher searcher = NewSearcher(writer.Reader);

            writer.Dispose();

            // NRT open
            var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            taxoWriter.Dispose();

            FacetsCollector c = new FacetsCollector();

            searcher.Search(new MatchAllDocsQuery(), c);

            TaxonomyFacetSumValueSource facets = new TaxonomyFacetSumValueSource(taxoReader, config, c, new Int32FieldSource("num"));

            // Ask for top 10 labels for any dims that have counts:
            IList <FacetResult> results = facets.GetAllDims(10);

            Assert.True(results.Count == 0);

            try
            {
                facets.GetSpecificValue("a");
                Fail("should have hit exc");
            }
            catch (System.ArgumentException)
            {
                // expected
            }

            try
            {
                facets.GetTopChildren(10, "a");
                Fail("should have hit exc");
            }
            catch (System.ArgumentException)
            {
                // expected
            }

            IOUtils.Dispose(searcher.IndexReader, taxoReader, dir, taxoDir);
        }
Beispiel #41
0
 public DrillSidewaysAnonymousInnerClassHelper(TestRangeFacetCounts outerInstance, IndexSearcher s, FacetsConfig config, TaxonomyReader tr)
     : base(s, config, tr)
 {
     this.outerInstance = outerInstance;
 }
Beispiel #42
0
        public virtual void TestConcurrency()
        {
            AtomicInt32 numDocs  = new AtomicInt32(AtLeast(10000));
            Directory   indexDir = NewDirectory();
            Directory   taxoDir  = NewDirectory();
            ConcurrentDictionary <string, string> values = new ConcurrentDictionary <string, string>();
            IndexWriter iw = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, null));
            var         tw = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE, NewTaxoWriterCache(numDocs.Get()));

            ThreadClass[] indexThreads = new ThreadClass[AtLeast(4)];
            FacetsConfig  config       = new FacetsConfig();

            for (int i = 0; i < 10; i++)
            {
                config.SetHierarchical("l1." + i, true);
                config.SetMultiValued("l1." + i, true);
            }

            for (int i = 0; i < indexThreads.Length; i++)
            {
                indexThreads[i] = new ThreadAnonymousInnerClassHelper(this, numDocs, values, iw, tw, config);
            }

            foreach (ThreadClass t in indexThreads)
            {
                t.Start();
            }
            foreach (ThreadClass t in indexThreads)
            {
                t.Join();
            }

            var tr = new DirectoryTaxonomyReader(tw);

            // +1 for root category
            if (values.Count + 1 != tr.Count)
            {
                foreach (string value in values.Keys)
                {
                    FacetLabel label = new FacetLabel(FacetsConfig.StringToPath(value));
                    if (tr.GetOrdinal(label) == -1)
                    {
                        Console.WriteLine("FAIL: path=" + label + " not recognized");
                    }
                }
                Fail("mismatch number of categories");
            }
            int[] parents = tr.ParallelTaxonomyArrays.Parents;
            foreach (string cat in values.Keys)
            {
                FacetLabel cp = new FacetLabel(FacetsConfig.StringToPath(cat));
                Assert.True(tr.GetOrdinal(cp) > 0, "category not found " + cp);
                int        level     = cp.Length;
                int        parentOrd = 0; // for root, parent is always virtual ROOT (ord=0)
                FacetLabel path      = null;
                for (int i = 0; i < level; i++)
                {
                    path = cp.Subpath(i + 1);
                    int ord = tr.GetOrdinal(path);
                    Assert.AreEqual(parentOrd, parents[ord], "invalid parent for cp=" + path);
                    parentOrd = ord; // next level should have this parent
                }
            }

            IOUtils.Dispose(tw, iw, tr, taxoDir, indexDir);
        }
        public virtual void TestSparseFacets()
        {
            AssumeTrue("Test requires SortedSetDV support", DefaultCodecSupportsSortedSet);
            Directory dir = NewDirectory();

            RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);

            FacetsConfig config = new FacetsConfig();

            Document doc = new Document();

            doc.Add(new SortedSetDocValuesFacetField("a", "foo1"));
            writer.AddDocument(config.Build(doc));

            if (Random.NextBoolean())
            {
                writer.Commit();
            }

            doc = new Document();
            doc.Add(new SortedSetDocValuesFacetField("a", "foo2"));
            doc.Add(new SortedSetDocValuesFacetField("b", "bar1"));
            writer.AddDocument(config.Build(doc));

            if (Random.NextBoolean())
            {
                writer.Commit();
            }

            doc = new Document();
            doc.Add(new SortedSetDocValuesFacetField("a", "foo3"));
            doc.Add(new SortedSetDocValuesFacetField("b", "bar2"));
            doc.Add(new SortedSetDocValuesFacetField("c", "baz1"));
            writer.AddDocument(config.Build(doc));

            // NRT open
            IndexSearcher searcher = NewSearcher(writer.GetReader());

            writer.Dispose();

            // Per-top-reader state:
            SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(searcher.IndexReader);

            FacetsCollector c = new FacetsCollector();

            searcher.Search(new MatchAllDocsQuery(), c);
            SortedSetDocValuesFacetCounts facets = new SortedSetDocValuesFacetCounts(state, c);

            // Ask for top 10 labels for any dims that have counts:
            IList <FacetResult> results = facets.GetAllDims(10);

            Assert.AreEqual(3, results.Count);
            Assert.AreEqual("dim=a path=[] value=3 childCount=3\n  foo1 (1)\n  foo2 (1)\n  foo3 (1)\n", results[0].ToString());
            Assert.AreEqual("dim=b path=[] value=2 childCount=2\n  bar1 (1)\n  bar2 (1)\n", results[1].ToString());
            Assert.AreEqual("dim=c path=[] value=1 childCount=1\n  baz1 (1)\n", results[2].ToString());

            searcher.IndexReader.Dispose();
            dir.Dispose();
        }
Beispiel #44
0
 public ThreadAnonymousInnerClassHelper(TestConcurrentFacetedIndexing outerInstance, AtomicInt32 numDocs, ConcurrentDictionary <string, string> values, IndexWriter iw, Lucene.Net.Facet.Taxonomy.Directory.DirectoryTaxonomyWriter tw, FacetsConfig config)
 {
     this.outerInstance = outerInstance;
     this.numDocs       = numDocs;
     this.values        = values;
     this.iw            = iw;
     this.tw            = tw;
     this.config        = config;
 }
        public override int GetOrdinal(FacetLabel cp)
        {
            EnsureOpen();
            if (cp.Length == 0)
            {
                return(ROOT_ORDINAL);
            }

            // First try to find the answer in the LRU cache:
            lock (ordinalCache)
            {
                IntClass res = ordinalCache.Get(cp);
                if (res != null && res.IntItem != null)
                {
                    if ((int)res.IntItem.Value < indexReader.MaxDoc)
                    {
                        // Since the cache is shared with DTR instances allocated from
                        // doOpenIfChanged, we need to ensure that the ordinal is one that
                        // this DTR instance recognizes.
                        return((int)res.IntItem.Value);
                    }
                    else
                    {
                        // if we get here, it means that the category was found in the cache,
                        // but is not recognized by this TR instance. Therefore there's no
                        // need to continue search for the path on disk, because we won't find
                        // it there too.
                        return(TaxonomyReader.INVALID_ORDINAL);
                    }
                }
            }

            // If we're still here, we have a cache miss. We need to fetch the
            // value from disk, and then also put it in the cache:
            int      ret  = TaxonomyReader.INVALID_ORDINAL;
            DocsEnum docs = MultiFields.GetTermDocsEnum(indexReader, null, Consts.FULL, new BytesRef(FacetsConfig.PathToString(cp.Components, cp.Length)), 0);

            if (docs != null && docs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
            {
                ret = docs.DocID();

                // we only store the fact that a category exists, not its inexistence.
                // This is required because the caches are shared with new DTR instances
                // that are allocated from doOpenIfChanged. Therefore, if we only store
                // information about found categories, we cannot accidently tell a new
                // generation of DTR that a category does not exist.
                lock (ordinalCache)
                {
                    ordinalCache.Put(cp, new IntClass {
                        IntItem = Convert.ToInt32(ret)
                    });
                }
            }

            return(ret);
        }
Beispiel #46
0
        /// <summary>
        /// Look up the given category in the cache and/or the on-disk storage,
        /// returning the category's ordinal, or a negative number in case the
        /// category does not yet exist in the taxonomy.
        /// </summary>
        protected virtual int FindCategory(FacetLabel categoryPath)
        {
            lock (this)
            {
                // If we can find the category in the cache, or we know the cache is
                // complete, we can return the response directly from it
                int res = cache.Get(categoryPath);
                if (res >= 0 || cacheIsComplete)
                {
                    return(res);
                }

                cacheMisses.IncrementAndGet();
                // After a few cache misses, it makes sense to read all the categories
                // from disk and into the cache. The reason not to do this on the first
                // cache miss (or even when opening the writer) is that it will
                // significantly slow down the case when a taxonomy is opened just to
                // add one category. The idea only spending a long time on reading
                // after enough time was spent on cache misses is known as an "online
                // algorithm".
                PerhapsFillCache();
                res = cache.Get(categoryPath);
                if (res >= 0 || cacheIsComplete)
                {
                    // if after filling the cache from the info on disk, the category is in it
                    // or the cache is complete, return whatever cache.get returned.
                    return(res);
                }

                // if we get here, it means the category is not in the cache, and it is not
                // complete, and therefore we must look for the category on disk.

                // We need to get an answer from the on-disk index.
                InitReaderManager();

                int             doc    = -1;
                DirectoryReader reader = readerManager.Acquire();
                try
                {
                    BytesRef  catTerm   = new BytesRef(FacetsConfig.PathToString(categoryPath.Components, categoryPath.Length));
                    TermsEnum termsEnum = null; // reuse
                    DocsEnum  docs      = null; // reuse
                    foreach (AtomicReaderContext ctx in reader.Leaves)
                    {
                        Terms terms = ctx.AtomicReader.GetTerms(Consts.FULL);
                        if (terms != null)
                        {
                            termsEnum = terms.GetIterator(termsEnum);
                            if (termsEnum.SeekExact(catTerm))
                            {
                                // liveDocs=null because the taxonomy has no deletes
                                docs = termsEnum.Docs(null, docs, 0); // freqs not required
                                // if the term was found, we know it has exactly one document.
                                doc = docs.NextDoc() + ctx.DocBase;
                                break;
                            }
                        }
                    }
                }
                finally
                {
                    readerManager.Release(reader);
                }
                if (doc > 0)
                {
                    AddToCache(categoryPath, doc);
                }
                return(doc);
            }
        }
        public virtual void TestDirectory()
        {
            Store.Directory indexDir = NewDirectory();
            Store.Directory taxoDir  = NewDirectory();
            IndexWriter     w        = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
            var             tw       = new DirectoryTaxonomyWriter(taxoDir);

            // first empty commit
            w.Commit();
            tw.Commit();
            var          mgr    = new SearcherTaxonomyManager(indexDir, taxoDir, null);
            FacetsConfig config = new FacetsConfig();

            config.SetMultiValued("field", true);
            AtomicBoolean stop = new AtomicBoolean();

            // How many unique facets to index before stopping:
            int ordLimit = TEST_NIGHTLY ? 100000 : 6000;

            var indexer = new IndexerThread(w, config, tw, mgr, ordLimit, stop);

            indexer.Start();

            try
            {
                while (!stop.Get())
                {
                    SearcherAndTaxonomy pair = mgr.Acquire();
                    try
                    {
                        //System.out.println("search maxOrd=" + pair.taxonomyReader.getSize());
                        FacetsCollector sfc = new FacetsCollector();
                        pair.Searcher.Search(new MatchAllDocsQuery(), sfc);
                        Facets      facets = GetTaxonomyFacetCounts(pair.TaxonomyReader, config, sfc);
                        FacetResult result = facets.GetTopChildren(10, "field");
                        if (pair.Searcher.IndexReader.NumDocs > 0)
                        {
                            //System.out.println(pair.taxonomyReader.getSize());
                            Assert.True(result.ChildCount > 0);
                            Assert.True(result.LabelValues.Length > 0);
                        }

                        //if (VERBOSE) {
                        //System.out.println("TEST: facets=" + FacetTestUtils.toString(results.get(0)));
                        //}
                    }
                    finally
                    {
                        mgr.Release(pair);
                    }
                }
            }
            finally
            {
                indexer.Join();
            }

            if (VERBOSE)
            {
                Console.WriteLine("TEST: now stop");
            }

            IOUtils.Close(mgr, tw, w, taxoDir, indexDir);
        }
 /// <summary>
 /// Create <see cref="TaxonomyFacetSumIntAssociations"/> against
 /// the default index field. 
 /// </summary>
 public TaxonomyFacetSumIntAssociations(TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc)
     : this(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, taxoReader, config, fc)
 {
 }
        public virtual void TestBasic()
        {
            Store.Directory dir     = NewDirectory();
            Store.Directory taxoDir = NewDirectory();

            // Writes facet ords to a separate directory from the
            // main index:
            var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE);

            FacetsConfig config = new FacetsConfig();

            config.SetHierarchical("Publish Date", true);

            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

            Document doc = new Document();

            doc.Add(new FacetField("Author", "Bob"));
            doc.Add(new FacetField("Publish Date", "2010", "10", "15"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            doc = new Document();
            doc.Add(new FacetField("Author", "Lisa"));
            doc.Add(new FacetField("Publish Date", "2010", "10", "20"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            doc = new Document();
            doc.Add(new FacetField("Author", "Lisa"));
            doc.Add(new FacetField("Publish Date", "2012", "1", "1"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            doc = new Document();
            doc.Add(new FacetField("Author", "Susan"));
            doc.Add(new FacetField("Publish Date", "2012", "1", "7"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            doc = new Document();
            doc.Add(new FacetField("Author", "Frank"));
            doc.Add(new FacetField("Publish Date", "1999", "5", "5"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            // NRT open
            IndexSearcher searcher = NewSearcher(writer.Reader);

            // NRT open
            var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            // Aggregate the facet counts:
            FacetsCollector c = new FacetsCollector();

            // MatchAllDocsQuery is for "browsing" (counts facets
            // for all non-deleted docs in the index); normally
            // you'd use a "normal" query, and use MultiCollector to
            // wrap collecting the "normal" hits and also facets:
            searcher.Search(new MatchAllDocsQuery(), c);

            Facets facets = new FastTaxonomyFacetCounts(taxoReader, config, c);

            // Retrieve & verify results:
            Assert.AreEqual("dim=Publish Date path=[] value=5 childCount=3\n  2010 (2)\n  2012 (2)\n  1999 (1)\n", facets.GetTopChildren(10, "Publish Date").ToString());
            Assert.AreEqual("dim=Author path=[] value=5 childCount=4\n  Lisa (2)\n  Bob (1)\n  Susan (1)\n  Frank (1)\n", facets.GetTopChildren(10, "Author").ToString());

            // Now user drills down on Publish Date/2010:
            DrillDownQuery q2 = new DrillDownQuery(config);

            q2.Add("Publish Date", "2010");
            c = new FacetsCollector();
            searcher.Search(q2, c);
            facets = new FastTaxonomyFacetCounts(taxoReader, config, c);
            Assert.AreEqual("dim=Author path=[] value=2 childCount=2\n  Bob (1)\n  Lisa (1)\n", facets.GetTopChildren(10, "Author").ToString());

            Assert.AreEqual(1, facets.GetSpecificValue("Author", "Lisa"));

            Assert.Null(facets.GetTopChildren(10, "Non exitent dim"));

            // Smoke test PrintTaxonomyStats:
            string result;

            using (ByteArrayOutputStream bos = new ByteArrayOutputStream())
            {
                using (StreamWriter w = new StreamWriter(bos, Encoding.UTF8, 2048, true)
                {
                    AutoFlush = true
                })
                {
                    PrintTaxonomyStats.PrintStats(taxoReader, w, true);
                }
                result = bos.ToString();
            }
            Assert.True(result.IndexOf("/Author: 4 immediate children; 5 total categories", StringComparison.Ordinal) != -1);
            Assert.True(result.IndexOf("/Publish Date: 3 immediate children; 12 total categories", StringComparison.Ordinal) != -1);
            // Make sure at least a few nodes of the tree came out:
            Assert.True(result.IndexOf("  /1999", StringComparison.Ordinal) != -1);
            Assert.True(result.IndexOf("  /2012", StringComparison.Ordinal) != -1);
            Assert.True(result.IndexOf("      /20", StringComparison.Ordinal) != -1);

            IOUtils.Close(writer, taxoWriter, searcher.IndexReader, taxoReader, taxoDir, dir);
        }
        public virtual void TestRandomLongs()
        {
            Directory dir = NewDirectory();
            var w = new RandomIndexWriter(Random(), dir);

            int numDocs = AtLeast(1000);
            if (VERBOSE)
            {
                Console.WriteLine("TEST: numDocs=" + numDocs);
            }
            long[] values = new long[numDocs];
            long minValue = long.MaxValue;
            long maxValue = long.MinValue;
            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();
                long v = Random().NextLong();
                values[i] = v;
                doc.Add(new NumericDocValuesField("field", v));
                doc.Add(new LongField("field", v, Field.Store.NO));
                w.AddDocument(doc);
                minValue = Math.Min(minValue, v);
                maxValue = Math.Max(maxValue, v);
            }
            IndexReader r = w.Reader;

            IndexSearcher s = NewSearcher(r);
            FacetsConfig config = new FacetsConfig();

            int numIters = AtLeast(10);
            for (int iter = 0; iter < numIters; iter++)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: iter=" + iter);
                }
                int numRange = TestUtil.NextInt(Random(), 1, 100);
                LongRange[] ranges = new LongRange[numRange];
                int[] expectedCounts = new int[numRange];
                long minAcceptedValue = long.MaxValue;
                long maxAcceptedValue = long.MinValue;
                for (int rangeID = 0; rangeID < numRange; rangeID++)
                {
                    long min;
                    if (rangeID > 0 && Random().Next(10) == 7)
                    {
                        // Use an existing boundary:
                        LongRange prevRange = ranges[Random().Next(rangeID)];
                        if (Random().NextBoolean())
                        {
                            min = prevRange.min;
                        }
                        else
                        {
                            min = prevRange.max;
                        }
                    }
                    else
                    {
                        min = Random().NextLong();
                    }
                    long max;
                    if (rangeID > 0 && Random().Next(10) == 7)
                    {
                        // Use an existing boundary:
                        LongRange prevRange = ranges[Random().Next(rangeID)];
                        if (Random().NextBoolean())
                        {
                            max = prevRange.min;
                        }
                        else
                        {
                            max = prevRange.max;
                        }
                    }
                    else
                    {
                        max = Random().NextLong();
                    }

                    if (min > max)
                    {
                        long x = min;
                        min = max;
                        max = x;
                    }
                    bool minIncl;
                    bool maxIncl;
                    if (min == max)
                    {
                        minIncl = true;
                        maxIncl = true;
                    }
                    else
                    {
                        minIncl = Random().NextBoolean();
                        maxIncl = Random().NextBoolean();
                    }
                    ranges[rangeID] = new LongRange("r" + rangeID, min, minIncl, max, maxIncl);
                    if (VERBOSE)
                    {
                        Console.WriteLine("  range " + rangeID + ": " + ranges[rangeID]);
                    }

                    // Do "slow but hopefully correct" computation of
                    // expected count:
                    for (int i = 0; i < numDocs; i++)
                    {
                        bool accept = true;
                        if (minIncl)
                        {
                            accept &= values[i] >= min;
                        }
                        else
                        {
                            accept &= values[i] > min;
                        }
                        if (maxIncl)
                        {
                            accept &= values[i] <= max;
                        }
                        else
                        {
                            accept &= values[i] < max;
                        }
                        if (accept)
                        {
                            expectedCounts[rangeID]++;
                            minAcceptedValue = Math.Min(minAcceptedValue, values[i]);
                            maxAcceptedValue = Math.Max(maxAcceptedValue, values[i]);
                        }
                    }
                }

                FacetsCollector sfc = new FacetsCollector();
                s.Search(new MatchAllDocsQuery(), sfc);
                Filter fastMatchFilter;
                if (Random().NextBoolean())
                {
                    if (Random().NextBoolean())
                    {
                        fastMatchFilter = NumericRangeFilter.NewLongRange("field", minValue, maxValue, true, true);
                    }
                    else
                    {
                        fastMatchFilter = NumericRangeFilter.NewLongRange("field", minAcceptedValue, maxAcceptedValue, true, true);
                    }
                }
                else
                {
                    fastMatchFilter = null;
                }
                ValueSource vs = new LongFieldSource("field");
                Facets facets = new LongRangeFacetCounts("field", vs, sfc, fastMatchFilter, ranges);
                FacetResult result = facets.GetTopChildren(10, "field");
                Assert.AreEqual(numRange, result.LabelValues.Length);
                for (int rangeID = 0; rangeID < numRange; rangeID++)
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("  range " + rangeID + " expectedCount=" + expectedCounts[rangeID]);
                    }
                    LabelAndValue subNode = result.LabelValues[rangeID];
                    Assert.AreEqual("r" + rangeID, subNode.label);
                    Assert.AreEqual(expectedCounts[rangeID], (int)subNode.value);

                    LongRange range = ranges[rangeID];

                    // Test drill-down:
                    DrillDownQuery ddq = new DrillDownQuery(config);
                    if (Random().NextBoolean())
                    {
                        if (Random().NextBoolean())
                        {
                            ddq.Add("field", NumericRangeFilter.NewLongRange("field", range.min, range.max, range.minInclusive, range.maxInclusive));
                        }
                        else
                        {
                            ddq.Add("field", NumericRangeQuery.NewLongRange("field", range.min, range.max, range.minInclusive, range.maxInclusive));
                        }
                    }
                    else
                    {
                        ddq.Add("field", range.GetFilter(fastMatchFilter, vs));
                    }
                    Assert.AreEqual(expectedCounts[rangeID], s.Search(ddq, 10).TotalHits);
                }
            }

            IOUtils.Close(w, r, dir);
        }
        public virtual void TestRandom()
        {
            string[]        tokens   = GetRandomTokens(10);
            Store.Directory indexDir = NewDirectory();
            Store.Directory taxoDir  = NewDirectory();

            RandomIndexWriter w      = new RandomIndexWriter(Random(), indexDir, Similarity, TimeZone);
            var             tw       = new DirectoryTaxonomyWriter(taxoDir);
            FacetsConfig    config   = new FacetsConfig();
            int             numDocs  = AtLeast(1000);
            int             numDims  = TestUtil.NextInt(Random(), 1, 7);
            IList <TestDoc> testDocs = GetRandomDocs(tokens, numDocs, numDims);

            foreach (TestDoc testDoc in testDocs)
            {
                Document doc = new Document();
                doc.Add(NewStringField("content", testDoc.content, Field.Store.NO));
                for (int j = 0; j < numDims; j++)
                {
                    if (testDoc.dims[j] != null)
                    {
                        doc.Add(new FacetField("dim" + j, testDoc.dims[j]));
                    }
                }
                w.AddDocument(config.Build(tw, doc));
            }

            // NRT open
            IndexSearcher searcher = NewSearcher(w.Reader);

            // NRT open
            var tr = new DirectoryTaxonomyReader(tw);

            int iters = AtLeast(100);

            for (int iter = 0; iter < iters; iter++)
            {
                string searchToken = tokens[Random().Next(tokens.Length)];
                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: iter content=" + searchToken);
                }
                FacetsCollector fc = new FacetsCollector();
                FacetsCollector.Search(searcher, new TermQuery(new Term("content", searchToken)), 10, fc);
                Facets facets = GetTaxonomyFacetCounts(tr, config, fc);

                // Slow, yet hopefully bug-free, faceting:
                var expectedCounts = new List <Dictionary <string, int?> >();
                for (int i = 0; i < numDims; i++)
                {
                    expectedCounts.Add(new Dictionary <string, int?>());
                }

                foreach (TestDoc doc in testDocs)
                {
                    if (doc.content.Equals(searchToken))
                    {
                        for (int j = 0; j < numDims; j++)
                        {
                            if (doc.dims[j] != null)
                            {
                                int?v = expectedCounts[j].ContainsKey(doc.dims[j]) ? expectedCounts[j][doc.dims[j]] : null;
                                if (v == null)
                                {
                                    expectedCounts[j][doc.dims[j]] = 1;
                                }
                                else
                                {
                                    expectedCounts[j][doc.dims[j]] = (int)v + 1;
                                }
                            }
                        }
                    }
                }

                List <FacetResult> expected = new List <FacetResult>();
                for (int i = 0; i < numDims; i++)
                {
                    List <LabelAndValue> labelValues = new List <LabelAndValue>();
                    int totCount = 0;
                    foreach (KeyValuePair <string, int?> ent in expectedCounts[i])
                    {
                        labelValues.Add(new LabelAndValue(ent.Key, ent.Value.Value));
                        totCount += ent.Value.Value;
                    }
                    SortLabelValues(labelValues);
                    if (totCount > 0)
                    {
                        expected.Add(new FacetResult("dim" + i, new string[0], totCount, labelValues.ToArray(), labelValues.Count));
                    }
                }

                // Sort by highest value, tie break by value:
                SortFacetResults(expected);

                IList <FacetResult> actual = facets.GetAllDims(10);

                // Messy: fixup ties
                SortTies(actual);

                Assert.AreEqual(expected, actual);
            }

            IOUtils.Close(w, tw, searcher.IndexReader, tr, indexDir, taxoDir);
        }
Beispiel #52
0
        public override int GetOrdinal(FacetLabel cp)
        {
            EnsureOpen();
            if (cp.Length == 0)
            {
                return(ROOT_ORDINAL);
            }

            // First try to find the answer in the LRU cache:

            // LUCENENET: Despite LRUHashMap being thread-safe, we get much better performance
            // if reads are separated from writes.
            ordinalCacheLock.EnterReadLock();
            try
            {
                if (ordinalCache.TryGetValue(cp, out Int32Class res))
                {
                    if (res < indexReader.MaxDoc)
                    {
                        // Since the cache is shared with DTR instances allocated from
                        // doOpenIfChanged, we need to ensure that the ordinal is one that
                        // this DTR instance recognizes.
                        return(res);
                    }
                    else
                    {
                        // if we get here, it means that the category was found in the cache,
                        // but is not recognized by this TR instance. Therefore there's no
                        // need to continue search for the path on disk, because we won't find
                        // it there too.
                        return(TaxonomyReader.INVALID_ORDINAL);
                    }
                }
            }
            finally
            {
                ordinalCacheLock.ExitReadLock();
            }

            // If we're still here, we have a cache miss. We need to fetch the
            // value from disk, and then also put it in the cache:
            int      ret  = TaxonomyReader.INVALID_ORDINAL;
            DocsEnum docs = MultiFields.GetTermDocsEnum(indexReader, null, Consts.FULL, new BytesRef(FacetsConfig.PathToString(cp.Components, cp.Length)), 0);

            if (docs != null && docs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
            {
                ret = docs.DocID;

                // we only store the fact that a category exists, not its inexistence.
                // This is required because the caches are shared with new DTR instances
                // that are allocated from doOpenIfChanged. Therefore, if we only store
                // information about found categories, we cannot accidently tell a new
                // generation of DTR that a category does not exist.

                ordinalCacheLock.EnterWriteLock();
                try
                {
                    ordinalCache[cp] = ret;
                }
                finally
                {
                    ordinalCacheLock.ExitWriteLock();
                }
            }

            return(ret);
        }
Beispiel #53
0
 /// <summary>
 /// Aggreggates float facet values from the provided
 /// <see cref="ValueSource"/>, pulling ordinals using <see cref="DocValuesOrdinalsReader"/>
 /// against the default indexed
 /// facet field <see cref="FacetsConfig.DEFAULT_INDEX_FIELD_NAME"/>.
 /// </summary>
 public TaxonomyFacetSumValueSource(TaxonomyReader taxoReader, FacetsConfig config,
                                    FacetsCollector fc, ValueSource valueSource)
     : this(new DocValuesOrdinalsReader(FacetsConfig.DEFAULT_INDEX_FIELD_NAME),
            taxoReader, config, fc, valueSource)
 {
 }
 private static void AddFacets(Document doc, FacetsConfig config, bool updateTermExpectedCounts)
 {
     IList<FacetField> docCategories = RandomCategories(Random());
     foreach (FacetField ff in docCategories)
     {
         doc.Add(ff);
         string cp = ff.dim + "/" + ff.path[0];
         allExpectedCounts[cp] = allExpectedCounts[cp] + 1;
         if (updateTermExpectedCounts)
         {
             termExpectedCounts[cp] = termExpectedCounts[cp] + 1;
         }
     }
     // add 1 to each NO_PARENTS dimension
     allExpectedCounts[CP_B] = allExpectedCounts[CP_B] + 1;
     allExpectedCounts[CP_C] = allExpectedCounts[CP_C] + 1;
     allExpectedCounts[CP_D] = allExpectedCounts[CP_D] + 1;
     if (updateTermExpectedCounts)
     {
         termExpectedCounts[CP_B] = termExpectedCounts[CP_B] + 1;
         termExpectedCounts[CP_C] = termExpectedCounts[CP_C] + 1;
         termExpectedCounts[CP_D] = termExpectedCounts[CP_D] + 1;
     }
 }
        public virtual void TestHugeLabel()
        {
            Directory indexDir = NewDirectory(), taxoDir = NewDirectory();
            IndexWriter indexWriter = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
            DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE, new Cl2oTaxonomyWriterCache(2, 1f, 1));
            FacetsConfig config = new FacetsConfig();

            // Add one huge label:
            string bigs = null;
            int ordinal = -1;

            int len = FacetLabel.MAX_CATEGORY_PATH_LENGTH - 4; // for the dimension and separator
            bigs = TestUtil.RandomSimpleString(Random(), len, len);
            FacetField ff = new FacetField("dim", bigs);
            FacetLabel cp = new FacetLabel("dim", bigs);
            ordinal = taxoWriter.AddCategory(cp);
            Document doc = new Document();
            doc.Add(ff);
            indexWriter.AddDocument(config.Build(taxoWriter, doc));

            // Add tiny ones to cause a re-hash
            for (int i = 0; i < 3; i++)
            {
                string s = TestUtil.RandomSimpleString(Random(), 1, 10);
                taxoWriter.AddCategory(new FacetLabel("dim", s));
                doc = new Document();
                doc.Add(new FacetField("dim", s));
                indexWriter.AddDocument(config.Build(taxoWriter, doc));
            }

            // when too large components were allowed to be added, this resulted in a new added category
            Assert.AreEqual(ordinal, taxoWriter.AddCategory(cp));

            IOUtils.Close(indexWriter, taxoWriter);

            DirectoryReader indexReader = DirectoryReader.Open(indexDir);
            var taxoReader = new DirectoryTaxonomyReader(taxoDir);
            IndexSearcher searcher = new IndexSearcher(indexReader);
            DrillDownQuery ddq = new DrillDownQuery(new FacetsConfig());
            ddq.Add("dim", bigs);
            Assert.AreEqual(1, searcher.Search(ddq, 10).TotalHits);

            IOUtils.Close(indexReader, taxoReader, indexDir, taxoDir);
        }
        public virtual void TestSparseFacets()
        {
            Store.Directory dir     = NewDirectory();
            Store.Directory taxoDir = NewDirectory();

            // Writes facet ords to a separate directory from the
            // main index:
            var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE);

            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
            FacetsConfig      config = new FacetsConfig();

            Document doc = new Document();

            doc.Add(new IntField("num", 10, Field.Store.NO));
            doc.Add(new FacetField("a", "foo1"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            if (Random().NextBoolean())
            {
                writer.Commit();
            }

            doc = new Document();
            doc.Add(new IntField("num", 20, Field.Store.NO));
            doc.Add(new FacetField("a", "foo2"));
            doc.Add(new FacetField("b", "bar1"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            if (Random().NextBoolean())
            {
                writer.Commit();
            }

            doc = new Document();
            doc.Add(new IntField("num", 30, Field.Store.NO));
            doc.Add(new FacetField("a", "foo3"));
            doc.Add(new FacetField("b", "bar2"));
            doc.Add(new FacetField("c", "baz1"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            // NRT open
            IndexSearcher searcher = NewSearcher(writer.Reader);

            writer.Dispose();

            // NRT open
            var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            taxoWriter.Dispose();

            FacetsCollector c = new FacetsCollector();

            searcher.Search(new MatchAllDocsQuery(), c);

            TaxonomyFacetSumValueSource facets = new TaxonomyFacetSumValueSource(taxoReader, new FacetsConfig(), c, new IntFieldSource("num"));

            // Ask for top 10 labels for any dims that have counts:
            IList <FacetResult> results = facets.GetAllDims(10);

            Assert.AreEqual(3, results.Count);
            Assert.AreEqual("dim=a path=[] value=60.0 childCount=3\n  foo3 (30.0)\n  foo2 (20.0)\n  foo1 (10.0)\n", results[0].ToString());
            Assert.AreEqual("dim=b path=[] value=50.0 childCount=2\n  bar2 (30.0)\n  bar1 (20.0)\n", results[1].ToString());
            Assert.AreEqual("dim=c path=[] value=30.0 childCount=1\n  baz1 (30.0)\n", results[2].ToString());

            IOUtils.Close(searcher.IndexReader, taxoReader, dir, taxoDir);
        }
Beispiel #57
0
        // we need to guarantee that if several threads call this concurrently, only
        // one executes it, and after it returns, the cache is updated and is either
        // complete or not.
        private void PerhapsFillCache()
        {
            lock (this)
            {
                if (cacheMisses < cacheMissesUntilFill)
                {
                    return;
                }

                if (!shouldFillCache)
                {
                    // we already filled the cache once, there's no need to re-fill it
                    return;
                }
                shouldFillCache = false;

                InitReaderManager();

                bool            aborted = false;
                DirectoryReader reader  = readerManager.Acquire();
                try
                {
                    TermsEnum termsEnum = null;
                    DocsEnum  docsEnum  = null;
                    foreach (AtomicReaderContext ctx in reader.Leaves)
                    {
                        Terms terms = ctx.AtomicReader.GetTerms(Consts.FULL);
                        if (terms != null) // cannot really happen, but be on the safe side
                        {
                            termsEnum = terms.GetIterator(termsEnum);
                            while (termsEnum.Next() != null)
                            {
                                if (!cache.IsFull)
                                {
                                    BytesRef t = termsEnum.Term;
                                    // Since we guarantee uniqueness of categories, each term has exactly
                                    // one document. Also, since we do not allow removing categories (and
                                    // hence documents), there are no deletions in the index. Therefore, it
                                    // is sufficient to call next(), and then doc(), exactly once with no
                                    // 'validation' checks.
                                    FacetLabel cp = new FacetLabel(FacetsConfig.StringToPath(t.Utf8ToString()));
                                    docsEnum = termsEnum.Docs(null, docsEnum, DocsFlags.NONE);
                                    bool res = cache.Put(cp, docsEnum.NextDoc() + ctx.DocBase);
                                    Debug.Assert(!res, "entries should not have been evicted from the cache");
                                }
                                else
                                {
                                    // the cache is full and the next put() will evict entries from it, therefore abort the iteration.
                                    aborted = true;
                                    break;
                                }
                            }
                        }
                        if (aborted)
                        {
                            break;
                        }
                    }
                }
                finally
                {
                    readerManager.Release(reader);
                }

                cacheIsComplete = !aborted;
                if (cacheIsComplete)
                {
                    lock (this)
                    {
                        // everything is in the cache, so no need to keep readerManager open.
                        // this block is executed in a sync block so that it works well with
                        // initReaderManager called in parallel.
                        readerManager.Dispose();
                        readerManager            = null;
                        initializedReaderManager = false;
                    }
                }
            }
        }
        public virtual void TestChildCount()
        {
            // LUCENE-4885: FacetResult.numValidDescendants was not set properly by FacetsAccumulator
            var indexDir = NewDirectory();
            var taxoDir = NewDirectory();

            var taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
            IndexWriter iw = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
            FacetsConfig config = new FacetsConfig();
            for (int i = 0; i < 10; i++)
            {
                Document doc = new Document();
                doc.Add(new FacetField("a", Convert.ToString(i)));
                iw.AddDocument(config.Build(taxoWriter, doc));
            }

            DirectoryReader r = DirectoryReader.Open(iw, true);
            DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            FacetsCollector sfc = new FacetsCollector();
            NewSearcher(r).Search(new MatchAllDocsQuery(), sfc);
            Facets facets = GetTaxonomyFacetCounts(taxoReader, config, sfc);

            Assert.AreEqual(10, facets.GetTopChildren(2, "a").ChildCount);

            IOUtils.Close(taxoWriter, iw, taxoReader, taxoDir, r, indexDir);
        }
 /// <summary>
 /// Create <see cref="TaxonomyFacetSumIntAssociations"/> against
 /// the specified index field. 
 /// </summary>
 public TaxonomyFacetSumIntAssociations(string indexFieldName, TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc)
     : base(indexFieldName, taxoReader, config)
 {
     SumValues(fc.GetMatchingDocs());
 }
        public virtual void TestConcurrency()
        {
            int                 ncats   = AtLeast(100000); // add many categories
            int                 range   = ncats * 3;       // affects the categories selection
            AtomicInteger       numCats = new AtomicInteger(ncats);
            Directory           dir     = NewDirectory();
            var                 values  = new ConcurrentDictionary <string, string>();
            double              d       = Random().NextDouble();
            TaxonomyWriterCache cache;

            if (d < 0.7)
            {
                // this is the fastest, yet most memory consuming
                cache = new Cl2oTaxonomyWriterCache(1024, 0.15f, 3);
            }
            else if (TEST_NIGHTLY && d > 0.98)
            {
                // this is the slowest, but tests the writer concurrency when no caching is done.
                // only pick it during NIGHTLY tests, and even then, with very low chances.
                cache = NO_OP_CACHE;
            }
            else
            {
                // this is slower than CL2O, but less memory consuming, and exercises finding categories on disk too.
                cache = new LruTaxonomyWriterCache(ncats / 10);
            }
            if (VERBOSE)
            {
                Console.WriteLine("TEST: use cache=" + cache);
            }
            var tw = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE, cache);

            ThreadClass[] addThreads = new ThreadClass[AtLeast(4)];
            for (int z = 0; z < addThreads.Length; z++)
            {
                addThreads[z] = new ThreadAnonymousInnerClassHelper(this, range, numCats, values, tw);
            }

            foreach (var t in addThreads)
            {
                t.Start();
            }
            foreach (var t in addThreads)
            {
                t.Join();
            }
            tw.Dispose();

            DirectoryTaxonomyReader dtr = new DirectoryTaxonomyReader(dir);

            // +1 for root category
            if (values.Count + 1 != dtr.Size)
            {
                foreach (string value in values.Keys)
                {
                    FacetLabel label = new FacetLabel(FacetsConfig.StringToPath(value));
                    if (dtr.GetOrdinal(label) == -1)
                    {
                        Console.WriteLine("FAIL: path=" + label + " not recognized");
                    }
                }
                Fail("mismatch number of categories");
            }

            int[] parents = dtr.ParallelTaxonomyArrays.Parents();
            foreach (string cat in values.Keys)
            {
                FacetLabel cp = new FacetLabel(FacetsConfig.StringToPath(cat));
                Assert.True(dtr.GetOrdinal(cp) > 0, "category not found " + cp);
                int        level     = cp.Length;
                int        parentOrd = 0; // for root, parent is always virtual ROOT (ord=0)
                FacetLabel path      = new FacetLabel();
                for (int i = 0; i < level; i++)
                {
                    path = cp.Subpath(i + 1);
                    int ord = dtr.GetOrdinal(path);
                    Assert.AreEqual(parentOrd, parents[ord], "invalid parent for cp=" + path);
                    parentOrd = ord; // next level should have this parent
                }
            }

            IOUtils.Close(dtr, dir);
        }