public virtual void TestRequireDimCount()
        {
            Store.Directory   dir     = NewDirectory();
            Store.Directory   taxoDir = NewDirectory();
            RandomIndexWriter writer  = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);
            DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);

            FacetsConfig config = new FacetsConfig();

            config.SetRequireDimCount("dim", true);

            config.SetMultiValued("dim2", true);
            config.SetRequireDimCount("dim2", true);

            config.SetMultiValued("dim3", true);
            config.SetHierarchical("dim3", true);
            config.SetRequireDimCount("dim3", true);

            Document doc = new Document();

            doc.Add(NewTextField("field", "text", Field.Store.NO));
            doc.Add(new FacetField("dim", "a"));
            doc.Add(new FacetField("dim2", "a"));
            doc.Add(new FacetField("dim2", "b"));
            doc.Add(new FacetField("dim3", "a", "b"));
            doc.Add(new FacetField("dim3", "a", "c"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            // NRT open
            IndexSearcher searcher = NewSearcher(writer.GetReader());

            // NRT open
            var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            FacetsCollector c = new FacetsCollector();

            searcher.Search(new MatchAllDocsQuery(), c);

            Facets facets = GetTaxonomyFacetCounts(taxoReader, config, c);

            Assert.AreEqual(1, facets.GetTopChildren(10, "dim").Value);
            Assert.AreEqual(1, facets.GetTopChildren(10, "dim2").Value);
            Assert.AreEqual(1, facets.GetTopChildren(10, "dim3").Value);
            try
            {
                Assert.AreEqual(1, facets.GetSpecificValue("dim"));
                fail("didn't hit expected exception");
            }
            catch (Exception iae) when(iae.IsIllegalArgumentException())
            {
                // expected
            }
            Assert.AreEqual(1, facets.GetSpecificValue("dim2"));
            Assert.AreEqual(1, facets.GetSpecificValue("dim3"));
            IOUtils.Dispose(writer, taxoWriter, searcher.IndexReader, taxoReader, dir, taxoDir);
        }
Beispiel #2
0
 /// <summary>Empty constructor</summary>
 public AssociationsFacetsExample()
 {
     config = new FacetsConfig();
     config.SetMultiValued("tags", true);
     config.SetIndexFieldName("tags", "$tags");
     config.SetMultiValued("genre", true);
     config.SetIndexFieldName("genre", "$genre");
 }
        public virtual void TestRequireDimCount()
        {
            Store.Directory         dir        = NewDirectory();
            Store.Directory         taxoDir    = NewDirectory();
            RandomIndexWriter       writer     = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
            DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE);

            FacetsConfig config = new FacetsConfig();

            config.SetRequireDimCount("dim", true);

            config.SetMultiValued("dim2", true);
            config.SetRequireDimCount("dim2", true);

            config.SetMultiValued("dim3", true);
            config.SetHierarchical("dim3", true);
            config.SetRequireDimCount("dim3", true);

            Document doc = new Document();

            doc.Add(NewTextField("field", "text", Field.Store.NO));
            doc.Add(new FacetField("dim", "a"));
            doc.Add(new FacetField("dim2", "a"));
            doc.Add(new FacetField("dim2", "b"));
            doc.Add(new FacetField("dim3", "a", "b"));
            doc.Add(new FacetField("dim3", "a", "c"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            // NRT open
            IndexSearcher searcher = NewSearcher(writer.Reader);

            // NRT open
            var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            FacetsCollector c = new FacetsCollector();

            searcher.Search(new MatchAllDocsQuery(), c);

            Facets facets = GetTaxonomyFacetCounts(taxoReader, config, c);

            Assert.AreEqual(1, facets.GetTopChildren(10, "dim").Value);
            Assert.AreEqual(1, facets.GetTopChildren(10, "dim2").Value);
            Assert.AreEqual(1, facets.GetTopChildren(10, "dim3").Value);
            try
            {
                Assert.AreEqual(1, facets.GetSpecificValue("dim"));
                Fail("didn't hit expected exception");
            }
            catch (System.ArgumentException)
            {
                // expected
            }
            Assert.AreEqual(1, facets.GetSpecificValue("dim2"));
            Assert.AreEqual(1, facets.GetSpecificValue("dim3"));
            IOUtils.Close(writer, taxoWriter, searcher.IndexReader, taxoReader, dir, taxoDir);
        }
Beispiel #4
0
        private static FacetsConfig GetConfig()
        {
            FacetsConfig config = new FacetsConfig();

            config.SetMultiValued("A", true);
            config.SetMultiValued("B", true);
            config.SetRequireDimCount("B", true);
            config.SetHierarchical("D", true);
            return(config);
        }
        public override void BeforeClass()
        {
            base.BeforeClass();

            dir     = NewDirectory();
            taxoDir = NewDirectory();
            // preparations - index, taxonomy, content

            var taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

            // Cannot mix ints & floats in the same indexed field:
            config = new FacetsConfig();
            config.SetIndexFieldName("int", "$facets.int");
            config.SetMultiValued("int", true);
            config.SetIndexFieldName("float", "$facets.float");
            config.SetMultiValued("float", true);

            var writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);

            // index documents, 50% have only 'b' and all have 'a'
            for (int i = 0; i < 110; i++)
            {
                Document doc = new Document();
                // every 11th document is added empty, this used to cause the association
                // aggregators to go into an infinite loop
                if (i % 11 != 0)
                {
                    doc.Add(new Int32AssociationFacetField(2, "int", "a"));
                    doc.Add(new SingleAssociationFacetField(0.5f, "float", "a"));
                    if (i % 2 == 0) // 50
                    {
                        doc.Add(new Int32AssociationFacetField(3, "int", "b"));
                        doc.Add(new SingleAssociationFacetField(0.2f, "float", "b"));
                    }
                }
                writer.AddDocument(config.Build(taxoWriter, doc));
            }

            taxoWriter.Dispose();
            reader = writer.GetReader();
            writer.Dispose();
            taxoReader = new DirectoryTaxonomyReader(taxoDir);
        }
Beispiel #6
0
        /// <summary>
        /// Add documents.
        /// </summary>
        /// <param name="writer">The index writer.</param>
        /// <param name="facetWriter">The facet index writer.</param>
        /// <param name="facetData">The complete facet information used to build the index information.</param>
        public void AddDocuments(Lucene.Net.Index.IndexWriter writer, DirectoryTaxonomyWriter facetWriter, FacetData facetData)
        {
            // Build the facet configuration information.
            FacetsConfig config = new FacetsConfig();

            // Builder hierarchicals.
            if (facetData.Hierarchicals != null && facetData.Hierarchicals.Length > 0)
            {
                // Add the config.
                foreach (FacetData.Hierarchical item in facetData.Hierarchicals)
                {
                    config.SetHierarchical(item.DimensionName, item.IsHierarchical);
                }
            }

            // Builder index fields.
            if (facetData.IndexFields != null && facetData.IndexFields.Length > 0)
            {
                // Add the config.
                foreach (FacetData.IndexField item in facetData.IndexFields)
                {
                    config.SetIndexFieldName(item.DimensionName, item.IndexFieldName);
                }
            }

            // Builder multi values.
            if (facetData.MultiValues != null && facetData.MultiValues.Length > 0)
            {
                // Add the config.
                foreach (FacetData.MultiValued item in facetData.MultiValues)
                {
                    config.SetMultiValued(item.DimensionName, item.IsMultiValue);
                }
            }

            // Builder require dimension counts.
            if (facetData.RequireDimensionCounts != null && facetData.RequireDimensionCounts.Length > 0)
            {
                // Add the config.
                foreach (FacetData.RequireDimensionCount item in facetData.RequireDimensionCounts)
                {
                    config.SetRequireDimCount(item.DimensionName, item.IsAccurateCountsRequired);
                }
            }

            // Add text data.
            if (facetData.TextFacetFields.Count > 0)
            {
                // Add the text.
                AddText(writer, facetWriter, facetData.TextFacetFields, config);
            }

            // Add file data.
            if (facetData.FileFacetFields.Count > 0)
            {
                // Add the file.
                AddFile(writer, facetWriter, facetData.FileFacetFields, config);
            }
        }
Beispiel #7
0
 public override void Configure(FacetsConfig config)
 {
     for (int i = 0; i < maxDims; i++)
     {
         config.SetHierarchical(i.ToString(CultureInfo.InvariantCulture), true);
         config.SetMultiValued(i.ToString(CultureInfo.InvariantCulture), true);
     }
 }
Beispiel #8
0
        public virtual void TestBasic()
        {
            AssumeTrue("Test requires SortedSetDV support", DefaultCodecSupportsSortedSet);
            Directory dir = NewDirectory();

            FacetsConfig config = new FacetsConfig();

            config.SetMultiValued("a", true);
            RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);

            Document doc = new Document();

            doc.Add(new SortedSetDocValuesFacetField("a", "foo"));
            doc.Add(new SortedSetDocValuesFacetField("a", "bar"));
            doc.Add(new SortedSetDocValuesFacetField("a", "zoo"));
            doc.Add(new SortedSetDocValuesFacetField("b", "baz"));
            writer.AddDocument(config.Build(doc));
            if (Random.NextBoolean())
            {
                writer.Commit();
            }

            doc = new Document();
            doc.Add(new SortedSetDocValuesFacetField("a", "foo"));
            writer.AddDocument(config.Build(doc));

            // NRT open
            IndexSearcher searcher = NewSearcher(writer.GetReader());

            // Per-top-reader state:
            SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(searcher.IndexReader);

            FacetsCollector c = new FacetsCollector();

            searcher.Search(new MatchAllDocsQuery(), c);

            SortedSetDocValuesFacetCounts facets = new SortedSetDocValuesFacetCounts(state, c);

            Assert.AreEqual("dim=a path=[] value=4 childCount=3\n  foo (2)\n  bar (1)\n  zoo (1)\n", facets.GetTopChildren(10, "a").ToString());
            Assert.AreEqual("dim=b path=[] value=1 childCount=1\n  baz (1)\n", facets.GetTopChildren(10, "b").ToString());

            // DrillDown:
            DrillDownQuery q = new DrillDownQuery(config);

            q.Add("a", "foo");
            q.Add("b", "baz");
            TopDocs hits = searcher.Search(q, 1);

            Assert.AreEqual(1, hits.TotalHits);

            IOUtils.Dispose(writer, searcher.IndexReader, dir);
        }
        public virtual void TestMultiValuedHierarchy()
        {
            Store.Directory         dir        = NewDirectory();
            Store.Directory         taxoDir    = NewDirectory();
            DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);
            FacetsConfig            config     = new FacetsConfig();

            config.SetHierarchical("a", true);
            config.SetMultiValued("a", true);
            RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);

            Document doc = new Document();

            doc.Add(NewTextField("field", "text", Field.Store.NO));
            doc.Add(new FacetField("a", "path", "x"));
            doc.Add(new FacetField("a", "path", "y"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            // NRT open
            IndexSearcher searcher = NewSearcher(writer.GetReader());

            // NRT open
            var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            // Aggregate the facet counts:
            FacetsCollector c = new FacetsCollector();

            // MatchAllDocsQuery is for "browsing" (counts facets
            // for all non-deleted docs in the index); normally
            // you'd use a "normal" query, and use MultiCollector to
            // wrap collecting the "normal" hits and also facets:
            searcher.Search(new MatchAllDocsQuery(), c);
            Facets facets = GetTaxonomyFacetCounts(taxoReader, config, c);

            try
            {
                facets.GetSpecificValue("a");
                fail("didn't hit expected exception");
            }
            catch (Exception iae) when(iae.IsIllegalArgumentException())
            {
                // expected
            }

            FacetResult result = facets.GetTopChildren(10, "a");

            Assert.AreEqual(1, result.LabelValues.Length);
            Assert.AreEqual(1, (int)result.LabelValues[0].Value);

            IOUtils.Dispose(writer, taxoWriter, searcher.IndexReader, taxoReader, dir, taxoDir);
        }
        public virtual void TestManyFacetsInOneDocument()
        {
            AssumeTrue("default Codec doesn't support huge BinaryDocValues", TestUtil.FieldSupportsHugeBinaryDocValues(FacetsConfig.DEFAULT_INDEX_FIELD_NAME));
            Store.Directory   dir     = NewDirectory();
            Store.Directory   taxoDir = NewDirectory();
            IndexWriterConfig iwc     = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            RandomIndexWriter writer  = new RandomIndexWriter(Random(), dir, iwc);
            var taxoWriter            = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE);

            FacetsConfig config = new FacetsConfig();

            config.SetMultiValued("dim", true);

            int numLabels = TestUtil.NextInt(Random(), 40000, 100000);

            Document doc = new Document();

            doc.Add(NewTextField("field", "text", Field.Store.NO));
            for (int i = 0; i < numLabels; i++)
            {
                doc.Add(new FacetField("dim", "" + i));
            }
            writer.AddDocument(config.Build(taxoWriter, doc));

            // NRT open
            IndexSearcher searcher = NewSearcher(writer.Reader);

            // NRT open
            var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            // Aggregate the facet counts:
            FacetsCollector c = new FacetsCollector();

            // MatchAllDocsQuery is for "browsing" (counts facets
            // for all non-deleted docs in the index); normally
            // you'd use a "normal" query, and use MultiCollector to
            // wrap collecting the "normal" hits and also facets:
            searcher.Search(new MatchAllDocsQuery(), c);
            Facets facets = GetTaxonomyFacetCounts(taxoReader, config, c);

            FacetResult result = facets.GetTopChildren(int.MaxValue, "dim");

            Assert.AreEqual(numLabels, result.LabelValues.Length);
            var allLabels = new HashSet <string>();

            foreach (LabelAndValue labelValue in result.LabelValues)
            {
                allLabels.Add(labelValue.Label);
                Assert.AreEqual(1, (int)labelValue.Value);
            }
            Assert.AreEqual(numLabels, allLabels.Count);

            IOUtils.Close(searcher.IndexReader, taxoWriter, writer, taxoReader, dir, taxoDir);
        }
Beispiel #11
0
        public void BeforeClass()
        {
            dir     = NewDirectory();
            taxoDir = NewDirectory();
            // preparations - index, taxonomy, content

            var taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

            // Cannot mix ints & floats in the same indexed field:
            config = new FacetsConfig();
            config.SetIndexFieldName("int", "$facets.int");
            config.SetMultiValued("int", true);
            config.SetIndexFieldName("float", "$facets.float");
            config.SetMultiValued("float", true);

            var writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

            // index documents, 50% have only 'b' and all have 'a'
            for (int i = 0; i < 110; i++)
            {
                Document doc = new Document();
                // every 11th document is added empty, this used to cause the association
                // aggregators to go into an infinite loop
                if (i % 11 != 0)
                {
                    doc.Add(new IntAssociationFacetField(2, "int", "a"));
                    doc.Add(new FloatAssociationFacetField(0.5f, "float", "a"));
                    if (i % 2 == 0) // 50
                    {
                        doc.Add(new IntAssociationFacetField(3, "int", "b"));
                        doc.Add(new FloatAssociationFacetField(0.2f, "float", "b"));
                    }
                }
                writer.AddDocument(config.Build(taxoWriter, doc));
            }

            taxoWriter.Dispose();
            reader = writer.Reader;
            writer.Dispose();
            taxoReader = new DirectoryTaxonomyReader(taxoDir);
        }
        public static void BeforeClass()
        {
            dir = NewDirectory();
            taxoDir = NewDirectory();
            // preparations - index, taxonomy, content

            var taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

            // Cannot mix ints & floats in the same indexed field:
            config = new FacetsConfig();
            config.SetIndexFieldName("int", "$facets.int");
            config.SetMultiValued("int", true);
            config.SetIndexFieldName("float", "$facets.float");
            config.SetMultiValued("float", true);

            var writer = new RandomIndexWriter(Random(), dir);

            // index documents, 50% have only 'b' and all have 'a'
            for (int i = 0; i < 110; i++)
            {
                Document doc = new Document();
                // every 11th document is added empty, this used to cause the association
                // aggregators to go into an infinite loop
                if (i % 11 != 0)
                {
                    doc.Add(new IntAssociationFacetField(2, "int", "a"));
                    doc.Add(new FloatAssociationFacetField(0.5f, "float", "a"));
                    if (i % 2 == 0) // 50
                    {
                        doc.Add(new IntAssociationFacetField(3, "int", "b"));
                        doc.Add(new FloatAssociationFacetField(0.2f, "float", "b"));
                    }
                }
                writer.AddDocument(config.Build(taxoWriter, doc));
            }

            taxoWriter.Dispose();
            reader = writer.Reader;
            writer.Dispose();
            taxoReader = new DirectoryTaxonomyReader(taxoDir);
        }
        public virtual void TestBasic()
        {

            AssumeTrue("Test requires SortedSetDV support", DefaultCodecSupportsSortedSet());
            Directory dir = NewDirectory();

            FacetsConfig config = new FacetsConfig();
            config.SetMultiValued("a", true);
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

            Document doc = new Document();
            doc.Add(new SortedSetDocValuesFacetField("a", "foo"));
            doc.Add(new SortedSetDocValuesFacetField("a", "bar"));
            doc.Add(new SortedSetDocValuesFacetField("a", "zoo"));
            doc.Add(new SortedSetDocValuesFacetField("b", "baz"));
            writer.AddDocument(config.Build(doc));
            if (Random().NextBoolean())
            {
                writer.Commit();
            }

            doc = new Document();
            doc.Add(new SortedSetDocValuesFacetField("a", "foo"));
            writer.AddDocument(config.Build(doc));

            // NRT open
            IndexSearcher searcher = NewSearcher(writer.Reader);

            // Per-top-reader state:
            SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(searcher.IndexReader);

            FacetsCollector c = new FacetsCollector();

            searcher.Search(new MatchAllDocsQuery(), c);

            SortedSetDocValuesFacetCounts facets = new SortedSetDocValuesFacetCounts(state, c);

            Assert.AreEqual("dim=a path=[] value=4 childCount=3\n  foo (2)\n  bar (1)\n  zoo (1)\n", facets.GetTopChildren(10, "a").ToString());
            Assert.AreEqual("dim=b path=[] value=1 childCount=1\n  baz (1)\n", facets.GetTopChildren(10, "b").ToString());

            // DrillDown:
            DrillDownQuery q = new DrillDownQuery(config);
            q.Add("a", "foo");
            q.Add("b", "baz");
            TopDocs hits = searcher.Search(q, 1);
            Assert.AreEqual(1, hits.TotalHits);

            IOUtils.Close(writer, searcher.IndexReader, dir);
        }
        public virtual void TestLabelWithDelimiter()
        {
            Store.Directory   dir     = NewDirectory();
            Store.Directory   taxoDir = NewDirectory();
            RandomIndexWriter writer  = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);
            var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);

            FacetsConfig config = new FacetsConfig();

            config.SetMultiValued("dim", true);

            Document doc = new Document();

            doc.Add(NewTextField("field", "text", Field.Store.NO));
            doc.Add(new FacetField("dim", "test\u001Fone"));
            doc.Add(new FacetField("dim", "test\u001Etwo"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            // NRT open
            IndexSearcher searcher = NewSearcher(writer.GetReader());

            // NRT open
            var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            FacetsCollector c = new FacetsCollector();

            searcher.Search(new MatchAllDocsQuery(), c);

            Facets facets = GetTaxonomyFacetCounts(taxoReader, config, c);

            Assert.AreEqual(1, facets.GetSpecificValue("dim", "test\u001Fone"));
            Assert.AreEqual(1, facets.GetSpecificValue("dim", "test\u001Etwo"));

            FacetResult result = facets.GetTopChildren(10, "dim");

            Assert.AreEqual("dim=dim path=[] value=-1 childCount=2\n  test\u001Fone (1)\n  test\u001Etwo (1)\n", result.ToString());
            IOUtils.Dispose(writer, taxoWriter, searcher.IndexReader, taxoReader, dir, taxoDir);
        }
        public virtual void TestManyFacetsInOneDocument()
        {
            AssumeTrue("default Codec doesn't support huge BinaryDocValues", TestUtil.FieldSupportsHugeBinaryDocValues(FacetsConfig.DEFAULT_INDEX_FIELD_NAME));
            Store.Directory dir = NewDirectory();
            Store.Directory taxoDir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, iwc);
            var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE);

            FacetsConfig config = new FacetsConfig();
            config.SetMultiValued("dim", true);

            int numLabels = TestUtil.NextInt(Random(), 40000, 100000);

            Document doc = new Document();
            doc.Add(NewTextField("field", "text", Field.Store.NO));
            for (int i = 0; i < numLabels; i++)
            {
                doc.Add(new FacetField("dim", "" + i));
            }
            writer.AddDocument(config.Build(taxoWriter, doc));

            // NRT open
            IndexSearcher searcher = NewSearcher(writer.Reader);

            // NRT open
            var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            // Aggregate the facet counts:
            FacetsCollector c = new FacetsCollector();

            // MatchAllDocsQuery is for "browsing" (counts facets
            // for all non-deleted docs in the index); normally
            // you'd use a "normal" query, and use MultiCollector to
            // wrap collecting the "normal" hits and also facets:
            searcher.Search(new MatchAllDocsQuery(), c);
            Facets facets = GetTaxonomyFacetCounts(taxoReader, config, c);

            FacetResult result = facets.GetTopChildren(int.MaxValue, "dim");
            Assert.AreEqual(numLabels, result.LabelValues.Length);
            var allLabels = new HashSet<string>();
            foreach (LabelAndValue labelValue in result.LabelValues)
            {
                allLabels.Add(labelValue.label);
                Assert.AreEqual(1, (int)labelValue.value);
            }
            Assert.AreEqual(numLabels, allLabels.Count);

            IOUtils.Close(searcher.IndexReader, taxoWriter, writer, taxoReader, dir, taxoDir);
        }
Beispiel #16
0
        public virtual void TestConcurrency()
        {
            AtomicInt32 numDocs  = new AtomicInt32(AtLeast(10000));
            Directory   indexDir = NewDirectory();
            Directory   taxoDir  = NewDirectory();
            ConcurrentDictionary <string, string> values = new ConcurrentDictionary <string, string>();
            IndexWriter iw = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, null));
            var         tw = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE, NewTaxoWriterCache(numDocs));

            ThreadJob[]  indexThreads = new ThreadJob[AtLeast(4)];
            FacetsConfig config       = new FacetsConfig();

            for (int i = 0; i < 10; i++)
            {
                config.SetHierarchical("l1." + i, true);
                config.SetMultiValued("l1." + i, true);
            }

            for (int i = 0; i < indexThreads.Length; i++)
            {
                indexThreads[i] = new ThreadAnonymousInnerClassHelper(this, numDocs, values, iw, tw, config);
            }

            foreach (ThreadJob t in indexThreads)
            {
                t.Start();
            }
            foreach (ThreadJob t in indexThreads)
            {
                t.Join();
            }

            var tr = new DirectoryTaxonomyReader(tw);

            // +1 for root category
            if (values.Count + 1 != tr.Count)
            {
                foreach (string value in values.Keys)
                {
                    FacetLabel label = new FacetLabel(FacetsConfig.StringToPath(value));
                    if (tr.GetOrdinal(label) == -1)
                    {
                        Console.WriteLine("FAIL: path=" + label + " not recognized");
                    }
                }
                fail("mismatch number of categories");
            }
            int[] parents = tr.ParallelTaxonomyArrays.Parents;
            foreach (string cat in values.Keys)
            {
                FacetLabel cp = new FacetLabel(FacetsConfig.StringToPath(cat));
                Assert.IsTrue(tr.GetOrdinal(cp) > 0, "category not found " + cp);
                int        level     = cp.Length;
                int        parentOrd = 0; // for root, parent is always virtual ROOT (ord=0)
                FacetLabel path      = null;
                for (int i = 0; i < level; i++)
                {
                    path = cp.Subpath(i + 1);
                    int ord = tr.GetOrdinal(path);
                    Assert.AreEqual(parentOrd, parents[ord], "invalid parent for cp=" + path);
                    parentOrd = ord; // next level should have this parent
                }
            }

            IOUtils.Dispose(tw, iw, tr, taxoDir, indexDir);
        }
Beispiel #17
0
        public virtual void Test_Directory() // LUCENENET specific - name collides with property of LuceneTestCase
        {
            Store.Directory indexDir = NewDirectory();
            Store.Directory taxoDir  = NewDirectory();
            IndexWriter     w        = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));
            var             tw       = new DirectoryTaxonomyWriter(taxoDir);

            // first empty commit
            w.Commit();
            tw.Commit();
            var          mgr    = new SearcherTaxonomyManager(indexDir, taxoDir, null);
            FacetsConfig config = new FacetsConfig();

            config.SetMultiValued("field", true);
            AtomicBoolean stop = new AtomicBoolean();

            // How many unique facets to index before stopping:
            //int ordLimit = TestNightly ? 100000 : 6000;
            // LUCENENET specific: 100000 facets takes about 2-3 hours. To keep it under
            // the 1 hour free limit of Azure DevOps, this was reduced to 30000.
            int ordLimit = TestNightly ? 30000 : 6000;

            var indexer = new IndexerThread(w, config, tw, mgr, ordLimit, stop);

            indexer.Start();

            try
            {
                while (!stop)
                {
                    SearcherAndTaxonomy pair = mgr.Acquire();
                    try
                    {
                        //System.out.println("search maxOrd=" + pair.taxonomyReader.getSize());
                        FacetsCollector sfc = new FacetsCollector();
                        pair.Searcher.Search(new MatchAllDocsQuery(), sfc);
                        Facets      facets = GetTaxonomyFacetCounts(pair.TaxonomyReader, config, sfc);
                        FacetResult result = facets.GetTopChildren(10, "field");
                        if (pair.Searcher.IndexReader.NumDocs > 0)
                        {
                            //System.out.println(pair.taxonomyReader.getSize());
                            Assert.IsTrue(result.ChildCount > 0);
                            Assert.IsTrue(result.LabelValues.Length > 0);
                        }

                        //if (VERBOSE) {
                        //System.out.println("TEST: facets=" + FacetTestUtils.toString(results.get(0)));
                        //}
                    }
                    finally
                    {
                        mgr.Release(pair);
                    }
                }
            }
            finally
            {
                indexer.Join();
            }

            if (Verbose)
            {
                Console.WriteLine("TEST: now stop");
            }

            IOUtils.Dispose(mgr, tw, w, taxoDir, indexDir);
        }
        public virtual void TestNrt()
        {
            Store.Directory dir = NewDirectory();
            Store.Directory taxoDir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            // Don't allow tiny maxBufferedDocs; it can make this
            // test too slow:
            iwc.SetMaxBufferedDocs(Math.Max(500, iwc.MaxBufferedDocs));

            // MockRandom/AlcololicMergePolicy are too slow:
            TieredMergePolicy tmp = new TieredMergePolicy();
            tmp.FloorSegmentMB = .001;
            iwc.SetMergePolicy(tmp);
            IndexWriter w = new IndexWriter(dir, iwc);
            var tw = new DirectoryTaxonomyWriter(taxoDir);
            FacetsConfig config = new FacetsConfig();
            config.SetMultiValued("field", true);
            AtomicBoolean stop = new AtomicBoolean();

            // How many unique facets to index before stopping:
            int ordLimit = TEST_NIGHTLY ? 100000 : 6000;

            var indexer = new IndexerThread(w, config, tw, null, ordLimit, stop);

            var mgr = new SearcherTaxonomyManager(w, true, null, tw);

            var reopener = new ThreadAnonymousInnerClassHelper(this, stop, mgr);

            reopener.Name = "reopener";
            reopener.Start();

            indexer.Name = "indexer";
            indexer.Start();

            try
            {
                while (!stop.Get())
                {
                    SearcherAndTaxonomy pair = mgr.Acquire();
                    try
                    {
                        //System.out.println("search maxOrd=" + pair.taxonomyReader.getSize());
                        FacetsCollector sfc = new FacetsCollector();
                        pair.searcher.Search(new MatchAllDocsQuery(), sfc);
                        Facets facets = GetTaxonomyFacetCounts(pair.taxonomyReader, config, sfc);
                        FacetResult result = facets.GetTopChildren(10, "field");
                        if (pair.searcher.IndexReader.NumDocs > 0)
                        {
                            //System.out.println(pair.taxonomyReader.getSize());
                            Assert.True(result.ChildCount > 0);
                            Assert.True(result.LabelValues.Length > 0);
                        }

                        //if (VERBOSE) {
                        //System.out.println("TEST: facets=" + FacetTestUtils.toString(results.get(0)));
                        //}
                    }
                    finally
                    {
                        mgr.Release(pair);
                    }
                }
            }
            finally
            {
                indexer.Join();
                reopener.Join();
            }

            if (VERBOSE)
            {
                Console.WriteLine("TEST: now stop");
            }

            IOUtils.Close(mgr, tw, w, taxoDir, dir);
        }
        public virtual void TestDirectory()
        {
            Store.Directory indexDir = NewDirectory();
            Store.Directory taxoDir = NewDirectory();
            IndexWriter w = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
            var tw = new DirectoryTaxonomyWriter(taxoDir);
            // first empty commit
            w.Commit();
            tw.Commit();
            var mgr = new SearcherTaxonomyManager(indexDir, taxoDir, null);
            FacetsConfig config = new FacetsConfig();
            config.SetMultiValued("field", true);
            AtomicBoolean stop = new AtomicBoolean();

            // How many unique facets to index before stopping:
            int ordLimit = TEST_NIGHTLY ? 100000 : 6000;

            var indexer = new IndexerThread(w, config, tw, mgr, ordLimit, stop);
            indexer.Start();

            try
            {
                while (!stop.Get())
                {
                    SearcherAndTaxonomy pair = mgr.Acquire();
                    try
                    {
                        //System.out.println("search maxOrd=" + pair.taxonomyReader.getSize());
                        FacetsCollector sfc = new FacetsCollector();
                        pair.searcher.Search(new MatchAllDocsQuery(), sfc);
                        Facets facets = GetTaxonomyFacetCounts(pair.taxonomyReader, config, sfc);
                        FacetResult result = facets.GetTopChildren(10, "field");
                        if (pair.searcher.IndexReader.NumDocs > 0)
                        {
                            //System.out.println(pair.taxonomyReader.getSize());
                            Assert.True(result.ChildCount > 0);
                            Assert.True(result.LabelValues.Length > 0);
                        }

                        //if (VERBOSE) {
                        //System.out.println("TEST: facets=" + FacetTestUtils.toString(results.get(0)));
                        //}
                    }
                    finally
                    {
                        mgr.Release(pair);
                    }
                }
            }
            finally
            {
                indexer.Join();
            }

            if (VERBOSE)
            {
                Console.WriteLine("TEST: now stop");
            }

            IOUtils.Close(mgr, tw, w, taxoDir, indexDir);
        }
        public virtual void TestConcurrency()
        {
            AtomicInteger numDocs = new AtomicInteger(AtLeast(10000));
            Directory indexDir = NewDirectory();
            Directory taxoDir = NewDirectory();
            ConcurrentDictionary<string, string> values = new ConcurrentDictionary<string, string>();
            IndexWriter iw = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, null));
            var tw = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE, NewTaxoWriterCache(numDocs.Get()));
            ThreadClass[] indexThreads = new ThreadClass[AtLeast(4)];
            FacetsConfig config = new FacetsConfig();
            for (int i = 0; i < 10; i++)
            {
                config.SetHierarchical("l1." + i, true);
                config.SetMultiValued("l1." + i, true);
            }

            for (int i = 0; i < indexThreads.Length; i++)
            {
                indexThreads[i] = new ThreadAnonymousInnerClassHelper(this, numDocs, values, iw, tw, config);
            }

            foreach (ThreadClass t in indexThreads)
            {
                t.Start();
            }
            foreach (ThreadClass t in indexThreads)
            {
                t.Join();
            }

            var tr = new DirectoryTaxonomyReader(tw);
            // +1 for root category
            if (values.Count + 1 != tr.Count)
            {
                foreach (string value in values.Keys)
                {
                    FacetLabel label = new FacetLabel(FacetsConfig.StringToPath(value));
                    if (tr.GetOrdinal(label) == -1)
                    {
                        Console.WriteLine("FAIL: path=" + label + " not recognized");
                    }
                }
                Fail("mismatch number of categories");
            }
            int[] parents = tr.ParallelTaxonomyArrays.Parents;
            foreach (string cat in values.Keys)
            {
                FacetLabel cp = new FacetLabel(FacetsConfig.StringToPath(cat));
                Assert.True(tr.GetOrdinal(cp) > 0, "category not found " + cp);
                int level = cp.Length;
                int parentOrd = 0; // for root, parent is always virtual ROOT (ord=0)
                FacetLabel path = null;
                for (int i = 0; i < level; i++)
                {
                    path = cp.Subpath(i + 1);
                    int ord = tr.GetOrdinal(path);
                    Assert.AreEqual(parentOrd, parents[ord], "invalid parent for cp=" + path);
                    parentOrd = ord; // next level should have this parent
                }
            }

            IOUtils.Close(tw, iw, tr, taxoDir, indexDir);
        }
        public virtual void TestLabelWithDelimiter()
        {
            Store.Directory dir = NewDirectory();
            Store.Directory taxoDir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir);
            var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE);

            FacetsConfig config = new FacetsConfig();
            config.SetMultiValued("dim", true);

            Document doc = new Document();
            doc.Add(NewTextField("field", "text", Field.Store.NO));
            doc.Add(new FacetField("dim", "test\u001Fone"));
            doc.Add(new FacetField("dim", "test\u001Etwo"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            // NRT open
            IndexSearcher searcher = NewSearcher(writer.Reader);

            // NRT open
            var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            FacetsCollector c = new FacetsCollector();
            searcher.Search(new MatchAllDocsQuery(), c);

            Facets facets = GetTaxonomyFacetCounts(taxoReader, config, c);
            Assert.AreEqual(1, facets.GetSpecificValue("dim", "test\u001Fone"));
            Assert.AreEqual(1, facets.GetSpecificValue("dim", "test\u001Etwo"));

            FacetResult result = facets.GetTopChildren(10, "dim");
            Assert.AreEqual("dim=dim path=[] value=-1 childCount=2\n  test\u001Fone (1)\n  test\u001Etwo (1)\n", result.ToString());
            IOUtils.Close(writer, taxoWriter, searcher.IndexReader, taxoReader, dir, taxoDir);
        }
        public virtual void TestNrt()
        {
            Store.Directory   dir     = NewDirectory();
            Store.Directory   taxoDir = NewDirectory();
            IndexWriterConfig iwc     = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            // Don't allow tiny maxBufferedDocs; it can make this
            // test too slow:
            iwc.SetMaxBufferedDocs(Math.Max(500, iwc.MaxBufferedDocs));

            // MockRandom/AlcololicMergePolicy are too slow:
            TieredMergePolicy tmp = new TieredMergePolicy();

            tmp.FloorSegmentMB = .001;
            iwc.SetMergePolicy(tmp);
            IndexWriter  w      = new IndexWriter(dir, iwc);
            var          tw     = new DirectoryTaxonomyWriter(taxoDir);
            FacetsConfig config = new FacetsConfig();

            config.SetMultiValued("field", true);
            AtomicBoolean stop = new AtomicBoolean();

            // How many unique facets to index before stopping:
            int ordLimit = TEST_NIGHTLY ? 100000 : 6000;

            var indexer = new IndexerThread(w, config, tw, null, ordLimit, stop);

            var mgr = new SearcherTaxonomyManager(w, true, null, tw);

            var reopener = new ThreadAnonymousInnerClassHelper(this, stop, mgr);

            reopener.Name = "reopener";
            reopener.Start();

            indexer.Name = "indexer";
            indexer.Start();

            try
            {
                while (!stop.Get())
                {
                    SearcherAndTaxonomy pair = mgr.Acquire();
                    try
                    {
                        //System.out.println("search maxOrd=" + pair.taxonomyReader.getSize());
                        FacetsCollector sfc = new FacetsCollector();
                        pair.searcher.Search(new MatchAllDocsQuery(), sfc);
                        Facets      facets = GetTaxonomyFacetCounts(pair.taxonomyReader, config, sfc);
                        FacetResult result = facets.GetTopChildren(10, "field");
                        if (pair.searcher.IndexReader.NumDocs > 0)
                        {
                            //System.out.println(pair.taxonomyReader.getSize());
                            Assert.True(result.ChildCount > 0);
                            Assert.True(result.LabelValues.Length > 0);
                        }

                        //if (VERBOSE) {
                        //System.out.println("TEST: facets=" + FacetTestUtils.toString(results.get(0)));
                        //}
                    }
                    finally
                    {
                        mgr.Release(pair);
                    }
                }
            }
            finally
            {
                indexer.Join();
                reopener.Join();
            }

            if (VERBOSE)
            {
                Console.WriteLine("TEST: now stop");
            }

            IOUtils.Close(mgr, tw, w, taxoDir, dir);
        }
        public virtual void TestRequireDimCount()
        {
            Store.Directory dir = NewDirectory();
            Store.Directory taxoDir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir);
            DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE);

            FacetsConfig config = new FacetsConfig();
            config.SetRequireDimCount("dim", true);

            config.SetMultiValued("dim2", true);
            config.SetRequireDimCount("dim2", true);

            config.SetMultiValued("dim3", true);
            config.SetHierarchical("dim3", true);
            config.SetRequireDimCount("dim3", true);

            Document doc = new Document();
            doc.Add(NewTextField("field", "text", Field.Store.NO));
            doc.Add(new FacetField("dim", "a"));
            doc.Add(new FacetField("dim2", "a"));
            doc.Add(new FacetField("dim2", "b"));
            doc.Add(new FacetField("dim3", "a", "b"));
            doc.Add(new FacetField("dim3", "a", "c"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            // NRT open
            IndexSearcher searcher = NewSearcher(writer.Reader);

            // NRT open
            var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            FacetsCollector c = new FacetsCollector();
            searcher.Search(new MatchAllDocsQuery(), c);

            Facets facets = GetTaxonomyFacetCounts(taxoReader, config, c);
            Assert.AreEqual(1, facets.GetTopChildren(10, "dim").Value);
            Assert.AreEqual(1, facets.GetTopChildren(10, "dim2").Value);
            Assert.AreEqual(1, facets.GetTopChildren(10, "dim3").Value);
            try
            {
                Assert.AreEqual(1, facets.GetSpecificValue("dim"));
                Fail("didn't hit expected exception");
            }
            catch (System.ArgumentException)
            {
                // expected
            }
            Assert.AreEqual(1, facets.GetSpecificValue("dim2"));
            Assert.AreEqual(1, facets.GetSpecificValue("dim3"));
            IOUtils.Close(writer, taxoWriter, searcher.IndexReader, taxoReader, dir, taxoDir);
        }
        public virtual void TestDirectory()
        {
            Store.Directory indexDir = NewDirectory();
            Store.Directory taxoDir  = NewDirectory();
            IndexWriter     w        = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
            var             tw       = new DirectoryTaxonomyWriter(taxoDir);

            // first empty commit
            w.Commit();
            tw.Commit();
            var          mgr    = new SearcherTaxonomyManager(indexDir, taxoDir, null);
            FacetsConfig config = new FacetsConfig();

            config.SetMultiValued("field", true);
            AtomicBoolean stop = new AtomicBoolean();

            // How many unique facets to index before stopping:
            int ordLimit = TEST_NIGHTLY ? 100000 : 6000;

            var indexer = new IndexerThread(w, config, tw, mgr, ordLimit, stop);

            indexer.Start();

            try
            {
                while (!stop.Get())
                {
                    SearcherAndTaxonomy pair = mgr.Acquire();
                    try
                    {
                        //System.out.println("search maxOrd=" + pair.taxonomyReader.getSize());
                        FacetsCollector sfc = new FacetsCollector();
                        pair.searcher.Search(new MatchAllDocsQuery(), sfc);
                        Facets      facets = GetTaxonomyFacetCounts(pair.taxonomyReader, config, sfc);
                        FacetResult result = facets.GetTopChildren(10, "field");
                        if (pair.searcher.IndexReader.NumDocs > 0)
                        {
                            //System.out.println(pair.taxonomyReader.getSize());
                            Assert.True(result.ChildCount > 0);
                            Assert.True(result.LabelValues.Length > 0);
                        }

                        //if (VERBOSE) {
                        //System.out.println("TEST: facets=" + FacetTestUtils.toString(results.get(0)));
                        //}
                    }
                    finally
                    {
                        mgr.Release(pair);
                    }
                }
            }
            finally
            {
                indexer.Join();
            }

            if (VERBOSE)
            {
                Console.WriteLine("TEST: now stop");
            }

            IOUtils.Close(mgr, tw, w, taxoDir, indexDir);
        }
        public virtual void TestMultiValuedHierarchy()
        {
            Store.Directory dir = NewDirectory();
            Store.Directory taxoDir = NewDirectory();
            DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE);
            FacetsConfig config = new FacetsConfig();
            config.SetHierarchical("a", true);
            config.SetMultiValued("a", true);
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir);

            Document doc = new Document();
            doc.Add(NewTextField("field", "text", Field.Store.NO));
            doc.Add(new FacetField("a", "path", "x"));
            doc.Add(new FacetField("a", "path", "y"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            // NRT open
            IndexSearcher searcher = NewSearcher(writer.Reader);

            // NRT open
            var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            // Aggregate the facet counts:
            FacetsCollector c = new FacetsCollector();

            // MatchAllDocsQuery is for "browsing" (counts facets
            // for all non-deleted docs in the index); normally
            // you'd use a "normal" query, and use MultiCollector to
            // wrap collecting the "normal" hits and also facets:
            searcher.Search(new MatchAllDocsQuery(), c);
            Facets facets = GetTaxonomyFacetCounts(taxoReader, config, c);

            try
            {
                facets.GetSpecificValue("a");
                Fail("didn't hit expected exception");
            }
            catch (System.ArgumentException)
            {
                // expected
            }

            FacetResult result = facets.GetTopChildren(10, "a");
            Assert.AreEqual(1, result.LabelValues.Length);
            Assert.AreEqual(1, (int)result.LabelValues[0].value);

            IOUtils.Close(writer, taxoWriter, searcher.IndexReader, taxoReader, dir, taxoDir);
        }