/// <summary>
/// Indexes a single document with facets in three dimensions that all require
/// dimension counts ("dim" is single-valued, "dim2" is multi-valued, "dim3" is
/// multi-valued and hierarchical) and checks the values reported by
/// <c>GetTopChildren</c> and <c>GetSpecificValue</c> for each of them.
/// </summary>
public virtual void TestRequireDimCount()
{
    Store.Directory indexDir = NewDirectory();
    Store.Directory taxonomyDir = NewDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, indexDir);
    DirectoryTaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxonomyDir, OpenMode.CREATE);

    // Every dimension asks for an exact dimension-level count.
    FacetsConfig facetsConfig = new FacetsConfig();
    facetsConfig.SetRequireDimCount("dim", true);

    facetsConfig.SetMultiValued("dim2", true);
    facetsConfig.SetRequireDimCount("dim2", true);

    facetsConfig.SetMultiValued("dim3", true);
    facetsConfig.SetHierarchical("dim3", true);
    facetsConfig.SetRequireDimCount("dim3", true);

    Document document = new Document();
    document.Add(NewTextField("field", "text", Field.Store.NO));
    document.Add(new FacetField("dim", "a"));
    document.Add(new FacetField("dim2", "a"));
    document.Add(new FacetField("dim2", "b"));
    document.Add(new FacetField("dim3", "a", "b"));
    document.Add(new FacetField("dim3", "a", "c"));
    indexWriter.AddDocument(facetsConfig.Build(taxonomyWriter, document));

    // Near-real-time readers opened straight from the two writers.
    IndexSearcher indexSearcher = NewSearcher(indexWriter.GetReader());
    DirectoryTaxonomyReader taxonomyReader = new DirectoryTaxonomyReader(taxonomyWriter);

    FacetsCollector collector = new FacetsCollector();
    indexSearcher.Search(new MatchAllDocsQuery(), collector);

    Facets facetCounts = GetTaxonomyFacetCounts(taxonomyReader, facetsConfig, collector);
    Assert.AreEqual(1, facetCounts.GetTopChildren(10, "dim").Value);
    Assert.AreEqual(1, facetCounts.GetTopChildren(10, "dim2").Value);
    Assert.AreEqual(1, facetCounts.GetTopChildren(10, "dim3").Value);

    // Asking for the dimension-level value of "dim" must be rejected.
    try
    {
        Assert.AreEqual(1, facetCounts.GetSpecificValue("dim"));
        fail("didn't hit expected exception");
    }
    catch (Exception ex) when (ex.IsIllegalArgumentException())
    {
        // expected
    }

    Assert.AreEqual(1, facetCounts.GetSpecificValue("dim2"));
    Assert.AreEqual(1, facetCounts.GetSpecificValue("dim3"));

    IOUtils.Dispose(indexWriter, taxonomyWriter, indexSearcher.IndexReader, taxonomyReader, indexDir, taxonomyDir);
}
/// <summary>
/// Parameterless constructor. Creates the facets configuration used by this
/// example: the "tags" and "genre" dimensions are both multi-valued and are
/// indexed under the "$tags" and "$genre" fields respectively.
/// </summary>
public AssociationsFacetsExample()
{
    FacetsConfig facetsConfig = new FacetsConfig();

    facetsConfig.SetMultiValued("tags", true);
    facetsConfig.SetIndexFieldName("tags", "$tags");

    facetsConfig.SetMultiValued("genre", true);
    facetsConfig.SetIndexFieldName("genre", "$genre");

    config = facetsConfig;
}
/// <summary>
/// Indexes a single document with facets in three dimensions that all require
/// dimension counts ("dim" is single-valued, "dim2" is multi-valued, "dim3" is
/// multi-valued and hierarchical) and checks the values reported by
/// <c>GetTopChildren</c> and <c>GetSpecificValue</c> for each of them.
/// </summary>
public virtual void TestRequireDimCount()
{
    Store.Directory indexDir = NewDirectory();
    Store.Directory taxonomyDir = NewDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), indexDir, Similarity, TimeZone);
    DirectoryTaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxonomyDir, IndexWriterConfig.OpenMode_e.CREATE);

    // Every dimension asks for an exact dimension-level count.
    FacetsConfig facetsConfig = new FacetsConfig();
    facetsConfig.SetRequireDimCount("dim", true);

    facetsConfig.SetMultiValued("dim2", true);
    facetsConfig.SetRequireDimCount("dim2", true);

    facetsConfig.SetMultiValued("dim3", true);
    facetsConfig.SetHierarchical("dim3", true);
    facetsConfig.SetRequireDimCount("dim3", true);

    Document document = new Document();
    document.Add(NewTextField("field", "text", Field.Store.NO));
    document.Add(new FacetField("dim", "a"));
    document.Add(new FacetField("dim2", "a"));
    document.Add(new FacetField("dim2", "b"));
    document.Add(new FacetField("dim3", "a", "b"));
    document.Add(new FacetField("dim3", "a", "c"));
    indexWriter.AddDocument(facetsConfig.Build(taxonomyWriter, document));

    // Near-real-time readers opened straight from the two writers.
    IndexSearcher indexSearcher = NewSearcher(indexWriter.Reader);
    DirectoryTaxonomyReader taxonomyReader = new DirectoryTaxonomyReader(taxonomyWriter);

    FacetsCollector collector = new FacetsCollector();
    indexSearcher.Search(new MatchAllDocsQuery(), collector);

    Facets facetCounts = GetTaxonomyFacetCounts(taxonomyReader, facetsConfig, collector);
    Assert.AreEqual(1, facetCounts.GetTopChildren(10, "dim").Value);
    Assert.AreEqual(1, facetCounts.GetTopChildren(10, "dim2").Value);
    Assert.AreEqual(1, facetCounts.GetTopChildren(10, "dim3").Value);

    // Asking for the dimension-level value of "dim" must be rejected.
    try
    {
        Assert.AreEqual(1, facetCounts.GetSpecificValue("dim"));
        Fail("didn't hit expected exception");
    }
    catch (System.ArgumentException)
    {
        // expected
    }

    Assert.AreEqual(1, facetCounts.GetSpecificValue("dim2"));
    Assert.AreEqual(1, facetCounts.GetSpecificValue("dim3"));

    IOUtils.Close(indexWriter, taxonomyWriter, indexSearcher.IndexReader, taxonomyReader, indexDir, taxonomyDir);
}
/// <summary>
/// Builds the facets configuration: dimensions "A" and "B" are multi-valued,
/// "B" additionally requires a dimension count, and "D" is hierarchical.
/// </summary>
/// <returns>A newly created, fully configured <see cref="FacetsConfig"/>.</returns>
private static FacetsConfig GetConfig()
{
    FacetsConfig facetsConfig = new FacetsConfig();

    facetsConfig.SetMultiValued("A", true);

    facetsConfig.SetMultiValued("B", true);
    facetsConfig.SetRequireDimCount("B", true);

    facetsConfig.SetHierarchical("D", true);

    return facetsConfig;
}
/// <summary>
/// One-time fixture setup: builds an index and taxonomy of 110 documents
/// carrying int and float association facets, then opens the readers that are
/// stored in the <c>reader</c> and <c>taxoReader</c> fields.
/// </summary>
public override void BeforeClass()
{
    base.BeforeClass();

    dir = NewDirectory();
    taxoDir = NewDirectory();

    // Preparations: index, taxonomy, content.
    DirectoryTaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxoDir);

    // Ints and floats cannot be mixed in the same indexed field, so each
    // dimension gets its own field.
    config = new FacetsConfig();
    config.SetIndexFieldName("int", "$facets.int");
    config.SetMultiValued("int", true);
    config.SetIndexFieldName("float", "$facets.float");
    config.SetMultiValued("float", true);

    RandomIndexWriter indexWriter = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, dir);

    for (int docIndex = 0; docIndex < 110; docIndex++)
    {
        Document document = new Document();

        // Every 11th document is added empty; this used to cause the
        // association aggregators to go into an infinite loop.
        bool hasFacets = docIndex % 11 != 0;
        if (hasFacets)
        {
            document.Add(new Int32AssociationFacetField(2, "int", "a"));
            document.Add(new SingleAssociationFacetField(0.5f, "float", "a"));

            // Even-numbered documents also carry the 'b' associations.
            if (docIndex % 2 == 0)
            {
                document.Add(new Int32AssociationFacetField(3, "int", "b"));
                document.Add(new SingleAssociationFacetField(0.2f, "float", "b"));
            }
        }

        indexWriter.AddDocument(config.Build(taxonomyWriter, document));
    }

    taxonomyWriter.Dispose();
    reader = indexWriter.GetReader();
    indexWriter.Dispose();
    taxoReader = new DirectoryTaxonomyReader(taxoDir);
}
/// <summary>
/// Add documents.
/// </summary>
/// <remarks>
/// Builds a <see cref="FacetsConfig"/> from the optional configuration arrays on
/// <paramref name="facetData"/> and then indexes the text and file facet fields
/// through <c>AddText</c> and <c>AddFile</c>. Any configuration array or field
/// collection that is null or empty is skipped. <paramref name="facetData"/>
/// itself is dereferenced unconditionally and must not be null.
/// </remarks>
/// <param name="writer">The index writer.</param>
/// <param name="facetWriter">The facet index writer.</param>
/// <param name="facetData">The complete facet information used to build the index information.</param>
public void AddDocuments(Lucene.Net.Index.IndexWriter writer, DirectoryTaxonomyWriter facetWriter, FacetData facetData)
{
    // Build the facet configuration information.
    FacetsConfig config = new FacetsConfig();

    // Hierarchical dimensions. (foreach over an empty array is a no-op, so
    // only the null check is needed.)
    if (facetData.Hierarchicals != null)
    {
        foreach (FacetData.Hierarchical item in facetData.Hierarchicals)
        {
            config.SetHierarchical(item.DimensionName, item.IsHierarchical);
        }
    }

    // Index field names.
    if (facetData.IndexFields != null)
    {
        foreach (FacetData.IndexField item in facetData.IndexFields)
        {
            config.SetIndexFieldName(item.DimensionName, item.IndexFieldName);
        }
    }

    // Multi-valued dimensions.
    if (facetData.MultiValues != null)
    {
        foreach (FacetData.MultiValued item in facetData.MultiValues)
        {
            config.SetMultiValued(item.DimensionName, item.IsMultiValue);
        }
    }

    // Dimensions that require accurate dimension counts.
    if (facetData.RequireDimensionCounts != null)
    {
        foreach (FacetData.RequireDimensionCount item in facetData.RequireDimensionCounts)
        {
            config.SetRequireDimCount(item.DimensionName, item.IsAccurateCountsRequired);
        }
    }

    // Add text data. The collection is null-checked like the configuration
    // arrays above so an unset collection is skipped instead of throwing.
    if (facetData.TextFacetFields != null && facetData.TextFacetFields.Count > 0)
    {
        AddText(writer, facetWriter, facetData.TextFacetFields, config);
    }

    // Add file data, with the same null tolerance.
    if (facetData.FileFacetFields != null && facetData.FileFacetFields.Count > 0)
    {
        AddFile(writer, facetWriter, facetData.FileFacetFields, config);
    }
}
/// <summary>
/// Marks every dimension named "0" through <c>maxDims - 1</c> (formatted with
/// the invariant culture) as both hierarchical and multi-valued.
/// </summary>
/// <param name="config">The facets configuration to update.</param>
public override void Configure(FacetsConfig config)
{
    int dimIndex = 0;
    while (dimIndex < maxDims)
    {
        string dimName = dimIndex.ToString(CultureInfo.InvariantCulture);
        config.SetHierarchical(dimName, true);
        config.SetMultiValued(dimName, true);
        dimIndex++;
    }
}
/// <summary>
/// Indexes two documents with sorted-set doc-values facets, verifies the top
/// children reported for dimensions "a" and "b", and checks that a drill-down
/// on a=foo and b=baz matches exactly one document.
/// </summary>
public virtual void TestBasic()
{
    AssumeTrue("Test requires SortedSetDV support", DefaultCodecSupportsSortedSet);

    Directory indexDir = NewDirectory();

    FacetsConfig facetsConfig = new FacetsConfig();
    facetsConfig.SetMultiValued("a", true);

    RandomIndexWriter indexWriter = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, indexDir);

    // First document: three values under "a" and one under "b".
    Document firstDoc = new Document();
    firstDoc.Add(new SortedSetDocValuesFacetField("a", "foo"));
    firstDoc.Add(new SortedSetDocValuesFacetField("a", "bar"));
    firstDoc.Add(new SortedSetDocValuesFacetField("a", "zoo"));
    firstDoc.Add(new SortedSetDocValuesFacetField("b", "baz"));
    indexWriter.AddDocument(facetsConfig.Build(firstDoc));

    // Randomly commit between the two documents.
    if (Random.NextBoolean())
    {
        indexWriter.Commit();
    }

    // Second document: only a=foo.
    Document secondDoc = new Document();
    secondDoc.Add(new SortedSetDocValuesFacetField("a", "foo"));
    indexWriter.AddDocument(facetsConfig.Build(secondDoc));

    // NRT open
    IndexSearcher indexSearcher = NewSearcher(indexWriter.GetReader());

    // Per-top-reader state:
    SortedSetDocValuesReaderState readerState = new DefaultSortedSetDocValuesReaderState(indexSearcher.IndexReader);

    FacetsCollector collector = new FacetsCollector();
    indexSearcher.Search(new MatchAllDocsQuery(), collector);

    SortedSetDocValuesFacetCounts facetCounts = new SortedSetDocValuesFacetCounts(readerState, collector);

    Assert.AreEqual("dim=a path=[] value=4 childCount=3\n foo (2)\n bar (1)\n zoo (1)\n", facetCounts.GetTopChildren(10, "a").ToString());
    Assert.AreEqual("dim=b path=[] value=1 childCount=1\n baz (1)\n", facetCounts.GetTopChildren(10, "b").ToString());

    // DrillDown:
    DrillDownQuery drillDown = new DrillDownQuery(facetsConfig);
    drillDown.Add("a", "foo");
    drillDown.Add("b", "baz");
    TopDocs topDocs = indexSearcher.Search(drillDown, 1);
    Assert.AreEqual(1, topDocs.TotalHits);

    IOUtils.Dispose(indexWriter, indexSearcher.IndexReader, indexDir);
}
/// <summary>
/// Indexes one document with two paths under a dimension that is both
/// hierarchical and multi-valued, expects <c>GetSpecificValue("a")</c> to be
/// rejected, and checks that <c>GetTopChildren</c> reports a single child with
/// a value of 1.
/// </summary>
public virtual void TestMultiValuedHierarchy()
{
    Store.Directory indexDir = NewDirectory();
    Store.Directory taxonomyDir = NewDirectory();
    DirectoryTaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxonomyDir, OpenMode.CREATE);

    FacetsConfig facetsConfig = new FacetsConfig();
    facetsConfig.SetHierarchical("a", true);
    facetsConfig.SetMultiValued("a", true);

    RandomIndexWriter indexWriter = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, indexDir);

    Document document = new Document();
    document.Add(NewTextField("field", "text", Field.Store.NO));
    document.Add(new FacetField("a", "path", "x"));
    document.Add(new FacetField("a", "path", "y"));
    indexWriter.AddDocument(facetsConfig.Build(taxonomyWriter, document));

    // Near-real-time readers opened straight from the two writers.
    IndexSearcher indexSearcher = NewSearcher(indexWriter.GetReader());
    DirectoryTaxonomyReader taxonomyReader = new DirectoryTaxonomyReader(taxonomyWriter);

    // Aggregate the facet counts. MatchAllDocsQuery is for "browsing" (it
    // counts facets for all non-deleted docs in the index); normally you'd
    // use a "normal" query and a MultiCollector to gather hits and facets.
    FacetsCollector collector = new FacetsCollector();
    indexSearcher.Search(new MatchAllDocsQuery(), collector);
    Facets facetCounts = GetTaxonomyFacetCounts(taxonomyReader, facetsConfig, collector);

    try
    {
        facetCounts.GetSpecificValue("a");
        fail("didn't hit expected exception");
    }
    catch (Exception ex) when (ex.IsIllegalArgumentException())
    {
        // expected
    }

    FacetResult topChildren = facetCounts.GetTopChildren(10, "a");
    Assert.AreEqual(1, topChildren.LabelValues.Length);
    Assert.AreEqual(1, (int)topChildren.LabelValues[0].Value);

    IOUtils.Dispose(indexWriter, taxonomyWriter, indexSearcher.IndexReader, taxonomyReader, indexDir, taxonomyDir);
}
/// <summary>
/// Indexes a single document carrying between 40000 and 100000 distinct labels
/// in one multi-valued dimension and checks that every label comes back
/// exactly once with a count of 1.
/// </summary>
public virtual void TestManyFacetsInOneDocument()
{
    AssumeTrue("default Codec doesn't support huge BinaryDocValues", TestUtil.FieldSupportsHugeBinaryDocValues(FacetsConfig.DEFAULT_INDEX_FIELD_NAME));

    Store.Directory indexDir = NewDirectory();
    Store.Directory taxonomyDir = NewDirectory();
    IndexWriterConfig writerConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), indexDir, writerConfig);
    DirectoryTaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxonomyDir, IndexWriterConfig.OpenMode_e.CREATE);

    FacetsConfig facetsConfig = new FacetsConfig();
    facetsConfig.SetMultiValued("dim", true);

    int labelCount = TestUtil.NextInt(Random(), 40000, 100000);

    Document document = new Document();
    document.Add(NewTextField("field", "text", Field.Store.NO));
    for (int labelIndex = 0; labelIndex < labelCount; labelIndex++)
    {
        document.Add(new FacetField("dim", "" + labelIndex));
    }
    indexWriter.AddDocument(facetsConfig.Build(taxonomyWriter, document));

    // Near-real-time readers opened straight from the two writers.
    IndexSearcher indexSearcher = NewSearcher(indexWriter.Reader);
    DirectoryTaxonomyReader taxonomyReader = new DirectoryTaxonomyReader(taxonomyWriter);

    // Aggregate the facet counts. MatchAllDocsQuery is for "browsing" (it
    // counts facets for all non-deleted docs in the index); normally you'd
    // use a "normal" query and a MultiCollector to gather hits and facets.
    FacetsCollector collector = new FacetsCollector();
    indexSearcher.Search(new MatchAllDocsQuery(), collector);
    Facets facetCounts = GetTaxonomyFacetCounts(taxonomyReader, facetsConfig, collector);

    FacetResult topChildren = facetCounts.GetTopChildren(int.MaxValue, "dim");
    Assert.AreEqual(labelCount, topChildren.LabelValues.Length);

    // Collect labels into a set so duplicates would show up as a smaller count.
    HashSet<string> seenLabels = new HashSet<string>();
    foreach (LabelAndValue entry in topChildren.LabelValues)
    {
        seenLabels.Add(entry.Label);
        Assert.AreEqual(1, (int)entry.Value);
    }
    Assert.AreEqual(labelCount, seenLabels.Count);

    IOUtils.Close(indexSearcher.IndexReader, taxonomyWriter, indexWriter, taxonomyReader, indexDir, taxonomyDir);
}
/// <summary>
/// One-time fixture setup: builds an index and taxonomy of 110 documents
/// carrying int and float association facets, then opens the readers that are
/// stored in the <c>reader</c> and <c>taxoReader</c> fields.
/// </summary>
public void BeforeClass()
{
    dir = NewDirectory();
    taxoDir = NewDirectory();

    // Preparations: index, taxonomy, content.
    DirectoryTaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxoDir);

    // Ints and floats cannot be mixed in the same indexed field, so each
    // dimension gets its own field.
    config = new FacetsConfig();
    config.SetIndexFieldName("int", "$facets.int");
    config.SetMultiValued("int", true);
    config.SetIndexFieldName("float", "$facets.float");
    config.SetMultiValued("float", true);

    RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

    for (int docIndex = 0; docIndex < 110; docIndex++)
    {
        Document document = new Document();

        // Every 11th document is added empty; this used to cause the
        // association aggregators to go into an infinite loop.
        bool hasFacets = docIndex % 11 != 0;
        if (hasFacets)
        {
            document.Add(new IntAssociationFacetField(2, "int", "a"));
            document.Add(new FloatAssociationFacetField(0.5f, "float", "a"));

            // Even-numbered documents also carry the 'b' associations.
            if (docIndex % 2 == 0)
            {
                document.Add(new IntAssociationFacetField(3, "int", "b"));
                document.Add(new FloatAssociationFacetField(0.2f, "float", "b"));
            }
        }

        indexWriter.AddDocument(config.Build(taxonomyWriter, document));
    }

    taxonomyWriter.Dispose();
    reader = indexWriter.Reader;
    indexWriter.Dispose();
    taxoReader = new DirectoryTaxonomyReader(taxoDir);
}
/// <summary>
/// One-time static fixture setup: builds an index and taxonomy of 110
/// documents carrying int and float association facets, then opens the readers
/// that are stored in the <c>reader</c> and <c>taxoReader</c> fields.
/// </summary>
public static void BeforeClass()
{
    dir = NewDirectory();
    taxoDir = NewDirectory();

    // Preparations: index, taxonomy, content.
    DirectoryTaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxoDir);

    // Ints and floats cannot be mixed in the same indexed field, so each
    // dimension gets its own field.
    config = new FacetsConfig();
    config.SetIndexFieldName("int", "$facets.int");
    config.SetMultiValued("int", true);
    config.SetIndexFieldName("float", "$facets.float");
    config.SetMultiValued("float", true);

    RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), dir);

    for (int docIndex = 0; docIndex < 110; docIndex++)
    {
        Document document = new Document();

        // Every 11th document is added empty; this used to cause the
        // association aggregators to go into an infinite loop.
        bool hasFacets = docIndex % 11 != 0;
        if (hasFacets)
        {
            document.Add(new IntAssociationFacetField(2, "int", "a"));
            document.Add(new FloatAssociationFacetField(0.5f, "float", "a"));

            // Even-numbered documents also carry the 'b' associations.
            if (docIndex % 2 == 0)
            {
                document.Add(new IntAssociationFacetField(3, "int", "b"));
                document.Add(new FloatAssociationFacetField(0.2f, "float", "b"));
            }
        }

        indexWriter.AddDocument(config.Build(taxonomyWriter, document));
    }

    taxonomyWriter.Dispose();
    reader = indexWriter.Reader;
    indexWriter.Dispose();
    taxoReader = new DirectoryTaxonomyReader(taxoDir);
}
/// <summary>
/// Indexes two documents with sorted-set doc-values facets, verifies the top
/// children reported for dimensions "a" and "b", and checks that a drill-down
/// on a=foo and b=baz matches exactly one document.
/// </summary>
public virtual void TestBasic()
{
    AssumeTrue("Test requires SortedSetDV support", DefaultCodecSupportsSortedSet());

    Directory indexDir = NewDirectory();

    FacetsConfig facetsConfig = new FacetsConfig();
    facetsConfig.SetMultiValued("a", true);

    RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), indexDir, Similarity, TimeZone);

    // First document: three values under "a" and one under "b".
    Document firstDoc = new Document();
    firstDoc.Add(new SortedSetDocValuesFacetField("a", "foo"));
    firstDoc.Add(new SortedSetDocValuesFacetField("a", "bar"));
    firstDoc.Add(new SortedSetDocValuesFacetField("a", "zoo"));
    firstDoc.Add(new SortedSetDocValuesFacetField("b", "baz"));
    indexWriter.AddDocument(facetsConfig.Build(firstDoc));

    // Randomly commit between the two documents.
    if (Random().NextBoolean())
    {
        indexWriter.Commit();
    }

    // Second document: only a=foo.
    Document secondDoc = new Document();
    secondDoc.Add(new SortedSetDocValuesFacetField("a", "foo"));
    indexWriter.AddDocument(facetsConfig.Build(secondDoc));

    // NRT open
    IndexSearcher indexSearcher = NewSearcher(indexWriter.Reader);

    // Per-top-reader state:
    SortedSetDocValuesReaderState readerState = new DefaultSortedSetDocValuesReaderState(indexSearcher.IndexReader);

    FacetsCollector collector = new FacetsCollector();
    indexSearcher.Search(new MatchAllDocsQuery(), collector);

    SortedSetDocValuesFacetCounts facetCounts = new SortedSetDocValuesFacetCounts(readerState, collector);

    Assert.AreEqual("dim=a path=[] value=4 childCount=3\n foo (2)\n bar (1)\n zoo (1)\n", facetCounts.GetTopChildren(10, "a").ToString());
    Assert.AreEqual("dim=b path=[] value=1 childCount=1\n baz (1)\n", facetCounts.GetTopChildren(10, "b").ToString());

    // DrillDown:
    DrillDownQuery drillDown = new DrillDownQuery(facetsConfig);
    drillDown.Add("a", "foo");
    drillDown.Add("b", "baz");
    TopDocs topDocs = indexSearcher.Search(drillDown, 1);
    Assert.AreEqual(1, topDocs.TotalHits);

    IOUtils.Close(indexWriter, indexSearcher.IndexReader, indexDir);
}
/// <summary>
/// Indexes two labels that contain the control characters U+001F and U+001E
/// and checks that both can be looked up individually and appear unchanged in
/// the top-children result.
/// </summary>
public virtual void TestLabelWithDelimiter()
{
    Store.Directory indexDir = NewDirectory();
    Store.Directory taxonomyDir = NewDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, indexDir);
    DirectoryTaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxonomyDir, OpenMode.CREATE);

    FacetsConfig facetsConfig = new FacetsConfig();
    facetsConfig.SetMultiValued("dim", true);

    Document document = new Document();
    document.Add(NewTextField("field", "text", Field.Store.NO));
    document.Add(new FacetField("dim", "test\u001Fone"));
    document.Add(new FacetField("dim", "test\u001Etwo"));
    indexWriter.AddDocument(facetsConfig.Build(taxonomyWriter, document));

    // Near-real-time readers opened straight from the two writers.
    IndexSearcher indexSearcher = NewSearcher(indexWriter.GetReader());
    DirectoryTaxonomyReader taxonomyReader = new DirectoryTaxonomyReader(taxonomyWriter);

    FacetsCollector collector = new FacetsCollector();
    indexSearcher.Search(new MatchAllDocsQuery(), collector);

    Facets facetCounts = GetTaxonomyFacetCounts(taxonomyReader, facetsConfig, collector);
    Assert.AreEqual(1, facetCounts.GetSpecificValue("dim", "test\u001Fone"));
    Assert.AreEqual(1, facetCounts.GetSpecificValue("dim", "test\u001Etwo"));

    FacetResult topChildren = facetCounts.GetTopChildren(10, "dim");
    Assert.AreEqual("dim=dim path=[] value=-1 childCount=2\n test\u001Fone (1)\n test\u001Etwo (1)\n", topChildren.ToString());

    IOUtils.Dispose(indexWriter, taxonomyWriter, indexSearcher.IndexReader, taxonomyReader, indexDir, taxonomyDir);
}
/// <summary>
/// Indexes a single document carrying between 40000 and 100000 distinct labels
/// in one multi-valued dimension and checks that every label comes back
/// exactly once with a count of 1.
/// </summary>
public virtual void TestManyFacetsInOneDocument()
{
    AssumeTrue("default Codec doesn't support huge BinaryDocValues", TestUtil.FieldSupportsHugeBinaryDocValues(FacetsConfig.DEFAULT_INDEX_FIELD_NAME));

    Store.Directory indexDir = NewDirectory();
    Store.Directory taxonomyDir = NewDirectory();
    IndexWriterConfig writerConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), indexDir, writerConfig);
    DirectoryTaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxonomyDir, IndexWriterConfig.OpenMode_e.CREATE);

    FacetsConfig facetsConfig = new FacetsConfig();
    facetsConfig.SetMultiValued("dim", true);

    int labelCount = TestUtil.NextInt(Random(), 40000, 100000);

    Document document = new Document();
    document.Add(NewTextField("field", "text", Field.Store.NO));
    for (int labelIndex = 0; labelIndex < labelCount; labelIndex++)
    {
        document.Add(new FacetField("dim", "" + labelIndex));
    }
    indexWriter.AddDocument(facetsConfig.Build(taxonomyWriter, document));

    // Near-real-time readers opened straight from the two writers.
    IndexSearcher indexSearcher = NewSearcher(indexWriter.Reader);
    DirectoryTaxonomyReader taxonomyReader = new DirectoryTaxonomyReader(taxonomyWriter);

    // Aggregate the facet counts. MatchAllDocsQuery is for "browsing" (it
    // counts facets for all non-deleted docs in the index); normally you'd
    // use a "normal" query and a MultiCollector to gather hits and facets.
    FacetsCollector collector = new FacetsCollector();
    indexSearcher.Search(new MatchAllDocsQuery(), collector);
    Facets facetCounts = GetTaxonomyFacetCounts(taxonomyReader, facetsConfig, collector);

    FacetResult topChildren = facetCounts.GetTopChildren(int.MaxValue, "dim");
    Assert.AreEqual(labelCount, topChildren.LabelValues.Length);

    // Collect labels into a set so duplicates would show up as a smaller count.
    HashSet<string> seenLabels = new HashSet<string>();
    foreach (LabelAndValue entry in topChildren.LabelValues)
    {
        seenLabels.Add(entry.label);
        Assert.AreEqual(1, (int)entry.value);
    }
    Assert.AreEqual(labelCount, seenLabels.Count);

    IOUtils.Close(indexSearcher.IndexReader, taxonomyWriter, indexWriter, taxonomyReader, indexDir, taxonomyDir);
}
/// <summary>
/// Runs several indexing threads against a shared index writer and taxonomy
/// writer, then verifies that the taxonomy contains exactly the categories
/// recorded in <c>values</c> (plus the root) and that every category's parent
/// ordinal matches its path prefix.
/// </summary>
public virtual void TestConcurrency()
{
    AtomicInt32 numDocs = new AtomicInt32(AtLeast(10000));
    Directory indexDir = NewDirectory();
    Directory taxoDir = NewDirectory();
    ConcurrentDictionary<string, string> values = new ConcurrentDictionary<string, string>();
    IndexWriter indexWriter = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, null));
    DirectoryTaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE, NewTaxoWriterCache(numDocs));
    ThreadJob[] workers = new ThreadJob[AtLeast(4)];

    // Dimensions "l1.0" .. "l1.9" are hierarchical and multi-valued.
    FacetsConfig facetsConfig = new FacetsConfig();
    for (int dimIndex = 0; dimIndex < 10; dimIndex++)
    {
        string dimName = "l1." + dimIndex;
        facetsConfig.SetHierarchical(dimName, true);
        facetsConfig.SetMultiValued(dimName, true);
    }

    for (int slot = 0; slot < workers.Length; slot++)
    {
        workers[slot] = new ThreadAnonymousInnerClassHelper(this, numDocs, values, indexWriter, taxonomyWriter, facetsConfig);
    }

    foreach (ThreadJob worker in workers)
    {
        worker.Start();
    }
    foreach (ThreadJob worker in workers)
    {
        worker.Join();
    }

    DirectoryTaxonomyReader taxonomyReader = new DirectoryTaxonomyReader(taxonomyWriter);

    // +1 for root category
    if (values.Count + 1 != taxonomyReader.Count)
    {
        // Report every recorded path the taxonomy does not know before failing.
        foreach (string recorded in values.Keys)
        {
            FacetLabel label = new FacetLabel(FacetsConfig.StringToPath(recorded));
            if (taxonomyReader.GetOrdinal(label) == -1)
            {
                Console.WriteLine("FAIL: path=" + label + " not recognized");
            }
        }
        fail("mismatch number of categories");
    }

    int[] parentOrdinals = taxonomyReader.ParallelTaxonomyArrays.Parents;
    foreach (string category in values.Keys)
    {
        FacetLabel categoryPath = new FacetLabel(FacetsConfig.StringToPath(category));
        Assert.IsTrue(taxonomyReader.GetOrdinal(categoryPath) > 0, "category not found " + categoryPath);

        int depth = categoryPath.Length;
        int expectedParent = 0; // for root, parent is always virtual ROOT (ord=0)
        for (int level = 0; level < depth; level++)
        {
            FacetLabel prefix = categoryPath.Subpath(level + 1);
            int ordinal = taxonomyReader.GetOrdinal(prefix);
            Assert.AreEqual(expectedParent, parentOrdinals[ordinal], "invalid parent for cp=" + prefix);
            expectedParent = ordinal; // next level should have this parent
        }
    }

    IOUtils.Dispose(taxonomyWriter, indexWriter, taxonomyReader, taxoDir, indexDir);
}
/// <summary>
/// Exercises a directory-based <c>SearcherTaxonomyManager</c>: while an indexer
/// thread adds facets, the test thread repeatedly acquires a searcher/taxonomy
/// pair and checks that the "field" dimension has children whenever the index
/// holds at least one document.
/// </summary>
// LUCENENET specific - name collides with property of LuceneTestCase
public virtual void Test_Directory()
{
    Store.Directory indexDir = NewDirectory();
    Store.Directory taxoDir = NewDirectory();
    IndexWriter indexWriter = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));
    DirectoryTaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxoDir);

    // first empty commit
    indexWriter.Commit();
    taxonomyWriter.Commit();

    SearcherTaxonomyManager manager = new SearcherTaxonomyManager(indexDir, taxoDir, null);

    FacetsConfig facetsConfig = new FacetsConfig();
    facetsConfig.SetMultiValued("field", true);
    AtomicBoolean stop = new AtomicBoolean();

    // How many unique facets to index before stopping.
    // LUCENENET specific: the original nightly limit of 100000 facets takes
    // about 2-3 hours. To keep it under the 1 hour free limit of Azure DevOps,
    // this was reduced to 30000.
    int ordLimit = TestNightly ? 30000 : 6000;

    var indexer = new IndexerThread(indexWriter, facetsConfig, taxonomyWriter, manager, ordLimit, stop);
    indexer.Start();

    try
    {
        while (!stop)
        {
            SearcherAndTaxonomy snapshot = manager.Acquire();
            try
            {
                FacetsCollector collector = new FacetsCollector();
                snapshot.Searcher.Search(new MatchAllDocsQuery(), collector);
                Facets facetCounts = GetTaxonomyFacetCounts(snapshot.TaxonomyReader, facetsConfig, collector);
                FacetResult topChildren = facetCounts.GetTopChildren(10, "field");

                if (snapshot.Searcher.IndexReader.NumDocs > 0)
                {
                    Assert.IsTrue(topChildren.ChildCount > 0);
                    Assert.IsTrue(topChildren.LabelValues.Length > 0);
                }
            }
            finally
            {
                manager.Release(snapshot);
            }
        }
    }
    finally
    {
        indexer.Join();
    }

    if (Verbose)
    {
        Console.WriteLine("TEST: now stop");
    }

    IOUtils.Dispose(manager, taxonomyWriter, indexWriter, taxoDir, indexDir);
}
/// <summary>
/// Exercises a near-real-time <c>SearcherTaxonomyManager</c> built on the
/// writers: an indexer thread and a reopener thread run while the test thread
/// repeatedly acquires a searcher/taxonomy pair and checks that the "field"
/// dimension has children whenever the index holds at least one document.
/// </summary>
public virtual void TestNrt()
{
    Store.Directory indexDir = NewDirectory();
    Store.Directory taxoDir = NewDirectory();
    IndexWriterConfig writerConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

    // Don't allow tiny maxBufferedDocs; it can make this test too slow:
    writerConfig.SetMaxBufferedDocs(Math.Max(500, writerConfig.MaxBufferedDocs));

    // MockRandom/AlcoholicMergePolicy are too slow:
    TieredMergePolicy mergePolicy = new TieredMergePolicy();
    mergePolicy.FloorSegmentMB = .001;
    writerConfig.SetMergePolicy(mergePolicy);

    IndexWriter indexWriter = new IndexWriter(indexDir, writerConfig);
    DirectoryTaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxoDir);

    FacetsConfig facetsConfig = new FacetsConfig();
    facetsConfig.SetMultiValued("field", true);
    AtomicBoolean stop = new AtomicBoolean();

    // How many unique facets to index before stopping:
    int ordLimit = TEST_NIGHTLY ? 100000 : 6000;

    var indexer = new IndexerThread(indexWriter, facetsConfig, taxonomyWriter, null, ordLimit, stop);
    SearcherTaxonomyManager manager = new SearcherTaxonomyManager(indexWriter, true, null, taxonomyWriter);
    var reopener = new ThreadAnonymousInnerClassHelper(this, stop, manager);

    reopener.Name = "reopener";
    reopener.Start();

    indexer.Name = "indexer";
    indexer.Start();

    try
    {
        while (!stop.Get())
        {
            SearcherAndTaxonomy snapshot = manager.Acquire();
            try
            {
                FacetsCollector collector = new FacetsCollector();
                snapshot.searcher.Search(new MatchAllDocsQuery(), collector);
                Facets facetCounts = GetTaxonomyFacetCounts(snapshot.taxonomyReader, facetsConfig, collector);
                FacetResult topChildren = facetCounts.GetTopChildren(10, "field");

                if (snapshot.searcher.IndexReader.NumDocs > 0)
                {
                    Assert.True(topChildren.ChildCount > 0);
                    Assert.True(topChildren.LabelValues.Length > 0);
                }
            }
            finally
            {
                manager.Release(snapshot);
            }
        }
    }
    finally
    {
        indexer.Join();
        reopener.Join();
    }

    if (VERBOSE)
    {
        Console.WriteLine("TEST: now stop");
    }

    IOUtils.Close(manager, taxonomyWriter, indexWriter, taxoDir, indexDir);
}
/// <summary>
/// Exercises a directory-based <c>SearcherTaxonomyManager</c>: while an indexer
/// thread adds facets, the test thread repeatedly acquires a searcher/taxonomy
/// pair and checks that the "field" dimension has children whenever the index
/// holds at least one document.
/// </summary>
public virtual void TestDirectory()
{
    Store.Directory indexDir = NewDirectory();
    Store.Directory taxoDir = NewDirectory();
    IndexWriter indexWriter = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
    DirectoryTaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxoDir);

    // first empty commit
    indexWriter.Commit();
    taxonomyWriter.Commit();

    SearcherTaxonomyManager manager = new SearcherTaxonomyManager(indexDir, taxoDir, null);

    FacetsConfig facetsConfig = new FacetsConfig();
    facetsConfig.SetMultiValued("field", true);
    AtomicBoolean stop = new AtomicBoolean();

    // How many unique facets to index before stopping:
    int ordLimit = TEST_NIGHTLY ? 100000 : 6000;

    var indexer = new IndexerThread(indexWriter, facetsConfig, taxonomyWriter, manager, ordLimit, stop);
    indexer.Start();

    try
    {
        while (!stop.Get())
        {
            SearcherAndTaxonomy snapshot = manager.Acquire();
            try
            {
                FacetsCollector collector = new FacetsCollector();
                snapshot.searcher.Search(new MatchAllDocsQuery(), collector);
                Facets facetCounts = GetTaxonomyFacetCounts(snapshot.taxonomyReader, facetsConfig, collector);
                FacetResult topChildren = facetCounts.GetTopChildren(10, "field");

                if (snapshot.searcher.IndexReader.NumDocs > 0)
                {
                    Assert.True(topChildren.ChildCount > 0);
                    Assert.True(topChildren.LabelValues.Length > 0);
                }
            }
            finally
            {
                manager.Release(snapshot);
            }
        }
    }
    finally
    {
        indexer.Join();
    }

    if (VERBOSE)
    {
        Console.WriteLine("TEST: now stop");
    }

    IOUtils.Close(manager, taxonomyWriter, indexWriter, taxoDir, indexDir);
}
/// <summary>
/// Runs several indexing threads against a shared index writer and taxonomy
/// writer, then verifies that the taxonomy contains exactly the categories
/// recorded in <c>values</c> (plus the root) and that every category's parent
/// ordinal matches its path prefix.
/// </summary>
public virtual void TestConcurrency()
{
    AtomicInteger numDocs = new AtomicInteger(AtLeast(10000));
    Directory indexDir = NewDirectory();
    Directory taxoDir = NewDirectory();
    ConcurrentDictionary<string, string> values = new ConcurrentDictionary<string, string>();
    IndexWriter indexWriter = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, null));
    DirectoryTaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE, NewTaxoWriterCache(numDocs.Get()));
    ThreadClass[] workers = new ThreadClass[AtLeast(4)];

    // Dimensions "l1.0" .. "l1.9" are hierarchical and multi-valued.
    FacetsConfig facetsConfig = new FacetsConfig();
    for (int dimIndex = 0; dimIndex < 10; dimIndex++)
    {
        string dimName = "l1." + dimIndex;
        facetsConfig.SetHierarchical(dimName, true);
        facetsConfig.SetMultiValued(dimName, true);
    }

    for (int slot = 0; slot < workers.Length; slot++)
    {
        workers[slot] = new ThreadAnonymousInnerClassHelper(this, numDocs, values, indexWriter, taxonomyWriter, facetsConfig);
    }

    foreach (ThreadClass worker in workers)
    {
        worker.Start();
    }
    foreach (ThreadClass worker in workers)
    {
        worker.Join();
    }

    DirectoryTaxonomyReader taxonomyReader = new DirectoryTaxonomyReader(taxonomyWriter);

    // +1 for root category
    if (values.Count + 1 != taxonomyReader.Count)
    {
        // Report every recorded path the taxonomy does not know before failing.
        foreach (string recorded in values.Keys)
        {
            FacetLabel label = new FacetLabel(FacetsConfig.StringToPath(recorded));
            if (taxonomyReader.GetOrdinal(label) == -1)
            {
                Console.WriteLine("FAIL: path=" + label + " not recognized");
            }
        }
        Fail("mismatch number of categories");
    }

    int[] parentOrdinals = taxonomyReader.ParallelTaxonomyArrays.Parents;
    foreach (string category in values.Keys)
    {
        FacetLabel categoryPath = new FacetLabel(FacetsConfig.StringToPath(category));
        Assert.True(taxonomyReader.GetOrdinal(categoryPath) > 0, "category not found " + categoryPath);

        int depth = categoryPath.Length;
        int expectedParent = 0; // for root, parent is always virtual ROOT (ord=0)
        for (int level = 0; level < depth; level++)
        {
            FacetLabel prefix = categoryPath.Subpath(level + 1);
            int ordinal = taxonomyReader.GetOrdinal(prefix);
            Assert.AreEqual(expectedParent, parentOrdinals[ordinal], "invalid parent for cp=" + prefix);
            expectedParent = ordinal; // next level should have this parent
        }
    }

    IOUtils.Close(taxonomyWriter, indexWriter, taxonomyReader, taxoDir, indexDir);
}
/// <summary>
/// Indexes two labels that contain the control characters U+001F and U+001E
/// and checks that both can be looked up individually and appear unchanged in
/// the top-children result.
/// </summary>
public virtual void TestLabelWithDelimiter()
{
    Store.Directory indexDir = NewDirectory();
    Store.Directory taxonomyDir = NewDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), indexDir);
    DirectoryTaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxonomyDir, IndexWriterConfig.OpenMode_e.CREATE);

    FacetsConfig facetsConfig = new FacetsConfig();
    facetsConfig.SetMultiValued("dim", true);

    Document document = new Document();
    document.Add(NewTextField("field", "text", Field.Store.NO));
    document.Add(new FacetField("dim", "test\u001Fone"));
    document.Add(new FacetField("dim", "test\u001Etwo"));
    indexWriter.AddDocument(facetsConfig.Build(taxonomyWriter, document));

    // Near-real-time readers opened straight from the two writers.
    IndexSearcher indexSearcher = NewSearcher(indexWriter.Reader);
    DirectoryTaxonomyReader taxonomyReader = new DirectoryTaxonomyReader(taxonomyWriter);

    FacetsCollector collector = new FacetsCollector();
    indexSearcher.Search(new MatchAllDocsQuery(), collector);

    Facets facetCounts = GetTaxonomyFacetCounts(taxonomyReader, facetsConfig, collector);
    Assert.AreEqual(1, facetCounts.GetSpecificValue("dim", "test\u001Fone"));
    Assert.AreEqual(1, facetCounts.GetSpecificValue("dim", "test\u001Etwo"));

    FacetResult topChildren = facetCounts.GetTopChildren(10, "dim");
    Assert.AreEqual("dim=dim path=[] value=-1 childCount=2\n test\u001Fone (1)\n test\u001Etwo (1)\n", topChildren.ToString());

    IOUtils.Close(indexWriter, taxonomyWriter, indexSearcher.IndexReader, taxonomyReader, indexDir, taxonomyDir);
}
/// <summary>
/// Indexes a single document with facets in three dimensions that all require
/// dimension counts ("dim" is single-valued, "dim2" is multi-valued, "dim3" is
/// multi-valued and hierarchical) and checks the values reported by
/// <c>GetTopChildren</c> and <c>GetSpecificValue</c> for each of them.
/// </summary>
public virtual void TestRequireDimCount()
{
    Store.Directory indexDir = NewDirectory();
    Store.Directory taxonomyDir = NewDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), indexDir);
    DirectoryTaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxonomyDir, IndexWriterConfig.OpenMode_e.CREATE);

    // Every dimension asks for an exact dimension-level count.
    FacetsConfig facetsConfig = new FacetsConfig();
    facetsConfig.SetRequireDimCount("dim", true);

    facetsConfig.SetMultiValued("dim2", true);
    facetsConfig.SetRequireDimCount("dim2", true);

    facetsConfig.SetMultiValued("dim3", true);
    facetsConfig.SetHierarchical("dim3", true);
    facetsConfig.SetRequireDimCount("dim3", true);

    Document document = new Document();
    document.Add(NewTextField("field", "text", Field.Store.NO));
    document.Add(new FacetField("dim", "a"));
    document.Add(new FacetField("dim2", "a"));
    document.Add(new FacetField("dim2", "b"));
    document.Add(new FacetField("dim3", "a", "b"));
    document.Add(new FacetField("dim3", "a", "c"));
    indexWriter.AddDocument(facetsConfig.Build(taxonomyWriter, document));

    // Near-real-time readers opened straight from the two writers.
    IndexSearcher indexSearcher = NewSearcher(indexWriter.Reader);
    DirectoryTaxonomyReader taxonomyReader = new DirectoryTaxonomyReader(taxonomyWriter);

    FacetsCollector collector = new FacetsCollector();
    indexSearcher.Search(new MatchAllDocsQuery(), collector);

    Facets facetCounts = GetTaxonomyFacetCounts(taxonomyReader, facetsConfig, collector);
    Assert.AreEqual(1, facetCounts.GetTopChildren(10, "dim").Value);
    Assert.AreEqual(1, facetCounts.GetTopChildren(10, "dim2").Value);
    Assert.AreEqual(1, facetCounts.GetTopChildren(10, "dim3").Value);

    // Asking for the dimension-level value of "dim" must be rejected.
    try
    {
        Assert.AreEqual(1, facetCounts.GetSpecificValue("dim"));
        Fail("didn't hit expected exception");
    }
    catch (System.ArgumentException)
    {
        // expected
    }

    Assert.AreEqual(1, facetCounts.GetSpecificValue("dim2"));
    Assert.AreEqual(1, facetCounts.GetSpecificValue("dim3"));

    IOUtils.Close(indexWriter, taxonomyWriter, indexSearcher.IndexReader, taxonomyReader, indexDir, taxonomyDir);
}
/// <summary>
/// Indexes one document with two paths under a dimension that is both
/// hierarchical and multi-valued, expects <c>GetSpecificValue("a")</c> to be
/// rejected, and checks that <c>GetTopChildren</c> reports a single child with
/// a value of 1.
/// </summary>
public virtual void TestMultiValuedHierarchy()
{
    Store.Directory indexDir = NewDirectory();
    Store.Directory taxonomyDir = NewDirectory();
    DirectoryTaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxonomyDir, IndexWriterConfig.OpenMode_e.CREATE);

    FacetsConfig facetsConfig = new FacetsConfig();
    facetsConfig.SetHierarchical("a", true);
    facetsConfig.SetMultiValued("a", true);

    RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), indexDir);

    Document document = new Document();
    document.Add(NewTextField("field", "text", Field.Store.NO));
    document.Add(new FacetField("a", "path", "x"));
    document.Add(new FacetField("a", "path", "y"));
    indexWriter.AddDocument(facetsConfig.Build(taxonomyWriter, document));

    // Near-real-time readers opened straight from the two writers.
    IndexSearcher indexSearcher = NewSearcher(indexWriter.Reader);
    DirectoryTaxonomyReader taxonomyReader = new DirectoryTaxonomyReader(taxonomyWriter);

    // Aggregate the facet counts. MatchAllDocsQuery is for "browsing" (it
    // counts facets for all non-deleted docs in the index); normally you'd
    // use a "normal" query and a MultiCollector to gather hits and facets.
    FacetsCollector collector = new FacetsCollector();
    indexSearcher.Search(new MatchAllDocsQuery(), collector);
    Facets facetCounts = GetTaxonomyFacetCounts(taxonomyReader, facetsConfig, collector);

    try
    {
        facetCounts.GetSpecificValue("a");
        Fail("didn't hit expected exception");
    }
    catch (System.ArgumentException)
    {
        // expected
    }

    FacetResult topChildren = facetCounts.GetTopChildren(10, "a");
    Assert.AreEqual(1, topChildren.LabelValues.Length);
    Assert.AreEqual(1, (int)topChildren.LabelValues[0].value);

    IOUtils.Close(indexWriter, taxonomyWriter, indexSearcher.IndexReader, taxonomyReader, indexDir, taxonomyDir);
}