public virtual void TestBasic() { AssumeTrue("Test requires SortedSetDV support", DefaultCodecSupportsSortedSet()); Directory dir = NewDirectory(); FacetsConfig config = new FacetsConfig(); config.SetMultiValued("a", true); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); Document doc = new Document(); doc.Add(new SortedSetDocValuesFacetField("a", "foo")); doc.Add(new SortedSetDocValuesFacetField("a", "bar")); doc.Add(new SortedSetDocValuesFacetField("a", "zoo")); doc.Add(new SortedSetDocValuesFacetField("b", "baz")); writer.AddDocument(config.Build(doc)); if (Random().NextBoolean()) { writer.Commit(); } doc = new Document(); doc.Add(new SortedSetDocValuesFacetField("a", "foo")); writer.AddDocument(config.Build(doc)); // NRT open IndexSearcher searcher = NewSearcher(writer.Reader); // Per-top-reader state: SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(searcher.IndexReader); FacetsCollector c = new FacetsCollector(); searcher.Search(new MatchAllDocsQuery(), c); SortedSetDocValuesFacetCounts facets = new SortedSetDocValuesFacetCounts(state, c); Assert.AreEqual("dim=a path=[] value=4 childCount=3\n foo (2)\n bar (1)\n zoo (1)\n", facets.GetTopChildren(10, "a").ToString()); Assert.AreEqual("dim=b path=[] value=1 childCount=1\n baz (1)\n", facets.GetTopChildren(10, "b").ToString()); // DrillDown: DrillDownQuery q = new DrillDownQuery(config); q.Add("a", "foo"); q.Add("b", "baz"); TopDocs hits = searcher.Search(q, 1); Assert.AreEqual(1, hits.TotalHits); IOUtils.Close(writer, searcher.IndexReader, dir); }
public virtual void TestWithThreads() { // LUCENE-5303: OrdinalsCache used the ThreadLocal BinaryDV instead of reader.getCoreCacheKey(). Store.Directory indexDir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); IndexWriter writer = new IndexWriter(indexDir, conf); var taxoWriter = new DirectoryTaxonomyWriter(taxoDir); FacetsConfig config = new FacetsConfig(); Document doc = new Document(); doc.Add(new FacetField("A", "1")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new FacetField("A", "2")); writer.AddDocument(config.Build(taxoWriter, doc)); var reader = DirectoryReader.Open(writer, true); CachedOrdinalsReader ordsReader = new CachedOrdinalsReader(new DocValuesOrdinalsReader(FacetsConfig.DEFAULT_INDEX_FIELD_NAME)); ThreadClass[] threads = new ThreadClass[3]; for (int i = 0; i < threads.Length; i++) { threads[i] = new ThreadAnonymousInnerClassHelper(this, "CachedOrdsThread-" + i, reader, ordsReader); } long ramBytesUsed = 0; foreach (ThreadClass t in threads) { t.Start(); t.Join(); if (ramBytesUsed == 0) { ramBytesUsed = ordsReader.RamBytesUsed(); } else { Assert.AreEqual(ramBytesUsed, ordsReader.RamBytesUsed()); } } IOUtils.Close(writer, taxoWriter, reader, indexDir, taxoDir); }
public virtual void TestSlowCompositeReaderWrapper() { AssumeTrue("Test requires SortedSetDV support", DefaultCodecSupportsSortedSet); Directory dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, dir); FacetsConfig config = new FacetsConfig(); Document doc = new Document(); doc.Add(new SortedSetDocValuesFacetField("a", "foo1")); writer.AddDocument(config.Build(doc)); writer.Commit(); doc = new Document(); doc.Add(new SortedSetDocValuesFacetField("a", "foo2")); writer.AddDocument(config.Build(doc)); // NRT open IndexSearcher searcher = new IndexSearcher(SlowCompositeReaderWrapper.Wrap(writer.GetReader())); // Per-top-reader state: SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(searcher.IndexReader); FacetsCollector c = new FacetsCollector(); searcher.Search(new MatchAllDocsQuery(), c); Facets facets = new SortedSetDocValuesFacetCounts(state, c); // Ask for top 10 labels for any dims that have counts: Assert.AreEqual("dim=a path=[] value=2 childCount=2\n foo1 (1)\n foo2 (1)\n", facets.GetTopChildren(10, "a").ToString()); IOUtils.Dispose(writer, searcher.IndexReader, dir); }
private static void IndexDocsWithFacetsNoTerms(IndexWriter indexWriter, TaxonomyWriter taxoWriter, IDictionary<string, int?> expectedCounts) { Random random = Random(); int numDocs = AtLeast(random, 2); FacetsConfig config = Config; for (int i = 0; i < numDocs; i++) { Document doc = new Document(); AddFacets(doc, config, false); indexWriter.AddDocument(config.Build(taxoWriter, doc)); } indexWriter.Commit(); // flush a segment }
public virtual void TestMultiValuedHierarchy() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE); FacetsConfig config = new FacetsConfig(); config.SetHierarchical("a", true); config.SetMultiValued("a", true); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); Document doc = new Document(); doc.Add(NewTextField("field", "text", Field.Store.NO)); doc.Add(new FacetField("a", "path", "x")); doc.Add(new FacetField("a", "path", "y")); writer.AddDocument(config.Build(taxoWriter, doc)); // NRT open IndexSearcher searcher = NewSearcher(writer.Reader); // NRT open var taxoReader = new DirectoryTaxonomyReader(taxoWriter); // Aggregate the facet counts: FacetsCollector c = new FacetsCollector(); // MatchAllDocsQuery is for "browsing" (counts facets // for all non-deleted docs in the index); normally // you'd use a "normal" query, and use MultiCollector to // wrap collecting the "normal" hits and also facets: searcher.Search(new MatchAllDocsQuery(), c); Facets facets = GetTaxonomyFacetCounts(taxoReader, config, c); try { facets.GetSpecificValue("a"); Fail("didn't hit expected exception"); } catch (System.ArgumentException) { // expected } FacetResult result = facets.GetTopChildren(10, "a"); Assert.AreEqual(1, result.LabelValues.Length); Assert.AreEqual(1, (int)result.LabelValues[0].Value); IOUtils.Close(writer, taxoWriter, searcher.IndexReader, taxoReader, dir, taxoDir); }
/// <summary>Build the example index.</summary> private void Index() { using IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(EXAMPLE_VERSION, new WhitespaceAnalyzer(EXAMPLE_VERSION))); // Writes facet ords to a separate directory from the main index using DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir); indexWriter.AddDocument(config.Build(taxoWriter, new Document { new FacetField("Author", "Bob"), new FacetField("Publish Date", "2010", "10", "15") })); indexWriter.AddDocument(config.Build(taxoWriter, new Document { new FacetField("Author", "Lisa"), new FacetField("Publish Date", "2010", "10", "20") })); indexWriter.AddDocument(config.Build(taxoWriter, new Document { new FacetField("Author", "Lisa"), new FacetField("Publish Date", "2012", "1", "1") })); indexWriter.AddDocument(config.Build(taxoWriter, new Document { new FacetField("Author", "Susan"), new FacetField("Publish Date", "2012", "1", "7") })); indexWriter.AddDocument(config.Build(taxoWriter, new Document { new FacetField("Author", "Frank"), new FacetField("Publish Date", "1999", "5", "5") })); }
public override int DoLogic() { if (config != null) { List <FacetField> facets = new List <FacetField>(); RunData.FacetSource.GetNextFacets(facets); foreach (FacetField ff in facets) { m_doc.Add(ff); } m_doc = config.Build(RunData.TaxonomyWriter, m_doc); } return(base.DoLogic()); }
/// <summary>Build the example index.</summary> private void Index() { using IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(EXAMPLE_VERSION, new WhitespaceAnalyzer(EXAMPLE_VERSION))); indexWriter.AddDocument(config.Build(new Document { new SortedSetDocValuesFacetField("Author", "Bob"), new SortedSetDocValuesFacetField("Publish Year", "2010") })); indexWriter.AddDocument(config.Build(new Document { new SortedSetDocValuesFacetField("Author", "Lisa"), new SortedSetDocValuesFacetField("Publish Year", "2010") })); indexWriter.AddDocument(config.Build(new Document { new SortedSetDocValuesFacetField("Author", "Lisa"), new SortedSetDocValuesFacetField("Publish Year", "2012") })); indexWriter.AddDocument(config.Build(new Document { new SortedSetDocValuesFacetField("Author", "Susan"), new SortedSetDocValuesFacetField("Publish Year", "2012") })); indexWriter.AddDocument(config.Build(new Document { new SortedSetDocValuesFacetField("Author", "Frank"), new SortedSetDocValuesFacetField("Publish Year", "1999") })); }
public override void BeforeClass() { base.BeforeClass(); dir = NewDirectory(); taxoDir = NewDirectory(); // preparations - index, taxonomy, content var taxoWriter = new DirectoryTaxonomyWriter(taxoDir); // Cannot mix ints & floats in the same indexed field: config = new FacetsConfig(); config.SetIndexFieldName("int", "$facets.int"); config.SetMultiValued("int", true); config.SetIndexFieldName("float", "$facets.float"); config.SetMultiValued("float", true); var writer = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, dir); // index documents, 50% have only 'b' and all have 'a' for (int i = 0; i < 110; i++) { Document doc = new Document(); // every 11th document is added empty, this used to cause the association // aggregators to go into an infinite loop if (i % 11 != 0) { doc.Add(new Int32AssociationFacetField(2, "int", "a")); doc.Add(new SingleAssociationFacetField(0.5f, "float", "a")); if (i % 2 == 0) // 50 { doc.Add(new Int32AssociationFacetField(3, "int", "b")); doc.Add(new SingleAssociationFacetField(0.2f, "float", "b")); } } writer.AddDocument(config.Build(taxoWriter, doc)); } taxoWriter.Dispose(); reader = writer.GetReader(); writer.Dispose(); taxoReader = new DirectoryTaxonomyReader(taxoDir); }
public virtual void TestReallyNoNormsForDrillDown() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); iwc.SetSimilarity(new PerFieldSimilarityWrapperAnonymousInnerClassHelper(this)); ITaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, iwc); FacetsConfig config = new FacetsConfig(); Document doc = new Document(); doc.Add(NewTextField("field", "text", Field.Store.NO)); doc.Add(new FacetField("a", "path")); writer.AddDocument(config.Build(taxoWriter, doc)); IOUtils.Close(writer, taxoWriter, dir, taxoDir); }
private static void IndexDocsWithFacetsAndSomeTerms(IndexWriter indexWriter, TaxonomyWriter taxoWriter, IDictionary<string, int?> expectedCounts) { Random random = Random(); int numDocs = AtLeast(random, 2); FacetsConfig config = Config; for (int i = 0; i < numDocs; i++) { Document doc = new Document(); bool hasContent = random.NextBoolean(); if (hasContent) { AddField(doc); } AddFacets(doc, config, hasContent); indexWriter.AddDocument(config.Build(taxoWriter, doc)); } indexWriter.Commit(); // flush a segment }
public virtual void TestLabelWithDelimiter() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, dir); var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE); FacetsConfig config = new FacetsConfig(); config.SetMultiValued("dim", true); Document doc = new Document(); doc.Add(NewTextField("field", "text", Field.Store.NO)); doc.Add(new FacetField("dim", "test\u001Fone")); doc.Add(new FacetField("dim", "test\u001Etwo")); writer.AddDocument(config.Build(taxoWriter, doc)); // NRT open IndexSearcher searcher = NewSearcher(writer.GetReader()); // NRT open var taxoReader = new DirectoryTaxonomyReader(taxoWriter); FacetsCollector c = new FacetsCollector(); searcher.Search(new MatchAllDocsQuery(), c); Facets facets = GetTaxonomyFacetCounts(taxoReader, config, c); Assert.AreEqual(1, facets.GetSpecificValue("dim", "test\u001Fone")); Assert.AreEqual(1, facets.GetSpecificValue("dim", "test\u001Etwo")); FacetResult result = facets.GetTopChildren(10, "dim"); Assert.AreEqual("dim=dim path=[] value=-1 childCount=2\n test\u001Fone (1)\n test\u001Etwo (1)\n", result.ToString()); IOUtils.Dispose(writer, taxoWriter, searcher.IndexReader, taxoReader, dir, taxoDir); }
public void BeforeClass() { dir = NewDirectory(); taxoDir = NewDirectory(); // preparations - index, taxonomy, content var taxoWriter = new DirectoryTaxonomyWriter(taxoDir); // Cannot mix ints & floats in the same indexed field: config = new FacetsConfig(); config.SetIndexFieldName("int", "$facets.int"); config.SetMultiValued("int", true); config.SetIndexFieldName("float", "$facets.float"); config.SetMultiValued("float", true); var writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); // index documents, 50% have only 'b' and all have 'a' for (int i = 0; i < 110; i++) { Document doc = new Document(); // every 11th document is added empty, this used to cause the association // aggregators to go into an infinite loop if (i % 11 != 0) { doc.Add(new IntAssociationFacetField(2, "int", "a")); doc.Add(new FloatAssociationFacetField(0.5f, "float", "a")); if (i % 2 == 0) // 50 { doc.Add(new IntAssociationFacetField(3, "int", "b")); doc.Add(new FloatAssociationFacetField(0.2f, "float", "b")); } } writer.AddDocument(config.Build(taxoWriter, doc)); } taxoWriter.Dispose(); reader = writer.Reader; writer.Dispose(); taxoReader = new DirectoryTaxonomyReader(taxoDir); }
private void indexTwoDocs(ITaxonomyWriter taxoWriter, IndexWriter indexWriter, FacetsConfig config, bool withContent) { for (int i = 0; i < 2; i++) { Document doc = new Document(); if (withContent) { doc.Add(new StringField("f", "a", Field.Store.NO)); } if (config != null) { doc.Add(new FacetField("A", Convert.ToString(i))); indexWriter.AddDocument(config.Build(taxoWriter, doc)); } else { indexWriter.AddDocument(doc); } } indexWriter.Commit(); }
public virtual void TestSumScoreAggregator() { Store.Directory indexDir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir); IndexWriter iw = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); FacetsConfig config = new FacetsConfig(); for (int i = AtLeast(30); i > 0; --i) { Document doc = new Document(); if (Random().NextBoolean()) // don't match all documents { doc.Add(new StringField("f", "v", Field.Store.NO)); } doc.Add(new FacetField("dim", "a")); iw.AddDocument(config.Build(taxoWriter, doc)); } DirectoryReader r = DirectoryReader.Open(iw, true); DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter); FacetsCollector fc = new FacetsCollector(true); ConstantScoreQuery csq = new ConstantScoreQuery(new MatchAllDocsQuery()); csq.Boost = 2.0f; TopDocs td = FacetsCollector.Search(NewSearcher(r), csq, 10, fc); Facets facets = new TaxonomyFacetSumValueSource(taxoReader, config, fc, new TaxonomyFacetSumValueSource.ScoreValueSource()); int expected = (int)(td.MaxScore * td.TotalHits); Assert.AreEqual(expected, (int)facets.GetSpecificValue("dim", "a")); IOUtils.Close(iw, taxoWriter, taxoReader, taxoDir, r, indexDir); }
public virtual void TestDetectHierarchicalField() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE); var writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); FacetsConfig config = new FacetsConfig(); Document doc = new Document(); doc.Add(NewTextField("field", "text", Field.Store.NO)); doc.Add(new FacetField("a", "path", "other")); try { config.Build(taxoWriter, doc); Fail("did not hit expected exception"); } catch (System.ArgumentException) { // expected } IOUtils.Close(writer, taxoWriter, dir, taxoDir); }
public virtual void TestDetectHierarchicalField() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE); var writer = new RandomIndexWriter(Random, dir); FacetsConfig config = new FacetsConfig(); Document doc = new Document(); doc.Add(NewTextField("field", "text", Field.Store.NO)); doc.Add(new FacetField("a", "path", "other")); try { config.Build(taxoWriter, doc); fail("did not hit expected exception"); } catch (Exception iae) when(iae.IsIllegalArgumentException()) { // expected } IOUtils.Dispose(writer, taxoWriter, dir, taxoDir); }
public virtual void TestMixedTypesInSameIndexField() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir); FacetsConfig config = new FacetsConfig(); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); Document doc = new Document(); doc.Add(new IntAssociationFacetField(14, "a", "x")); doc.Add(new FloatAssociationFacetField(55.0f, "b", "y")); try { writer.AddDocument(config.Build(taxoWriter, doc)); Fail("did not hit expected exception"); } catch (System.ArgumentException) { // expected } IOUtils.Close(writer, taxoWriter, dir, taxoDir); }
public virtual void TestMixedTypesInSameIndexField() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); ITaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir); FacetsConfig config = new FacetsConfig(); RandomIndexWriter writer = new RandomIndexWriter(Random, dir); Document doc = new Document(); doc.Add(new Int32AssociationFacetField(14, "a", "x")); doc.Add(new SingleAssociationFacetField(55.0f, "b", "y")); try { writer.AddDocument(config.Build(taxoWriter, doc)); fail("did not hit expected exception"); } catch (Exception exc) when(exc.IsIllegalArgumentException()) { // expected } IOUtils.Dispose(writer, taxoWriter, dir, taxoDir); }
public virtual void TestWrongIndexFieldName() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); // Writes facet ords to a separate directory from the // main index: DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE); FacetsConfig config = new FacetsConfig(); config.SetIndexFieldName("a", "$facets2"); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir); Document doc = new Document(); doc.Add(new FacetField("a", "foo1")); writer.AddDocument(config.Build(taxoWriter, doc)); // NRT open IndexSearcher searcher = NewSearcher(writer.Reader); // NRT open var taxoReader = new DirectoryTaxonomyReader(taxoWriter); FacetsCollector c = new FacetsCollector(); searcher.Search(new MatchAllDocsQuery(), c); // Uses default $facets field: Facets facets; if (Random().NextBoolean()) { facets = new FastTaxonomyFacetCounts(taxoReader, config, c); } else { OrdinalsReader ordsReader = new DocValuesOrdinalsReader(); if (Random().NextBoolean()) { ordsReader = new CachedOrdinalsReader(ordsReader); } facets = new TaxonomyFacetCounts(ordsReader, taxoReader, config, c); } // Ask for top 10 labels for any dims that have counts: IList<FacetResult> results = facets.GetAllDims(10); Assert.True(results.Count == 0); try { facets.GetSpecificValue("a"); Fail("should have hit exc"); } catch (System.ArgumentException) { // expected } try { facets.GetTopChildren(10, "a"); Fail("should have hit exc"); } catch (System.ArgumentException) { // expected } IOUtils.Close(writer, taxoWriter, searcher.IndexReader, taxoReader, taxoDir, dir); }
public virtual void TestSparseFacets() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); // Writes facet ords to a separate directory from the // main index: var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir); FacetsConfig config = new FacetsConfig(); Document doc = new Document(); doc.Add(new FacetField("a", "foo1")); writer.AddDocument(config.Build(taxoWriter, doc)); if (Random().NextBoolean()) { writer.Commit(); } doc = new Document(); doc.Add(new FacetField("a", "foo2")); doc.Add(new FacetField("b", "bar1")); writer.AddDocument(config.Build(taxoWriter, doc)); if (Random().NextBoolean()) { writer.Commit(); } doc = new Document(); doc.Add(new FacetField("a", "foo3")); doc.Add(new FacetField("b", "bar2")); doc.Add(new FacetField("c", "baz1")); writer.AddDocument(config.Build(taxoWriter, doc)); // NRT open IndexSearcher searcher = NewSearcher(writer.Reader); // NRT open var taxoReader = new DirectoryTaxonomyReader(taxoWriter); FacetsCollector c = new FacetsCollector(); searcher.Search(new MatchAllDocsQuery(), c); Facets facets = GetTaxonomyFacetCounts(taxoReader, new FacetsConfig(), c); // Ask for top 10 labels for any dims that have counts: IList<FacetResult> results = facets.GetAllDims(10); Assert.AreEqual(3, results.Count); Assert.AreEqual("dim=a path=[] value=3 childCount=3\n foo1 (1)\n foo2 (1)\n foo3 (1)\n", results[0].ToString()); Assert.AreEqual("dim=b path=[] value=2 childCount=2\n bar1 (1)\n bar2 (1)\n", results[1].ToString()); Assert.AreEqual("dim=c path=[] value=1 childCount=1\n baz1 (1)\n", results[2].ToString()); IOUtils.Close(writer, taxoWriter, searcher.IndexReader, taxoReader, taxoDir, dir); }
public virtual void TestSeparateIndexedFields() { Store.Directory indexDir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); var taxoWriter = new DirectoryTaxonomyWriter(taxoDir); IndexWriter iw = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); FacetsConfig config = new FacetsConfig(); config.SetIndexFieldName("b", "$b"); for (int i = AtLeast(30); i > 0; --i) { Document doc = new Document(); doc.Add(new StringField("f", "v", Field.Store.NO)); doc.Add(new FacetField("a", "1")); doc.Add(new FacetField("b", "1")); iw.AddDocument(config.Build(taxoWriter, doc)); } DirectoryReader r = DirectoryReader.Open(iw, true); var taxoReader = new DirectoryTaxonomyReader(taxoWriter); FacetsCollector sfc = new FacetsCollector(); NewSearcher(r).Search(new MatchAllDocsQuery(), sfc); Facets facets1 = GetTaxonomyFacetCounts(taxoReader, config, sfc); Facets facets2 = GetTaxonomyFacetCounts(taxoReader, config, sfc, "$b"); Assert.AreEqual(r.MaxDoc, (int)facets1.GetTopChildren(10, "a").Value); Assert.AreEqual(r.MaxDoc, (int)facets2.GetTopChildren(10, "b").Value); IOUtils.Close(taxoWriter, iw, taxoReader, taxoDir, r, indexDir); }
public virtual void TestSlowCompositeReaderWrapper() { AssumeTrue("Test requires SortedSetDV support", DefaultCodecSupportsSortedSet()); Directory dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); FacetsConfig config = new FacetsConfig(); Document doc = new Document(); doc.Add(new SortedSetDocValuesFacetField("a", "foo1")); writer.AddDocument(config.Build(doc)); writer.Commit(); doc = new Document(); doc.Add(new SortedSetDocValuesFacetField("a", "foo2")); writer.AddDocument(config.Build(doc)); // NRT open IndexSearcher searcher = new IndexSearcher(SlowCompositeReaderWrapper.Wrap(writer.Reader)); // Per-top-reader state: SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(searcher.IndexReader); FacetsCollector c = new FacetsCollector(); searcher.Search(new MatchAllDocsQuery(), c); Facets facets = new SortedSetDocValuesFacetCounts(state, c); // Ask for top 10 labels for any dims that have counts: Assert.AreEqual("dim=a path=[] value=2 childCount=2\n foo1 (1)\n foo2 (1)\n", facets.GetTopChildren(10, "a").ToString()); IOUtils.Close(writer, searcher.IndexReader, dir); }
public static void BeforeClass() { dir = NewDirectory(); taxoDir = NewDirectory(); // preparations - index, taxonomy, content var taxoWriter = new DirectoryTaxonomyWriter(taxoDir); // Cannot mix ints & floats in the same indexed field: config = new FacetsConfig(); config.SetIndexFieldName("int", "$facets.int"); config.SetMultiValued("int", true); config.SetIndexFieldName("float", "$facets.float"); config.SetMultiValued("float", true); var writer = new RandomIndexWriter(Random(), dir); // index documents, 50% have only 'b' and all have 'a' for (int i = 0; i < 110; i++) { Document doc = new Document(); // every 11th document is added empty, this used to cause the association // aggregators to go into an infinite loop if (i % 11 != 0) { doc.Add(new IntAssociationFacetField(2, "int", "a")); doc.Add(new FloatAssociationFacetField(0.5f, "float", "a")); if (i % 2 == 0) // 50 { doc.Add(new IntAssociationFacetField(3, "int", "b")); doc.Add(new FloatAssociationFacetField(0.2f, "float", "b")); } } writer.AddDocument(config.Build(taxoWriter, doc)); } taxoWriter.Dispose(); reader = writer.Reader; writer.Dispose(); taxoReader = new DirectoryTaxonomyReader(taxoDir); }
public virtual void TestWrongIndexFieldName() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); // Writes facet ords to a separate directory from the // main index: var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE); FacetsConfig config = new FacetsConfig(); config.SetIndexFieldName("a", "$facets2"); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); Document doc = new Document(); doc.Add(new Int32Field("num", 10, Field.Store.NO)); doc.Add(new FacetField("a", "foo1")); writer.AddDocument(config.Build(taxoWriter, doc)); // NRT open IndexSearcher searcher = NewSearcher(writer.Reader); writer.Dispose(); // NRT open var taxoReader = new DirectoryTaxonomyReader(taxoWriter); taxoWriter.Dispose(); FacetsCollector c = new FacetsCollector(); searcher.Search(new MatchAllDocsQuery(), c); TaxonomyFacetSumValueSource facets = new TaxonomyFacetSumValueSource(taxoReader, config, c, new Int32FieldSource("num")); // Ask for top 10 labels for any dims that have counts: IList <FacetResult> results = facets.GetAllDims(10); Assert.True(results.Count == 0); try { facets.GetSpecificValue("a"); Fail("should have hit exc"); } catch (System.ArgumentException) { // expected } try { facets.GetTopChildren(10, "a"); Fail("should have hit exc"); } catch (System.ArgumentException) { // expected } IOUtils.Dispose(searcher.IndexReader, taxoReader, dir, taxoDir); }
public virtual void TestSparseFacets() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); // Writes facet ords to a separate directory from the // main index: var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE); RandomIndexWriter writer = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, dir); FacetsConfig config = new FacetsConfig(); Document doc = new Document(); doc.Add(new FacetField("a", "foo1")); writer.AddDocument(config.Build(taxoWriter, doc)); if (Random.NextBoolean()) { writer.Commit(); } doc = new Document(); doc.Add(new FacetField("a", "foo2")); doc.Add(new FacetField("b", "bar1")); writer.AddDocument(config.Build(taxoWriter, doc)); if (Random.NextBoolean()) { writer.Commit(); } doc = new Document(); doc.Add(new FacetField("a", "foo3")); doc.Add(new FacetField("b", "bar2")); doc.Add(new FacetField("c", "baz1")); writer.AddDocument(config.Build(taxoWriter, doc)); // NRT open IndexSearcher searcher = NewSearcher(writer.GetReader()); // NRT open var taxoReader = new DirectoryTaxonomyReader(taxoWriter); FacetsCollector c = new FacetsCollector(); searcher.Search(new MatchAllDocsQuery(), c); Facets facets = GetTaxonomyFacetCounts(taxoReader, new FacetsConfig(), c); // Ask for top 10 labels for any dims that have counts: IList <FacetResult> results = facets.GetAllDims(10); Assert.AreEqual(3, results.Count); Assert.AreEqual("dim=a path=[] value=3 childCount=3\n foo1 (1)\n foo2 (1)\n foo3 (1)\n", results[0].ToString()); Assert.AreEqual("dim=b path=[] value=2 childCount=2\n bar1 (1)\n bar2 (1)\n", results[1].ToString()); Assert.AreEqual("dim=c path=[] value=1 childCount=1\n baz1 (1)\n", results[2].ToString()); IOUtils.Dispose(writer, taxoWriter, searcher.IndexReader, taxoReader, taxoDir, dir); }
public virtual void TestBasic() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); // Writes facet ords to a separate directory from the // main index: var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE); FacetsConfig config = new FacetsConfig(); config.SetHierarchical("Publish Date", true); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir); Document doc = new Document(); doc.Add(new FacetField("Author", "Bob")); doc.Add(new FacetField("Publish Date", "2010", "10", "15")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new FacetField("Author", "Lisa")); doc.Add(new FacetField("Publish Date", "2010", "10", "20")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new FacetField("Author", "Lisa")); doc.Add(new FacetField("Publish Date", "2012", "1", "1")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new FacetField("Author", "Susan")); doc.Add(new FacetField("Publish Date", "2012", "1", "7")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new FacetField("Author", "Frank")); doc.Add(new FacetField("Publish Date", "1999", "5", "5")); writer.AddDocument(config.Build(taxoWriter, doc)); // NRT open IndexSearcher searcher = NewSearcher(writer.Reader); // NRT open var taxoReader = new DirectoryTaxonomyReader(taxoWriter); // Aggregate the facet counts: FacetsCollector c = new FacetsCollector(); // MatchAllDocsQuery is for "browsing" (counts facets // for all non-deleted docs in the index); normally // you'd use a "normal" query, and use MultiCollector to // wrap collecting the "normal" hits and also facets: searcher.Search(new MatchAllDocsQuery(), c); Facets facets = new FastTaxonomyFacetCounts(taxoReader, config, c); // Retrieve & verify results: Assert.AreEqual("dim=Publish Date path=[] value=5 childCount=3\n 2010 (2)\n 2012 (2)\n 1999 (1)\n", facets.GetTopChildren(10, "Publish Date").ToString()); Assert.AreEqual("dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Bob (1)\n Susan (1)\n Frank (1)\n", facets.GetTopChildren(10, "Author").ToString()); // Now user drills down on Publish Date/2010: DrillDownQuery q2 = new DrillDownQuery(config); q2.Add("Publish Date", "2010"); c = new FacetsCollector(); searcher.Search(q2, c); facets = new FastTaxonomyFacetCounts(taxoReader, config, c); Assert.AreEqual("dim=Author path=[] value=2 childCount=2\n Bob (1)\n Lisa (1)\n", facets.GetTopChildren(10, "Author").ToString()); Assert.AreEqual(1, facets.GetSpecificValue("Author", "Lisa")); Assert.Null(facets.GetTopChildren(10, "Non exitent dim")); // Smoke test PrintTaxonomyStats: ByteArrayOutputStream bos = new ByteArrayOutputStream(); PrintTaxonomyStats.PrintStats(taxoReader, Console.Out, true); string result = bos.ToString(); Assert.True(result.IndexOf("/Author: 4 immediate children; 5 total categories", StringComparison.Ordinal) != -1); Assert.True(result.IndexOf("/Publish Date: 3 immediate children; 12 total categories", StringComparison.Ordinal) != -1); // Make sure at least a few nodes of the tree came out: Assert.True(result.IndexOf(" /1999", StringComparison.Ordinal) != -1); Assert.True(result.IndexOf(" /2012", StringComparison.Ordinal) != -1); Assert.True(result.IndexOf(" /20", StringComparison.Ordinal) != -1); IOUtils.Close(writer, taxoWriter, searcher.IndexReader, taxoReader, taxoDir, dir); }
public virtual void TestDetectMultiValuedField() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir); FacetsConfig config = new FacetsConfig(); Document doc = new Document(); doc.Add(NewTextField("field", "text", Field.Store.NO)); doc.Add(new FacetField("a", "path")); doc.Add(new FacetField("a", "path2")); try { config.Build(taxoWriter, doc); Fail("did not hit expected exception"); } catch (System.ArgumentException) { // expected } IOUtils.Close(writer, taxoWriter, dir, taxoDir); }
public virtual void TestCountRoot() { // LUCENE-4882: FacetsAccumulator threw NPE if a FacetRequest was defined on CP.EMPTY Store.Directory indexDir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); var taxoWriter = new DirectoryTaxonomyWriter(taxoDir); IndexWriter iw = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); FacetsConfig config = new FacetsConfig(); for (int i = AtLeast(30); i > 0; --i) { Document doc = new Document(); doc.Add(new FacetField("a", "1")); doc.Add(new FacetField("b", "1")); iw.AddDocument(config.Build(taxoWriter, doc)); } DirectoryReader r = DirectoryReader.Open(iw, true); DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter); FacetsCollector sfc = new FacetsCollector(); NewSearcher(r).Search(new MatchAllDocsQuery(), sfc); Facets facets = GetTaxonomyFacetCounts(taxoReader, config, sfc); foreach (FacetResult result in facets.GetAllDims(10)) { Assert.AreEqual(r.NumDocs, (int)result.Value); } IOUtils.Close(taxoWriter, iw, taxoReader, taxoDir, r, indexDir); }
public virtual void TestChildCount() { // LUCENE-4885: FacetResult.numValidDescendants was not set properly by FacetsAccumulator var indexDir = NewDirectory(); var taxoDir = NewDirectory(); var taxoWriter = new DirectoryTaxonomyWriter(taxoDir); IndexWriter iw = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); FacetsConfig config = new FacetsConfig(); for (int i = 0; i < 10; i++) { Document doc = new Document(); doc.Add(new FacetField("a", Convert.ToString(i))); iw.AddDocument(config.Build(taxoWriter, doc)); } DirectoryReader r = DirectoryReader.Open(iw, true); DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter); FacetsCollector sfc = new FacetsCollector(); NewSearcher(r).Search(new MatchAllDocsQuery(), sfc); Facets facets = GetTaxonomyFacetCounts(taxoReader, config, sfc); Assert.AreEqual(10, facets.GetTopChildren(2, "a").ChildCount); IOUtils.Close(taxoWriter, iw, taxoReader, taxoDir, r, indexDir); }
public virtual void TestMixedRangeAndNonRangeTaxonomy() { Directory d = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random(), d); Directory td = NewDirectory(); DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(td, IndexWriterConfig.OpenMode_e.CREATE); FacetsConfig config = new FacetsConfig(); for (long l = 0; l < 100; l++) { Document doc = new Document(); // For computing range facet counts: doc.Add(new NumericDocValuesField("field", l)); // For drill down by numeric range: doc.Add(new LongField("field", l, Field.Store.NO)); if ((l & 3) == 0) { doc.Add(new FacetField("dim", "a")); } else { doc.Add(new FacetField("dim", "b")); } w.AddDocument(config.Build(tw, doc)); } IndexReader r = w.Reader; var tr = new DirectoryTaxonomyReader(tw); IndexSearcher s = NewSearcher(r); if (VERBOSE) { Console.WriteLine("TEST: searcher=" + s); } DrillSideways ds = new DrillSidewaysAnonymousInnerClassHelper(this, s, config, tr); // First search, no drill downs: DrillDownQuery ddq = new DrillDownQuery(config); DrillSidewaysResult dsr = ds.Search(null, ddq, 10); Assert.AreEqual(100, dsr.Hits.TotalHits); Assert.AreEqual("dim=dim path=[] value=100 childCount=2\n b (75)\n a (25)\n", dsr.Facets.GetTopChildren(10, "dim").ToString()); Assert.AreEqual("dim=field path=[] value=21 childCount=5\n less than 10 (10)\n less than or equal to 10 (11)\n over 90 (9)\n 90 or above (10)\n over 1000 (0)\n", dsr.Facets.GetTopChildren(10, "field").ToString()); // Second search, drill down on dim=b: ddq = new DrillDownQuery(config); ddq.Add("dim", "b"); dsr = ds.Search(null, ddq, 10); Assert.AreEqual(75, dsr.Hits.TotalHits); Assert.AreEqual("dim=dim path=[] value=100 childCount=2\n b (75)\n a (25)\n", dsr.Facets.GetTopChildren(10, "dim").ToString()); Assert.AreEqual("dim=field path=[] value=16 childCount=5\n less than 10 (7)\n less than or equal to 10 (8)\n over 90 (7)\n 90 or above (8)\n over 1000 (0)\n", dsr.Facets.GetTopChildren(10, "field").ToString()); // Third search, drill down on "less than or equal to 10": ddq = new DrillDownQuery(config); ddq.Add("field", NumericRangeQuery.NewLongRange("field", 0L, 10L, true, true)); dsr = ds.Search(null, ddq, 10); Assert.AreEqual(11, dsr.Hits.TotalHits); Assert.AreEqual("dim=dim path=[] value=11 childCount=2\n b (8)\n a (3)\n", dsr.Facets.GetTopChildren(10, "dim").ToString()); Assert.AreEqual("dim=field path=[] value=21 childCount=5\n less than 10 (10)\n less than or equal to 10 (11)\n over 90 (9)\n 90 or above (10)\n over 1000 (0)\n", dsr.Facets.GetTopChildren(10, "field").ToString()); IOUtils.Close(tw, tr, td, w, r, d); }
public virtual void TestSparseFacets() { AssumeTrue("Test requires SortedSetDV support", DefaultCodecSupportsSortedSet()); Directory dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); FacetsConfig config = new FacetsConfig(); Document doc = new Document(); doc.Add(new SortedSetDocValuesFacetField("a", "foo1")); writer.AddDocument(config.Build(doc)); if (Random().NextBoolean()) { writer.Commit(); } doc = new Document(); doc.Add(new SortedSetDocValuesFacetField("a", "foo2")); doc.Add(new SortedSetDocValuesFacetField("b", "bar1")); writer.AddDocument(config.Build(doc)); if (Random().NextBoolean()) { writer.Commit(); } doc = new Document(); doc.Add(new SortedSetDocValuesFacetField("a", "foo3")); doc.Add(new SortedSetDocValuesFacetField("b", "bar2")); doc.Add(new SortedSetDocValuesFacetField("c", "baz1")); writer.AddDocument(config.Build(doc)); // NRT open IndexSearcher searcher = NewSearcher(writer.Reader); writer.Dispose(); // Per-top-reader state: SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(searcher.IndexReader); FacetsCollector c = new FacetsCollector(); searcher.Search(new MatchAllDocsQuery(), c); SortedSetDocValuesFacetCounts facets = new SortedSetDocValuesFacetCounts(state, c); // Ask for top 10 labels for any dims that have counts: IList<FacetResult> results = facets.GetAllDims(10); Assert.AreEqual(3, results.Count); Assert.AreEqual("dim=a path=[] value=3 childCount=3\n foo1 (1)\n foo2 (1)\n foo3 (1)\n", results[0].ToString()); Assert.AreEqual("dim=b path=[] value=2 childCount=2\n bar1 (1)\n bar2 (1)\n", results[1].ToString()); Assert.AreEqual("dim=c path=[] value=1 childCount=1\n baz1 (1)\n", results[2].ToString()); searcher.IndexReader.Dispose(); dir.Dispose(); }
private void indexTwoDocs(TaxonomyWriter taxoWriter, IndexWriter indexWriter, FacetsConfig config, bool withContent) { for (int i = 0; i < 2; i++) { Document doc = new Document(); if (withContent) { doc.Add(new StringField("f", "a", Field.Store.NO)); } if (config != null) { doc.Add(new FacetField("A", Convert.ToString(i))); indexWriter.AddDocument(config.Build(taxoWriter, doc)); } else { indexWriter.AddDocument(doc); } } indexWriter.Commit(); }
public virtual void TestRollupValues() { Store.Directory indexDir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); var taxoWriter = new DirectoryTaxonomyWriter(taxoDir); IndexWriter iw = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); FacetsConfig config = new FacetsConfig(); config.SetHierarchical("a", true); //config.setRequireDimCount("a", true); for (int i = 0; i < 4; i++) { Document doc = new Document(); doc.Add(new NumericDocValuesField("price", (i + 1))); doc.Add(new FacetField("a", Convert.ToString(i % 2), "1")); iw.AddDocument(config.Build(taxoWriter, doc)); } DirectoryReader r = DirectoryReader.Open(iw, true); DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter); ValueSource valueSource = new LongFieldSource("price"); FacetsCollector sfc = new FacetsCollector(); NewSearcher(r).Search(new MatchAllDocsQuery(), sfc); Facets facets = new TaxonomyFacetSumValueSource(taxoReader, config, sfc, valueSource); Assert.AreEqual("dim=a path=[] value=10.0 childCount=2\n 1 (6.0)\n 0 (4.0)\n", facets.GetTopChildren(10, "a").ToString()); IOUtils.Close(taxoWriter, iw, taxoReader, taxoDir, r, indexDir); }
public virtual void TestStaleState() { AssumeTrue("Test requires SortedSetDV support", DefaultCodecSupportsSortedSet()); Directory dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); FacetsConfig config = new FacetsConfig(); Document doc = new Document(); doc.Add(new SortedSetDocValuesFacetField("a", "foo")); writer.AddDocument(config.Build(doc)); IndexReader r = writer.Reader; SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(r); doc = new Document(); doc.Add(new SortedSetDocValuesFacetField("a", "bar")); writer.AddDocument(config.Build(doc)); doc = new Document(); doc.Add(new SortedSetDocValuesFacetField("a", "baz")); writer.AddDocument(config.Build(doc)); IndexSearcher searcher = NewSearcher(writer.Reader); FacetsCollector c = new FacetsCollector(); searcher.Search(new MatchAllDocsQuery(), c); try { new SortedSetDocValuesFacetCounts(state, c); Fail("did not hit expected exception"); } catch (InvalidOperationException) { // expected } r.Dispose(); writer.Dispose(); searcher.IndexReader.Dispose(); dir.Dispose(); }
public virtual void TestRequireDimCount() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir); FacetsConfig config = new FacetsConfig(); config.SetRequireDimCount("a", true); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir); Document doc = new Document(); doc.Add(new IntAssociationFacetField(14, "a", "x")); try { writer.AddDocument(config.Build(taxoWriter, doc)); Fail("did not hit expected exception"); } catch (System.ArgumentException) { // expected } IOUtils.Close(writer, taxoWriter, dir, taxoDir); }
public virtual void TestSparseFacets() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); // Writes facet ords to a separate directory from the // main index: var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); FacetsConfig config = new FacetsConfig(); Document doc = new Document(); doc.Add(new IntField("num", 10, Field.Store.NO)); doc.Add(new FacetField("a", "foo1")); writer.AddDocument(config.Build(taxoWriter, doc)); if (Random().NextBoolean()) { writer.Commit(); } doc = new Document(); doc.Add(new IntField("num", 20, Field.Store.NO)); doc.Add(new FacetField("a", "foo2")); doc.Add(new FacetField("b", "bar1")); writer.AddDocument(config.Build(taxoWriter, doc)); if (Random().NextBoolean()) { writer.Commit(); } doc = new Document(); doc.Add(new IntField("num", 30, Field.Store.NO)); doc.Add(new FacetField("a", "foo3")); doc.Add(new FacetField("b", "bar2")); doc.Add(new FacetField("c", "baz1")); writer.AddDocument(config.Build(taxoWriter, doc)); // NRT open IndexSearcher searcher = NewSearcher(writer.Reader); writer.Dispose(); // NRT open var taxoReader = new DirectoryTaxonomyReader(taxoWriter); taxoWriter.Dispose(); FacetsCollector c = new FacetsCollector(); searcher.Search(new MatchAllDocsQuery(), c); TaxonomyFacetSumValueSource facets = new TaxonomyFacetSumValueSource(taxoReader, new FacetsConfig(), c, new IntFieldSource("num")); // Ask for top 10 labels for any dims that have counts: IList <FacetResult> results = facets.GetAllDims(10); Assert.AreEqual(3, results.Count); Assert.AreEqual("dim=a path=[] value=60.0 childCount=3\n foo3 (30.0)\n foo2 (20.0)\n foo1 (10.0)\n", results[0].ToString()); Assert.AreEqual("dim=b path=[] value=50.0 childCount=2\n bar2 (30.0)\n bar1 (20.0)\n", results[1].ToString()); Assert.AreEqual("dim=c path=[] value=30.0 childCount=1\n baz1 (30.0)\n", results[2].ToString()); IOUtils.Close(searcher.IndexReader, taxoReader, dir, taxoDir); }
/// <summary> /// Add text to the existing index. /// </summary> /// <param name="writer">The index writer.</param> /// <param name="facetWriter">The facet index writer.</param> /// <param name="addTextData">The text data to add.</param> /// <param name="config">The facet configuration information.</param> public void AddText(Lucene.Net.Index.IndexWriter writer, DirectoryTaxonomyWriter facetWriter, Dictionary <FacetField, AddTextData[]> addTextData, FacetsConfig config) { long totalTextLength = 0; long maxTextLengthBeforeCommit = 30000000L; // For each text facet. foreach (KeyValuePair <FacetField, AddTextData[]> item in addTextData) { // If text exists. if (item.Value != null && item.Value.Length > 0) { // Add the text. FieldType nameFieldType = new Lucene.Net.Documents.FieldType() { Indexed = true, Tokenized = false, Stored = true, IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, }; // Add the text. FieldType completeFieldType = new Lucene.Net.Documents.FieldType() { Indexed = true, Tokenized = false, Stored = true, IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, }; // Add the text. FieldType textFieldType = new Lucene.Net.Documents.FieldType() { Indexed = true, Tokenized = false, Stored = false, IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, }; // For each text. foreach (AddTextData data in item.Value) { // Should the data be stored. completeFieldType.Stored = data.StoreText; // Create the document. Lucene.Net.Documents.Document document = new Lucene.Net.Documents.Document(); Lucene.Net.Documents.Field textName = new Field("textname", data.Name.ToLower(), nameFieldType); Lucene.Net.Documents.Field textComplete = new Field("textcomplete", data.Text.ToLower(), completeFieldType); document.Add(item.Key); document.Add(textName); document.Add(textComplete); // Split the white spaces from the text. string[] words = data.Text.Words(); // If words exist. if (words != null && words.Length > 0) { // Add the query for each word. for (int j = 0; j < words.Length; j++) { // Format the word. string word = words[j].ToLower().RemovePunctuationFromStartAndEnd(); // If a word exists. if (!String.IsNullOrEmpty(word)) { Lucene.Net.Documents.Field textData = new Field("facetcontent", word, textFieldType); document.Add(textData); } } } // Add the document. writer.AddDocument(config.Build(facetWriter, document)); // Commit after a set number of documents. totalTextLength += (long)data.Text.Length; if (totalTextLength > maxTextLengthBeforeCommit) { // Commit the index. writer.Commit(); facetWriter.Commit(); totalTextLength = 0; } } } } // Commit the index. writer.Commit(); facetWriter.Commit(); }
public virtual void TestBasic() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); // Writes facet ords to a separate directory from the // main index: DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); FacetsConfig config = new FacetsConfig(); // Reused across documents, to add the necessary facet // fields: Document doc = new Document(); doc.Add(new IntField("num", 10, Field.Store.NO)); doc.Add(new FacetField("Author", "Bob")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new IntField("num", 20, Field.Store.NO)); doc.Add(new FacetField("Author", "Lisa")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new IntField("num", 30, Field.Store.NO)); doc.Add(new FacetField("Author", "Lisa")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new IntField("num", 40, Field.Store.NO)); doc.Add(new FacetField("Author", "Susan")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new IntField("num", 45, Field.Store.NO)); doc.Add(new FacetField("Author", "Frank")); writer.AddDocument(config.Build(taxoWriter, doc)); // NRT open IndexSearcher searcher = NewSearcher(writer.Reader); writer.Dispose(); // NRT open var taxoReader = new DirectoryTaxonomyReader(taxoWriter); taxoWriter.Dispose(); // Aggregate the facet counts: FacetsCollector c = new FacetsCollector(); // MatchAllDocsQuery is for "browsing" (counts facets // for all non-deleted docs in the index); normally // you'd use a "normal" query and one of the // Facets.search utility methods: searcher.Search(new MatchAllDocsQuery(), c); TaxonomyFacetSumValueSource facets = new TaxonomyFacetSumValueSource(taxoReader, new FacetsConfig(), c, new IntFieldSource("num")); // Retrieve & verify results: Assert.AreEqual("dim=Author path=[] value=145.0 childCount=4\n Lisa (50.0)\n Frank (45.0)\n Susan (40.0)\n Bob (10.0)\n", facets.GetTopChildren(10, "Author").ToString()); taxoReader.Dispose(); searcher.IndexReader.Dispose(); dir.Dispose(); taxoDir.Dispose(); }
/// <summary> /// Add documents. /// </summary> /// <param name="writer">The index writer.</param> /// <param name="facetWriter">The facet index writer.</param> /// <param name="directoryInfo">The directory information where all the files that are to be added are located.</param> /// <param name="files">The list of files that are to be added.</param> /// <param name="documents">The supported documents search filter, used to indicate what files are to be added.</param> /// <param name="facetField">The facet field information.</param> /// <param name="config">The facet configuration information.</param> public void AddDocuments(Lucene.Net.Index.IndexWriter writer, DirectoryTaxonomyWriter facetWriter, DirectoryInfo directoryInfo, string[] files, SupportedDocumentExtension documents, FacetField facetField, FacetsConfig config) { FieldType pathFieldType = new Lucene.Net.Documents.FieldType() { Indexed = true, Tokenized = false, Stored = true, IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, }; FieldType contentFieldType = new Lucene.Net.Documents.FieldType() { Indexed = true, Tokenized = documents.TokenizeContent, Stored = documents.StoreContent, IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, }; // For each file. for (int i = 0; i < files.Length; i++) { // If the file exists if (File.Exists(files[i])) { Lucene.Net.Documents.Document document = new Lucene.Net.Documents.Document(); try { FileInfo fileInfo = new FileInfo(files[i]); string file = files[i].Replace(directoryInfo.Root.FullName, "").ToLower(); Lucene.Net.Documents.Field path = new Field("path", file.ToLower().Replace("\\", "/"), pathFieldType); Lucene.Net.Documents.Field modified = new Field("modified", fileInfo.LastWriteTime.ToShortDateString() + " " + fileInfo.LastWriteTime.ToShortTimeString(), pathFieldType); // Add the fields. document.Add(facetField); document.Add(path); document.Add(modified); // Create the stream reader. OpenDocument(files[i]); string content = Nequeo.Xml.Document.ExtractContent(_xDocument); // If content exists. if (!String.IsNullOrEmpty(content)) { // Split the white spaces from the text. string[] words = content.Words(); // If words exist. if (words != null && words.Length > 0) { // Add the query for each word. for (int j = 0; j < words.Length; j++) { // Format the word. string word = words[j].ToLower().RemovePunctuationFromStartAndEnd(); // If a word exists. if (!String.IsNullOrEmpty(word)) { Lucene.Net.Documents.Field contentField = new Field("facetcontent", word, contentFieldType); document.Add(contentField); } } } } // Add the document. writer.AddDocument(config.Build(facetWriter, document)); _document.Close(); // Commit after a set number of documents. documents.TotalDocumentSize += fileInfo.Length; if (documents.TotalDocumentSize > documents.MaxDocumentSizePerCommit) { // Commit the index. writer.Commit(); facetWriter.Commit(); documents.TotalDocumentSize = 0; } } catch (Exception) { throw; } finally { CloseDocument(); } } } }
public virtual void TestHugeLabel() { Directory indexDir = NewDirectory(), taxoDir = NewDirectory(); IndexWriter indexWriter = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE, new Cl2oTaxonomyWriterCache(2, 1f, 1)); FacetsConfig config = new FacetsConfig(); // Add one huge label: string bigs = null; int ordinal = -1; int len = FacetLabel.MAX_CATEGORY_PATH_LENGTH - 4; // for the dimension and separator bigs = TestUtil.RandomSimpleString(Random(), len, len); FacetField ff = new FacetField("dim", bigs); FacetLabel cp = new FacetLabel("dim", bigs); ordinal = taxoWriter.AddCategory(cp); Document doc = new Document(); doc.Add(ff); indexWriter.AddDocument(config.Build(taxoWriter, doc)); // Add tiny ones to cause a re-hash for (int i = 0; i < 3; i++) { string s = TestUtil.RandomSimpleString(Random(), 1, 10); taxoWriter.AddCategory(new FacetLabel("dim", s)); doc = new Document(); doc.Add(new FacetField("dim", s)); indexWriter.AddDocument(config.Build(taxoWriter, doc)); } // when too large components were allowed to be added, this resulted in a new added category Assert.AreEqual(ordinal, taxoWriter.AddCategory(cp)); IOUtils.Close(indexWriter, taxoWriter); DirectoryReader indexReader = DirectoryReader.Open(indexDir); var taxoReader = new DirectoryTaxonomyReader(taxoDir); IndexSearcher searcher = new IndexSearcher(indexReader); DrillDownQuery ddq = new DrillDownQuery(new FacetsConfig()); ddq.Add("dim", bigs); Assert.AreEqual(1, searcher.Search(ddq, 10).TotalHits); IOUtils.Close(indexReader, taxoReader, indexDir, taxoDir); }
public virtual void TestWithScore() { Store.Directory indexDir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir); IndexWriter iw = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); FacetsConfig config = new FacetsConfig(); for (int i = 0; i < 4; i++) { Document doc = new Document(); doc.Add(new NumericDocValuesField("price", (i + 1))); doc.Add(new FacetField("a", Convert.ToString(i % 2))); iw.AddDocument(config.Build(taxoWriter, doc)); } DirectoryReader r = DirectoryReader.Open(iw, true); DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter); ValueSource valueSource = new ValueSourceAnonymousInnerClassHelper(this); FacetsCollector fc = new FacetsCollector(true); // score documents by their 'price' field - makes asserting the correct counts for the categories easier Query q = new FunctionQuery(new LongFieldSource("price")); FacetsCollector.Search(NewSearcher(r), q, 10, fc); Facets facets = new TaxonomyFacetSumValueSource(taxoReader, config, fc, valueSource); Assert.AreEqual("dim=a path=[] value=10.0 childCount=2\n 1 (6.0)\n 0 (4.0)\n", facets.GetTopChildren(10, "a").ToString()); IOUtils.Close(taxoWriter, iw, taxoReader, taxoDir, r, indexDir); }
public virtual void TestWrongIndexFieldName() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); // Writes facet ords to a separate directory from the // main index: DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE); FacetsConfig config = new FacetsConfig(); config.SetIndexFieldName("a", "$facets2"); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); Document doc = new Document(); doc.Add(new FacetField("a", "foo1")); writer.AddDocument(config.Build(taxoWriter, doc)); // NRT open IndexSearcher searcher = NewSearcher(writer.Reader); // NRT open var taxoReader = new DirectoryTaxonomyReader(taxoWriter); FacetsCollector c = new FacetsCollector(); searcher.Search(new MatchAllDocsQuery(), c); // Uses default $facets field: Facets facets; if (Random().NextBoolean()) { facets = new FastTaxonomyFacetCounts(taxoReader, config, c); } else { OrdinalsReader ordsReader = new DocValuesOrdinalsReader(); if (Random().NextBoolean()) { ordsReader = new CachedOrdinalsReader(ordsReader); } facets = new TaxonomyFacetCounts(ordsReader, taxoReader, config, c); } // Ask for top 10 labels for any dims that have counts: IList <FacetResult> results = facets.GetAllDims(10); Assert.True(results.Count == 0); try { facets.GetSpecificValue("a"); Fail("should have hit exc"); } catch (System.ArgumentException) { // expected } try { facets.GetTopChildren(10, "a"); Fail("should have hit exc"); } catch (System.ArgumentException) { // expected } IOUtils.Close(writer, taxoWriter, searcher.IndexReader, taxoReader, taxoDir, dir); }
public virtual void TestSparseFacets() { AssumeTrue("Test requires SortedSetDV support", DefaultCodecSupportsSortedSet); Directory dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, dir); FacetsConfig config = new FacetsConfig(); Document doc = new Document(); doc.Add(new SortedSetDocValuesFacetField("a", "foo1")); writer.AddDocument(config.Build(doc)); if (Random.NextBoolean()) { writer.Commit(); } doc = new Document(); doc.Add(new SortedSetDocValuesFacetField("a", "foo2")); doc.Add(new SortedSetDocValuesFacetField("b", "bar1")); writer.AddDocument(config.Build(doc)); if (Random.NextBoolean()) { writer.Commit(); } doc = new Document(); doc.Add(new SortedSetDocValuesFacetField("a", "foo3")); doc.Add(new SortedSetDocValuesFacetField("b", "bar2")); doc.Add(new SortedSetDocValuesFacetField("c", "baz1")); writer.AddDocument(config.Build(doc)); // NRT open IndexSearcher searcher = NewSearcher(writer.GetReader()); writer.Dispose(); // Per-top-reader state: SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(searcher.IndexReader); FacetsCollector c = new FacetsCollector(); searcher.Search(new MatchAllDocsQuery(), c); SortedSetDocValuesFacetCounts facets = new SortedSetDocValuesFacetCounts(state, c); // Ask for top 10 labels for any dims that have counts: IList <FacetResult> results = facets.GetAllDims(10); Assert.AreEqual(3, results.Count); Assert.AreEqual("dim=a path=[] value=3 childCount=3\n foo1 (1)\n foo2 (1)\n foo3 (1)\n", results[0].ToString()); Assert.AreEqual("dim=b path=[] value=2 childCount=2\n bar1 (1)\n bar2 (1)\n", results[1].ToString()); Assert.AreEqual("dim=c path=[] value=1 childCount=1\n baz1 (1)\n", results[2].ToString()); searcher.IndexReader.Dispose(); dir.Dispose(); }
public virtual void TestGetFacetResultsTwice() { // LUCENE-4893: counts were multiplied as many times as getFacetResults was called. Store.Directory indexDir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); var taxoWriter = new DirectoryTaxonomyWriter(taxoDir); IndexWriter iw = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); FacetsConfig config = new FacetsConfig(); Document doc = new Document(); doc.Add(new FacetField("a", "1")); doc.Add(new FacetField("b", "1")); iw.AddDocument(config.Build(taxoWriter, doc)); DirectoryReader r = DirectoryReader.Open(iw, true); var taxoReader = new DirectoryTaxonomyReader(taxoWriter); FacetsCollector sfc = new FacetsCollector(); NewSearcher(r).Search(new MatchAllDocsQuery(), sfc); Facets facets = GetTaxonomyFacetCounts(taxoReader, config, sfc); IList<FacetResult> res1 = facets.GetAllDims(10); IList<FacetResult> res2 = facets.GetAllDims(10); Assert.AreEqual(res1, res2, "calling getFacetResults twice should return the .equals()=true result"); IOUtils.Close(taxoWriter, iw, taxoReader, taxoDir, r, indexDir); }
public virtual void TestBasic() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); // Writes facet ords to a separate directory from the // main index: DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir); FacetsConfig config = new FacetsConfig(); // Reused across documents, to add the necessary facet // fields: Document doc = new Document(); doc.Add(new IntField("num", 10, Field.Store.NO)); doc.Add(new FacetField("Author", "Bob")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new IntField("num", 20, Field.Store.NO)); doc.Add(new FacetField("Author", "Lisa")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new IntField("num", 30, Field.Store.NO)); doc.Add(new FacetField("Author", "Lisa")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new IntField("num", 40, Field.Store.NO)); doc.Add(new FacetField("Author", "Susan")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new IntField("num", 45, Field.Store.NO)); doc.Add(new FacetField("Author", "Frank")); writer.AddDocument(config.Build(taxoWriter, doc)); // NRT open IndexSearcher searcher = NewSearcher(writer.Reader); writer.Dispose(); // NRT open var taxoReader = new DirectoryTaxonomyReader(taxoWriter); taxoWriter.Dispose(); // Aggregate the facet counts: FacetsCollector c = new FacetsCollector(); // MatchAllDocsQuery is for "browsing" (counts facets // for all non-deleted docs in the index); normally // you'd use a "normal" query and one of the // Facets.search utility methods: searcher.Search(new MatchAllDocsQuery(), c); TaxonomyFacetSumValueSource facets = new TaxonomyFacetSumValueSource(taxoReader, new FacetsConfig(), c, new IntFieldSource("num")); // Retrieve & verify results: Assert.AreEqual("dim=Author path=[] value=145.0 childCount=4\n Lisa (50.0)\n Frank (45.0)\n Susan (40.0)\n Bob (10.0)\n", facets.GetTopChildren(10, "Author").ToString()); taxoReader.Dispose(); searcher.IndexReader.Dispose(); dir.Dispose(); taxoDir.Dispose(); }
public virtual void TestBasic() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); // Writes facet ords to a separate directory from the // main index: var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE); FacetsConfig config = new FacetsConfig(); config.SetHierarchical("Publish Date", true); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); Document doc = new Document(); doc.Add(new FacetField("Author", "Bob")); doc.Add(new FacetField("Publish Date", "2010", "10", "15")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new FacetField("Author", "Lisa")); doc.Add(new FacetField("Publish Date", "2010", "10", "20")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new FacetField("Author", "Lisa")); doc.Add(new FacetField("Publish Date", "2012", "1", "1")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new FacetField("Author", "Susan")); doc.Add(new FacetField("Publish Date", "2012", "1", "7")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new FacetField("Author", "Frank")); doc.Add(new FacetField("Publish Date", "1999", "5", "5")); writer.AddDocument(config.Build(taxoWriter, doc)); // NRT open IndexSearcher searcher = NewSearcher(writer.Reader); // NRT open var taxoReader = new DirectoryTaxonomyReader(taxoWriter); // Aggregate the facet counts: FacetsCollector c = new FacetsCollector(); // MatchAllDocsQuery is for "browsing" (counts facets // for all non-deleted docs in the index); normally // you'd use a "normal" query, and use MultiCollector to // wrap collecting the "normal" hits and also facets: searcher.Search(new MatchAllDocsQuery(), c); Facets facets = new FastTaxonomyFacetCounts(taxoReader, config, c); // Retrieve & verify results: Assert.AreEqual("dim=Publish Date path=[] value=5 childCount=3\n 2010 (2)\n 2012 (2)\n 1999 (1)\n", facets.GetTopChildren(10, "Publish Date").ToString()); Assert.AreEqual("dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Bob (1)\n Susan (1)\n Frank (1)\n", facets.GetTopChildren(10, "Author").ToString()); // Now user drills down on Publish Date/2010: DrillDownQuery q2 = new DrillDownQuery(config); q2.Add("Publish Date", "2010"); c = new FacetsCollector(); searcher.Search(q2, c); facets = new FastTaxonomyFacetCounts(taxoReader, config, c); Assert.AreEqual("dim=Author path=[] value=2 childCount=2\n Bob (1)\n Lisa (1)\n", facets.GetTopChildren(10, "Author").ToString()); Assert.AreEqual(1, facets.GetSpecificValue("Author", "Lisa")); Assert.Null(facets.GetTopChildren(10, "Non exitent dim")); // Smoke test PrintTaxonomyStats: string result; using (ByteArrayOutputStream bos = new ByteArrayOutputStream()) { using (StreamWriter w = new StreamWriter(bos, Encoding.UTF8, 2048, true) { AutoFlush = true }) { PrintTaxonomyStats.PrintStats(taxoReader, w, true); } result = bos.ToString(); } Assert.True(result.IndexOf("/Author: 4 immediate children; 5 total categories", StringComparison.Ordinal) != -1); Assert.True(result.IndexOf("/Publish Date: 3 immediate children; 12 total categories", StringComparison.Ordinal) != -1); // Make sure at least a few nodes of the tree came out: Assert.True(result.IndexOf(" /1999", StringComparison.Ordinal) != -1); Assert.True(result.IndexOf(" /2012", StringComparison.Ordinal) != -1); Assert.True(result.IndexOf(" /20", StringComparison.Ordinal) != -1); IOUtils.Close(writer, taxoWriter, searcher.IndexReader, taxoReader, taxoDir, dir); }
public virtual void TestLabelWithDelimiter() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir); var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE); FacetsConfig config = new FacetsConfig(); config.SetMultiValued("dim", true); Document doc = new Document(); doc.Add(NewTextField("field", "text", Field.Store.NO)); doc.Add(new FacetField("dim", "test\u001Fone")); doc.Add(new FacetField("dim", "test\u001Etwo")); writer.AddDocument(config.Build(taxoWriter, doc)); // NRT open IndexSearcher searcher = NewSearcher(writer.Reader); // NRT open var taxoReader = new DirectoryTaxonomyReader(taxoWriter); FacetsCollector c = new FacetsCollector(); searcher.Search(new MatchAllDocsQuery(), c); Facets facets = GetTaxonomyFacetCounts(taxoReader, config, c); Assert.AreEqual(1, facets.GetSpecificValue("dim", "test\u001Fone")); Assert.AreEqual(1, facets.GetSpecificValue("dim", "test\u001Etwo")); FacetResult result = facets.GetTopChildren(10, "dim"); Assert.AreEqual("dim=dim path=[] value=-1 childCount=2\n test\u001Fone (1)\n test\u001Etwo (1)\n", result.ToString()); IOUtils.Close(writer, taxoWriter, searcher.IndexReader, taxoReader, dir, taxoDir); }
public virtual void TestRandom() { string[] tokens = GetRandomTokens(10); Store.Directory indexDir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random(), indexDir, Similarity, TimeZone); var tw = new DirectoryTaxonomyWriter(taxoDir); FacetsConfig config = new FacetsConfig(); int numDocs = AtLeast(1000); int numDims = TestUtil.NextInt(Random(), 1, 7); IList <TestDoc> testDocs = GetRandomDocs(tokens, numDocs, numDims); foreach (TestDoc testDoc in testDocs) { Document doc = new Document(); doc.Add(NewStringField("content", testDoc.content, Field.Store.NO)); for (int j = 0; j < numDims; j++) { if (testDoc.dims[j] != null) { doc.Add(new FacetField("dim" + j, testDoc.dims[j])); } } w.AddDocument(config.Build(tw, doc)); } // NRT open IndexSearcher searcher = NewSearcher(w.Reader); // NRT open var tr = new DirectoryTaxonomyReader(tw); int iters = AtLeast(100); for (int iter = 0; iter < iters; iter++) { string searchToken = tokens[Random().Next(tokens.Length)]; if (VERBOSE) { Console.WriteLine("\nTEST: iter content=" + searchToken); } FacetsCollector fc = new FacetsCollector(); FacetsCollector.Search(searcher, new TermQuery(new Term("content", searchToken)), 10, fc); Facets facets = GetTaxonomyFacetCounts(tr, config, fc); // Slow, yet hopefully bug-free, faceting: var expectedCounts = new List <Dictionary <string, int?> >(); for (int i = 0; i < numDims; i++) { expectedCounts.Add(new Dictionary <string, int?>()); } foreach (TestDoc doc in testDocs) { if (doc.content.Equals(searchToken)) { for (int j = 0; j < numDims; j++) { if (doc.dims[j] != null) { int?v = expectedCounts[j].ContainsKey(doc.dims[j]) ? expectedCounts[j][doc.dims[j]] : null; if (v == null) { expectedCounts[j][doc.dims[j]] = 1; } else { expectedCounts[j][doc.dims[j]] = (int)v + 1; } } } } } List <FacetResult> expected = new List <FacetResult>(); for (int i = 0; i < numDims; i++) { List <LabelAndValue> labelValues = new List <LabelAndValue>(); int totCount = 0; foreach (KeyValuePair <string, int?> ent in expectedCounts[i]) { labelValues.Add(new LabelAndValue(ent.Key, ent.Value.Value)); totCount += ent.Value.Value; } SortLabelValues(labelValues); if (totCount > 0) { expected.Add(new FacetResult("dim" + i, new string[0], totCount, labelValues.ToArray(), labelValues.Count)); } } // Sort by highest value, tie break by value: SortFacetResults(expected); IList <FacetResult> actual = facets.GetAllDims(10); // Messy: fixup ties SortTies(actual); Assert.AreEqual(expected, actual); } IOUtils.Close(w, tw, searcher.IndexReader, tr, indexDir, taxoDir); }
public virtual void TestManyFacetsInOneDocument() { AssumeTrue("default Codec doesn't support huge BinaryDocValues", TestUtil.FieldSupportsHugeBinaryDocValues(FacetsConfig.DEFAULT_INDEX_FIELD_NAME)); Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, iwc); var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE); FacetsConfig config = new FacetsConfig(); config.SetMultiValued("dim", true); int numLabels = TestUtil.NextInt(Random(), 40000, 100000); Document doc = new Document(); doc.Add(NewTextField("field", "text", Field.Store.NO)); for (int i = 0; i < numLabels; i++) { doc.Add(new FacetField("dim", "" + i)); } writer.AddDocument(config.Build(taxoWriter, doc)); // NRT open IndexSearcher searcher = NewSearcher(writer.Reader); // NRT open var taxoReader = new DirectoryTaxonomyReader(taxoWriter); // Aggregate the facet counts: FacetsCollector c = new FacetsCollector(); // MatchAllDocsQuery is for "browsing" (counts facets // for all non-deleted docs in the index); normally // you'd use a "normal" query, and use MultiCollector to // wrap collecting the "normal" hits and also facets: searcher.Search(new MatchAllDocsQuery(), c); Facets facets = GetTaxonomyFacetCounts(taxoReader, config, c); FacetResult result = facets.GetTopChildren(int.MaxValue, "dim"); Assert.AreEqual(numLabels, result.LabelValues.Length); var allLabels = new HashSet<string>(); foreach (LabelAndValue labelValue in result.LabelValues) { allLabels.Add(labelValue.label); Assert.AreEqual(1, (int)labelValue.value); } Assert.AreEqual(numLabels, allLabels.Count); IOUtils.Close(searcher.IndexReader, taxoWriter, writer, taxoReader, dir, taxoDir); }
public virtual void TestMixedRangeAndNonRangeTaxonomy() { Directory d = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random(), d, Similarity, TimeZone); Directory td = NewDirectory(); DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(td, OpenMode.CREATE); FacetsConfig config = new FacetsConfig(); for (long l = 0; l < 100; l++) { Document doc = new Document(); // For computing range facet counts: doc.Add(new NumericDocValuesField("field", l)); // For drill down by numeric range: doc.Add(new Int64Field("field", l, Field.Store.NO)); if ((l & 3) == 0) { doc.Add(new FacetField("dim", "a")); } else { doc.Add(new FacetField("dim", "b")); } w.AddDocument(config.Build(tw, doc)); } IndexReader r = w.Reader; var tr = new DirectoryTaxonomyReader(tw); IndexSearcher s = NewSearcher(r); if (VERBOSE) { Console.WriteLine("TEST: searcher=" + s); } DrillSideways ds = new DrillSidewaysAnonymousInnerClassHelper(this, s, config, tr); // First search, no drill downs: DrillDownQuery ddq = new DrillDownQuery(config); DrillSidewaysResult dsr = ds.Search(null, ddq, 10); Assert.AreEqual(100, dsr.Hits.TotalHits); Assert.AreEqual("dim=dim path=[] value=100 childCount=2\n b (75)\n a (25)\n", dsr.Facets.GetTopChildren(10, "dim").ToString()); Assert.AreEqual("dim=field path=[] value=21 childCount=5\n less than 10 (10)\n less than or equal to 10 (11)\n over 90 (9)\n 90 or above (10)\n over 1000 (0)\n", dsr.Facets.GetTopChildren(10, "field").ToString()); // Second search, drill down on dim=b: ddq = new DrillDownQuery(config); ddq.Add("dim", "b"); dsr = ds.Search(null, ddq, 10); Assert.AreEqual(75, dsr.Hits.TotalHits); Assert.AreEqual("dim=dim path=[] value=100 childCount=2\n b (75)\n a (25)\n", dsr.Facets.GetTopChildren(10, "dim").ToString()); Assert.AreEqual("dim=field path=[] value=16 childCount=5\n less than 10 (7)\n less than or equal to 10 (8)\n over 90 (7)\n 90 or above (8)\n over 1000 (0)\n", dsr.Facets.GetTopChildren(10, "field").ToString()); // Third search, drill down on "less than or equal to 10": ddq = new DrillDownQuery(config); ddq.Add("field", NumericRangeQuery.NewInt64Range("field", 0L, 10L, true, true)); dsr = ds.Search(null, ddq, 10); Assert.AreEqual(11, dsr.Hits.TotalHits); Assert.AreEqual("dim=dim path=[] value=11 childCount=2\n b (8)\n a (3)\n", dsr.Facets.GetTopChildren(10, "dim").ToString()); Assert.AreEqual("dim=field path=[] value=21 childCount=5\n less than 10 (10)\n less than or equal to 10 (11)\n over 90 (9)\n 90 or above (10)\n over 1000 (0)\n", dsr.Facets.GetTopChildren(10, "field").ToString()); IOUtils.Close(tw, tr, td, w, r, d); }
public virtual void TestMultiValuedHierarchy() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE); FacetsConfig config = new FacetsConfig(); config.SetHierarchical("a", true); config.SetMultiValued("a", true); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir); Document doc = new Document(); doc.Add(NewTextField("field", "text", Field.Store.NO)); doc.Add(new FacetField("a", "path", "x")); doc.Add(new FacetField("a", "path", "y")); writer.AddDocument(config.Build(taxoWriter, doc)); // NRT open IndexSearcher searcher = NewSearcher(writer.Reader); // NRT open var taxoReader = new DirectoryTaxonomyReader(taxoWriter); // Aggregate the facet counts: FacetsCollector c = new FacetsCollector(); // MatchAllDocsQuery is for "browsing" (counts facets // for all non-deleted docs in the index); normally // you'd use a "normal" query, and use MultiCollector to // wrap collecting the "normal" hits and also facets: searcher.Search(new MatchAllDocsQuery(), c); Facets facets = GetTaxonomyFacetCounts(taxoReader, config, c); try { facets.GetSpecificValue("a"); Fail("didn't hit expected exception"); } catch (System.ArgumentException) { // expected } FacetResult result = facets.GetTopChildren(10, "a"); Assert.AreEqual(1, result.LabelValues.Length); Assert.AreEqual(1, (int)result.LabelValues[0].value); IOUtils.Close(writer, taxoWriter, searcher.IndexReader, taxoReader, dir, taxoDir); }
public virtual void TestRandom() { AssumeTrue("Test requires SortedSetDV support", DefaultCodecSupportsSortedSet); string[] tokens = GetRandomTokens(10); Directory indexDir = NewDirectory(); Directory taxoDir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, indexDir); FacetsConfig config = new FacetsConfig(); int numDocs = AtLeast(1000); int numDims = TestUtil.NextInt32(Random, 1, 7); IList <TestDoc> testDocs = GetRandomDocs(tokens, numDocs, numDims); foreach (TestDoc testDoc in testDocs) { Document doc = new Document(); doc.Add(NewStringField("content", testDoc.content, Field.Store.NO)); for (int j = 0; j < numDims; j++) { if (testDoc.dims[j] != null) { doc.Add(new SortedSetDocValuesFacetField("dim" + j, testDoc.dims[j])); } } w.AddDocument(config.Build(doc)); } // NRT open IndexSearcher searcher = NewSearcher(w.GetReader()); // Per-top-reader state: SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(searcher.IndexReader); int iters = AtLeast(100); for (int iter = 0; iter < iters; iter++) { string searchToken = tokens[Random.Next(tokens.Length)]; if (Verbose) { Console.WriteLine("\nTEST: iter content=" + searchToken); } FacetsCollector fc = new FacetsCollector(); FacetsCollector.Search(searcher, new TermQuery(new Term("content", searchToken)), 10, fc); Facets facets = new SortedSetDocValuesFacetCounts(state, fc); // Slow, yet hopefully bug-free, faceting: var expectedCounts = new List <Dictionary <string, int?> >(); for (int i = 0; i < numDims; i++) { expectedCounts.Add(new Dictionary <string, int?>()); } foreach (TestDoc doc in testDocs) { if (doc.content.Equals(searchToken, StringComparison.Ordinal)) { for (int j = 0; j < numDims; j++) { if (doc.dims[j] != null) { if (!expectedCounts[j].TryGetValue(doc.dims[j], out int?v)) { expectedCounts[j][doc.dims[j]] = 1; } else { expectedCounts[j][doc.dims[j]] = (int)v + 1; } } } } } List <FacetResult> expected = new List <FacetResult>(); for (int i = 0; i < numDims; i++) { List <LabelAndValue> labelValues = new List <LabelAndValue>(); int totCount = 0; foreach (KeyValuePair <string, int?> ent in expectedCounts[i]) { labelValues.Add(new LabelAndValue(ent.Key, ent.Value.Value)); totCount += ent.Value.Value; } SortLabelValues(labelValues); if (totCount > 0) { expected.Add(new FacetResult("dim" + i, new string[0], totCount, labelValues.ToArray(), labelValues.Count)); } } // Sort by highest value, tie break by value: SortFacetResults(expected); IList <FacetResult> actual = facets.GetAllDims(10); // Messy: fixup ties //sortTies(actual); CollectionAssert.AreEqual(expected, actual); } IOUtils.Dispose(w, searcher.IndexReader, indexDir, taxoDir); }
public virtual void TestRandom() { string[] tokens = GetRandomTokens(10); Store.Directory indexDir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random(), indexDir); var tw = new DirectoryTaxonomyWriter(taxoDir); FacetsConfig config = new FacetsConfig(); int numDocs = AtLeast(1000); int numDims = TestUtil.NextInt(Random(), 1, 7); IList<TestDoc> testDocs = GetRandomDocs(tokens, numDocs, numDims); foreach (TestDoc testDoc in testDocs) { Document doc = new Document(); doc.Add(NewStringField("content", testDoc.content, Field.Store.NO)); for (int j = 0; j < numDims; j++) { if (testDoc.dims[j] != null) { doc.Add(new FacetField("dim" + j, testDoc.dims[j])); } } w.AddDocument(config.Build(tw, doc)); } // NRT open IndexSearcher searcher = NewSearcher(w.Reader); // NRT open var tr = new DirectoryTaxonomyReader(tw); int iters = AtLeast(100); for (int iter = 0; iter < iters; iter++) { string searchToken = tokens[Random().Next(tokens.Length)]; if (VERBOSE) { Console.WriteLine("\nTEST: iter content=" + searchToken); } FacetsCollector fc = new FacetsCollector(); FacetsCollector.Search(searcher, new TermQuery(new Term("content", searchToken)), 10, fc); Facets facets = GetTaxonomyFacetCounts(tr, config, fc); // Slow, yet hopefully bug-free, faceting: var expectedCounts = new List<Dictionary<string, int?>>(); for (int i = 0; i < numDims; i++) { expectedCounts[i] = new Dictionary<string, int?>(); } foreach (TestDoc doc in testDocs) { if (doc.content.Equals(searchToken)) { for (int j = 0; j < numDims; j++) { if (doc.dims[j] != null) { int? v = expectedCounts[j][doc.dims[j]]; if (v == null) { expectedCounts[j][doc.dims[j]] = 1; } else { expectedCounts[j][doc.dims[j]] = (int)v + 1; } } } } } IList<FacetResult> expected = new List<FacetResult>(); for (int i = 0; i < numDims; i++) { IList<LabelAndValue> labelValues = new List<LabelAndValue>(); int totCount = 0; foreach (KeyValuePair<string, int?> ent in expectedCounts[i]) { labelValues.Add(new LabelAndValue(ent.Key, ent.Value.Value)); totCount += ent.Value.Value; } SortLabelValues(labelValues); if (totCount > 0) { expected.Add(new FacetResult("dim" + i, new string[0], totCount, labelValues.ToArray(), labelValues.Count)); } } // Sort by highest value, tie break by value: SortFacetResults(expected); IList<FacetResult> actual = facets.GetAllDims(10); // Messy: fixup ties SortTies(actual); Assert.AreEqual(expected, actual); } IOUtils.Close(w, tw, searcher.IndexReader, tr, indexDir, taxoDir); }
public virtual void TestReallyNoNormsForDrillDown() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); iwc.SetSimilarity(new PerFieldSimilarityWrapperAnonymousInnerClassHelper(this)); TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, iwc); FacetsConfig config = new FacetsConfig(); Document doc = new Document(); doc.Add(NewTextField("field", "text", Field.Store.NO)); doc.Add(new FacetField("a", "path")); writer.AddDocument(config.Build(taxoWriter, doc)); IOUtils.Close(writer, taxoWriter, dir, taxoDir); }
public virtual void TestRequireDimCount() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir); DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE); FacetsConfig config = new FacetsConfig(); config.SetRequireDimCount("dim", true); config.SetMultiValued("dim2", true); config.SetRequireDimCount("dim2", true); config.SetMultiValued("dim3", true); config.SetHierarchical("dim3", true); config.SetRequireDimCount("dim3", true); Document doc = new Document(); doc.Add(NewTextField("field", "text", Field.Store.NO)); doc.Add(new FacetField("dim", "a")); doc.Add(new FacetField("dim2", "a")); doc.Add(new FacetField("dim2", "b")); doc.Add(new FacetField("dim3", "a", "b")); doc.Add(new FacetField("dim3", "a", "c")); writer.AddDocument(config.Build(taxoWriter, doc)); // NRT open IndexSearcher searcher = NewSearcher(writer.Reader); // NRT open var taxoReader = new DirectoryTaxonomyReader(taxoWriter); FacetsCollector c = new FacetsCollector(); searcher.Search(new MatchAllDocsQuery(), c); Facets facets = GetTaxonomyFacetCounts(taxoReader, config, c); Assert.AreEqual(1, facets.GetTopChildren(10, "dim").Value); Assert.AreEqual(1, facets.GetTopChildren(10, "dim2").Value); Assert.AreEqual(1, facets.GetTopChildren(10, "dim3").Value); try { Assert.AreEqual(1, facets.GetSpecificValue("dim")); Fail("didn't hit expected exception"); } catch (System.ArgumentException) { // expected } Assert.AreEqual(1, facets.GetSpecificValue("dim2")); Assert.AreEqual(1, facets.GetSpecificValue("dim3")); IOUtils.Close(writer, taxoWriter, searcher.IndexReader, taxoReader, dir, taxoDir); }
public override void Run() { try { var seen = new HashSet <string>(); IList <string> paths = new List <string>(); while (true) { Document doc = new Document(); int numPaths = TestUtil.NextInt(Random(), 1, 5); for (int i = 0; i < numPaths; i++) { string path; if (paths.Count > 0 && Random().Next(5) != 4) { // Use previous path path = paths[Random().Next(paths.Count)]; } else { // Create new path path = null; while (true) { path = TestUtil.RandomRealisticUnicodeString(Random()); if (path.Length != 0 && !seen.Contains(path)) { seen.Add(path); paths.Add(path); break; } } } doc.Add(new FacetField("field", path)); } try { w.AddDocument(config.Build(tw, doc)); if (mgr != null && Random().NextDouble() < 0.02) { w.Commit(); tw.Commit(); mgr.MaybeRefresh(); } } catch (IOException ioe) { throw new Exception(ioe.Message, ioe); } if (VERBOSE) { Console.WriteLine("TW size=" + tw.Size + " vs " + ordLimit); } if (tw.Size >= ordLimit) { break; } } } finally { stop.Set(true); } }
public virtual void TestWrongIndexFieldName() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); // Writes facet ords to a separate directory from the // main index: var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE); FacetsConfig config = new FacetsConfig(); config.SetIndexFieldName("a", "$facets2"); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); Document doc = new Document(); doc.Add(new IntField("num", 10, Field.Store.NO)); doc.Add(new FacetField("a", "foo1")); writer.AddDocument(config.Build(taxoWriter, doc)); // NRT open IndexSearcher searcher = NewSearcher(writer.Reader); writer.Dispose(); // NRT open var taxoReader = new DirectoryTaxonomyReader(taxoWriter); taxoWriter.Dispose(); FacetsCollector c = new FacetsCollector(); searcher.Search(new MatchAllDocsQuery(), c); TaxonomyFacetSumValueSource facets = new TaxonomyFacetSumValueSource(taxoReader, config, c, new IntFieldSource("num")); // Ask for top 10 labels for any dims that have counts: IList<FacetResult> results = facets.GetAllDims(10); Assert.True(results.Count == 0); try { facets.GetSpecificValue("a"); Fail("should have hit exc"); } catch (System.ArgumentException) { // expected } try { facets.GetTopChildren(10, "a"); Fail("should have hit exc"); } catch (System.ArgumentException) { // expected } IOUtils.Close(searcher.IndexReader, taxoReader, dir, taxoDir); }