/// <summary> /// Aggreggates float facet values from the provided /// <see cref="ValueSource"/>, and pulls ordinals from the /// provided <see cref="OrdinalsReader"/>. /// </summary> public TaxonomyFacetSumValueSource(OrdinalsReader ordinalsReader, TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc, ValueSource valueSource) : base(ordinalsReader.IndexFieldName, taxoReader, config) { this.ordinalsReader = ordinalsReader; SumValues(fc.GetMatchingDocs(), fc.KeepScores, valueSource); }
static private void Configure(FacetsConfig fc, string facetName, bool isHierarchical, bool isMultiValued) { if (isHierarchical) fc.setHierarchical(facetName, true); if (isMultiValued) fc.setMultiValued(facetName, true); }
/// <summary> /// Initializes a new instance of the <see cref="LuceneFacetBuilder"/> class. /// </summary> /// <param name="taxonomyWriter">The taxonomy writer.</param> /// <exception cref="ArgumentNullException"></exception> public LuceneFacetBuilder(TaxonomyWriter taxonomyWriter) { if (taxonomyWriter == null) throw new ArgumentNullException(nameof(taxonomyWriter)); _taxonomyWriter = taxonomyWriter; FacetsConfig = new FacetsConfig(); }
public IndexerThread(IndexWriter w, FacetsConfig config, TaxonomyWriter tw, ReferenceManager<SearcherAndTaxonomy> mgr, int ordLimit, AtomicBoolean stop) { this.w = w; this.config = config; this.tw = tw; this.mgr = mgr; this.ordLimit = ordLimit; this.stop = stop; }
static private FacetsConfig CreateFacetsConfig() { var facetsConfig = new FacetsConfig(); Configure(facetsConfig, FacetNames.Folder, true, false); Configure(facetsConfig, FacetNames.PlaceName, true, false); Configure(facetsConfig, FacetNames.Keywords, false, true); Configure(facetsConfig, FacetNames.Date, true, false); return facetsConfig; }
public virtual void TestBasic() { AssumeTrue("Test requires SortedSetDV support", DefaultCodecSupportsSortedSet()); Directory dir = NewDirectory(); FacetsConfig config = new FacetsConfig(); config.SetMultiValued("a", true); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); Document doc = new Document(); doc.Add(new SortedSetDocValuesFacetField("a", "foo")); doc.Add(new SortedSetDocValuesFacetField("a", "bar")); doc.Add(new SortedSetDocValuesFacetField("a", "zoo")); doc.Add(new SortedSetDocValuesFacetField("b", "baz")); writer.AddDocument(config.Build(doc)); if (Random().NextBoolean()) { writer.Commit(); } doc = new Document(); doc.Add(new SortedSetDocValuesFacetField("a", "foo")); writer.AddDocument(config.Build(doc)); // NRT open IndexSearcher searcher = NewSearcher(writer.Reader); // Per-top-reader state: SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(searcher.IndexReader); FacetsCollector c = new FacetsCollector(); searcher.Search(new MatchAllDocsQuery(), c); SortedSetDocValuesFacetCounts facets = new SortedSetDocValuesFacetCounts(state, c); Assert.AreEqual("dim=a path=[] value=4 childCount=3\n foo (2)\n bar (1)\n zoo (1)\n", facets.GetTopChildren(10, "a").ToString()); Assert.AreEqual("dim=b path=[] value=1 childCount=1\n baz (1)\n", facets.GetTopChildren(10, "b").ToString()); // DrillDown: DrillDownQuery q = new DrillDownQuery(config); q.Add("a", "foo"); q.Add("b", "baz"); TopDocs hits = searcher.Search(q, 1); Assert.AreEqual(1, hits.TotalHits); IOUtils.Close(writer, searcher.IndexReader, dir); }
public virtual void TestWithThreads() { // LUCENE-5303: OrdinalsCache used the ThreadLocal BinaryDV instead of reader.getCoreCacheKey(). Store.Directory indexDir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); IndexWriter writer = new IndexWriter(indexDir, conf); var taxoWriter = new DirectoryTaxonomyWriter(taxoDir); FacetsConfig config = new FacetsConfig(); Document doc = new Document(); doc.Add(new FacetField("A", "1")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new FacetField("A", "2")); writer.AddDocument(config.Build(taxoWriter, doc)); var reader = DirectoryReader.Open(writer, true); CachedOrdinalsReader ordsReader = new CachedOrdinalsReader(new DocValuesOrdinalsReader(FacetsConfig.DEFAULT_INDEX_FIELD_NAME)); ThreadClass[] threads = new ThreadClass[3]; for (int i = 0; i < threads.Length; i++) { threads[i] = new ThreadAnonymousInnerClassHelper(this, "CachedOrdsThread-" + i, reader, ordsReader); } long ramBytesUsed = 0; foreach (ThreadClass t in threads) { t.Start(); t.Join(); if (ramBytesUsed == 0) { ramBytesUsed = ordsReader.RamBytesUsed(); } else { Assert.AreEqual(ramBytesUsed, ordsReader.RamBytesUsed()); } } IOUtils.Close(writer, taxoWriter, reader, indexDir, taxoDir); }
public static void BeforeClass() { dir = NewDirectory(); taxoDir = NewDirectory(); // preparations - index, taxonomy, content var taxoWriter = new DirectoryTaxonomyWriter(taxoDir); // Cannot mix ints & floats in the same indexed field: config = new FacetsConfig(); config.SetIndexFieldName("int", "$facets.int"); config.SetMultiValued("int", true); config.SetIndexFieldName("float", "$facets.float"); config.SetMultiValued("float", true); var writer = new RandomIndexWriter(Random(), dir); // index documents, 50% have only 'b' and all have 'a' for (int i = 0; i < 110; i++) { Document doc = new Document(); // every 11th document is added empty, this used to cause the association // aggregators to go into an infinite loop if (i % 11 != 0) { doc.Add(new IntAssociationFacetField(2, "int", "a")); doc.Add(new FloatAssociationFacetField(0.5f, "float", "a")); if (i % 2 == 0) // 50 { doc.Add(new IntAssociationFacetField(3, "int", "b")); doc.Add(new FloatAssociationFacetField(0.2f, "float", "b")); } } writer.AddDocument(config.Build(taxoWriter, doc)); } taxoWriter.Dispose(); reader = writer.Reader; writer.Dispose(); taxoReader = new DirectoryTaxonomyReader(taxoDir); }
public virtual void TestCustomDoublesValueSource() { Directory dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); Document doc = new Document(); writer.AddDocument(doc); writer.AddDocument(doc); writer.AddDocument(doc); // Test wants 3 docs in one segment: writer.ForceMerge(1); var vs = new ValueSourceAnonymousInnerClassHelper(this, doc); FacetsConfig config = new FacetsConfig(); FacetsCollector fc = new FacetsCollector(); IndexReader r = writer.Reader; IndexSearcher s = NewSearcher(r); s.Search(new MatchAllDocsQuery(), fc); DoubleRange[] ranges = new DoubleRange[] { new DoubleRange("< 1", 0.0, true, 1.0, false), new DoubleRange("< 2", 0.0, true, 2.0, false), new DoubleRange("< 5", 0.0, true, 5.0, false), new DoubleRange("< 10", 0.0, true, 10.0, false), new DoubleRange("< 20", 0.0, true, 20.0, false), new DoubleRange("< 50", 0.0, true, 50.0, false) }; Filter fastMatchFilter; AtomicBoolean filterWasUsed = new AtomicBoolean(); if (Random().NextBoolean()) { // Sort of silly: fastMatchFilter = new CachingWrapperFilterAnonymousInnerClassHelper(this, new QueryWrapperFilter(new MatchAllDocsQuery()), filterWasUsed); } else { fastMatchFilter = null; } if (VERBOSE) { Console.WriteLine("TEST: fastMatchFilter=" + fastMatchFilter); } Facets facets = new DoubleRangeFacetCounts("field", vs, fc, fastMatchFilter, ranges); Assert.AreEqual("dim=field path=[] value=3 childCount=6\n < 1 (0)\n < 2 (1)\n < 5 (3)\n < 10 (3)\n < 20 (3)\n < 50 (3)\n", facets.GetTopChildren(10, "field").ToString()); Assert.True(fastMatchFilter == null || filterWasUsed.Get()); DrillDownQuery ddq = new DrillDownQuery(config); ddq.Add("field", ranges[1].GetFilter(fastMatchFilter, vs)); // Test simple drill-down: Assert.AreEqual(1, s.Search(ddq, 10).TotalHits); // Test drill-sideways after drill-down DrillSideways ds = new DrillSidewaysAnonymousInnerClassHelper2(this, s, config, (TaxonomyReader)null, vs, ranges, fastMatchFilter); DrillSidewaysResult dsr = ds.Search(ddq, 10); Assert.AreEqual(1, dsr.Hits.TotalHits); Assert.AreEqual("dim=field path=[] value=3 childCount=6\n < 1 (0)\n < 2 (1)\n < 5 (3)\n < 10 (3)\n < 20 (3)\n < 50 (3)\n", dsr.Facets.GetTopChildren(10, "field").ToString()); IOUtils.Dispose(r, writer, dir); }
public virtual void TestBasic() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); // Writes facet ords to a separate directory from the // main index: DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); FacetsConfig config = new FacetsConfig(); // Reused across documents, to add the necessary facet // fields: Document doc = new Document(); doc.Add(new IntField("num", 10, Field.Store.NO)); doc.Add(new FacetField("Author", "Bob")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new IntField("num", 20, Field.Store.NO)); doc.Add(new FacetField("Author", "Lisa")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new IntField("num", 30, Field.Store.NO)); doc.Add(new FacetField("Author", "Lisa")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new IntField("num", 40, Field.Store.NO)); doc.Add(new FacetField("Author", "Susan")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new IntField("num", 45, Field.Store.NO)); doc.Add(new FacetField("Author", "Frank")); writer.AddDocument(config.Build(taxoWriter, doc)); // NRT open IndexSearcher searcher = NewSearcher(writer.Reader); writer.Dispose(); // NRT open var taxoReader = new DirectoryTaxonomyReader(taxoWriter); taxoWriter.Dispose(); // Aggregate the facet counts: FacetsCollector c = new FacetsCollector(); // MatchAllDocsQuery is for "browsing" (counts facets // for all non-deleted docs in the index); normally // you'd use a "normal" query and one of the // Facets.search utility methods: searcher.Search(new MatchAllDocsQuery(), c); TaxonomyFacetSumValueSource facets = new TaxonomyFacetSumValueSource(taxoReader, new FacetsConfig(), c, new IntFieldSource("num")); // Retrieve & verify results: Assert.AreEqual("dim=Author path=[] value=145.0 childCount=4\n Lisa (50.0)\n Frank (45.0)\n Susan (40.0)\n Bob (10.0)\n", facets.GetTopChildren(10, "Author").ToString()); taxoReader.Dispose(); searcher.IndexReader.Dispose(); dir.Dispose(); taxoDir.Dispose(); }
/// <summary> /// Create <see cref="FastTaxonomyFacetCounts"/>, using the /// specified <paramref name="indexFieldName"/> for ordinals. Use /// this if you had set <see cref="FacetsConfig.SetIndexFieldName"/> /// to change the index /// field name for certain dimensions. /// </summary> public FastTaxonomyFacetCounts(string indexFieldName, TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc) : base(indexFieldName, taxoReader, config) { Count(fc.GetMatchingDocs()); }
/// <summary> /// Create <see cref="TaxonomyFacetSumIntAssociations"/> against /// the specified index field. /// </summary> public TaxonomyFacetSumIntAssociations(string indexFieldName, TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc) : base(indexFieldName, taxoReader, config) { SumValues(fc.GetMatchingDocs()); }
/// <summary> /// Sole constructor. /// </summary> protected internal FloatTaxonomyFacets(string indexFieldName, TaxonomyReader taxoReader, FacetsConfig config) : base(indexFieldName, taxoReader, config) { values = new float[taxoReader.Count]; }
public virtual void TestRandom() { AssumeTrue("Test requires SortedSetDV support", DefaultCodecSupportsSortedSet); string[] tokens = GetRandomTokens(10); Directory indexDir = NewDirectory(); Directory taxoDir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, indexDir); FacetsConfig config = new FacetsConfig(); int numDocs = AtLeast(1000); int numDims = TestUtil.NextInt32(Random, 1, 7); IList <TestDoc> testDocs = GetRandomDocs(tokens, numDocs, numDims); foreach (TestDoc testDoc in testDocs) { Document doc = new Document(); doc.Add(NewStringField("content", testDoc.content, Field.Store.NO)); for (int j = 0; j < numDims; j++) { if (testDoc.dims[j] != null) { doc.Add(new SortedSetDocValuesFacetField("dim" + j, testDoc.dims[j])); } } w.AddDocument(config.Build(doc)); } // NRT open IndexSearcher searcher = NewSearcher(w.GetReader()); // Per-top-reader state: SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(searcher.IndexReader); int iters = AtLeast(100); for (int iter = 0; iter < iters; iter++) { string searchToken = tokens[Random.Next(tokens.Length)]; if (Verbose) { Console.WriteLine("\nTEST: iter content=" + searchToken); } FacetsCollector fc = new FacetsCollector(); FacetsCollector.Search(searcher, new TermQuery(new Term("content", searchToken)), 10, fc); Facets facets = new SortedSetDocValuesFacetCounts(state, fc); // Slow, yet hopefully bug-free, faceting: var expectedCounts = new List <Dictionary <string, int?> >(); for (int i = 0; i < numDims; i++) { expectedCounts.Add(new Dictionary <string, int?>()); } foreach (TestDoc doc in testDocs) { if (doc.content.Equals(searchToken, StringComparison.Ordinal)) { for (int j = 0; j < numDims; j++) { if (doc.dims[j] != null) { if (!expectedCounts[j].TryGetValue(doc.dims[j], out int?v)) { expectedCounts[j][doc.dims[j]] = 1; } else { expectedCounts[j][doc.dims[j]] = (int)v + 1; } } } } } List <FacetResult> expected = new List <FacetResult>(); for (int i = 0; i < numDims; i++) { List <LabelAndValue> labelValues = new List <LabelAndValue>(); int totCount = 0; foreach (KeyValuePair <string, int?> ent in expectedCounts[i]) { labelValues.Add(new LabelAndValue(ent.Key, ent.Value.Value)); totCount += ent.Value.Value; } SortLabelValues(labelValues); if (totCount > 0) { expected.Add(new FacetResult("dim" + i, new string[0], totCount, labelValues.ToArray(), labelValues.Count)); } } // Sort by highest value, tie break by value: SortFacetResults(expected); IList <FacetResult> actual = facets.GetAllDims(10); // Messy: fixup ties //sortTies(actual); CollectionAssert.AreEqual(expected, actual); } IOUtils.Dispose(w, searcher.IndexReader, indexDir, taxoDir); }
public virtual void TestWrongIndexFieldName() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); // Writes facet ords to a separate directory from the // main index: DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE); FacetsConfig config = new FacetsConfig(); config.SetIndexFieldName("a", "$facets2"); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); Document doc = new Document(); doc.Add(new FacetField("a", "foo1")); writer.AddDocument(config.Build(taxoWriter, doc)); // NRT open IndexSearcher searcher = NewSearcher(writer.Reader); // NRT open var taxoReader = new DirectoryTaxonomyReader(taxoWriter); FacetsCollector c = new FacetsCollector(); searcher.Search(new MatchAllDocsQuery(), c); // Uses default $facets field: Facets facets; if (Random().NextBoolean()) { facets = new FastTaxonomyFacetCounts(taxoReader, config, c); } else { OrdinalsReader ordsReader = new DocValuesOrdinalsReader(); if (Random().NextBoolean()) { ordsReader = new CachedOrdinalsReader(ordsReader); } facets = new TaxonomyFacetCounts(ordsReader, taxoReader, config, c); } // Ask for top 10 labels for any dims that have counts: IList <FacetResult> results = facets.GetAllDims(10); Assert.True(results.Count == 0); try { facets.GetSpecificValue("a"); Fail("should have hit exc"); } catch (System.ArgumentException) { // expected } try { facets.GetTopChildren(10, "a"); Fail("should have hit exc"); } catch (System.ArgumentException) { // expected } IOUtils.Close(writer, taxoWriter, searcher.IndexReader, taxoReader, taxoDir, dir); }
public virtual void TestRandom() { string[] tokens = GetRandomTokens(10); Store.Directory indexDir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random(), indexDir); var tw = new DirectoryTaxonomyWriter(taxoDir); FacetsConfig config = new FacetsConfig(); int numDocs = AtLeast(1000); int numDims = TestUtil.NextInt(Random(), 1, 7); IList<TestDoc> testDocs = GetRandomDocs(tokens, numDocs, numDims); foreach (TestDoc testDoc in testDocs) { Document doc = new Document(); doc.Add(NewStringField("content", testDoc.content, Field.Store.NO)); for (int j = 0; j < numDims; j++) { if (testDoc.dims[j] != null) { doc.Add(new FacetField("dim" + j, testDoc.dims[j])); } } w.AddDocument(config.Build(tw, doc)); } // NRT open IndexSearcher searcher = NewSearcher(w.Reader); // NRT open var tr = new DirectoryTaxonomyReader(tw); int iters = AtLeast(100); for (int iter = 0; iter < iters; iter++) { string searchToken = tokens[Random().Next(tokens.Length)]; if (VERBOSE) { Console.WriteLine("\nTEST: iter content=" + searchToken); } FacetsCollector fc = new FacetsCollector(); FacetsCollector.Search(searcher, new TermQuery(new Term("content", searchToken)), 10, fc); Facets facets = GetTaxonomyFacetCounts(tr, config, fc); // Slow, yet hopefully bug-free, faceting: var expectedCounts = new List<Dictionary<string, int?>>(); for (int i = 0; i < numDims; i++) { expectedCounts[i] = new Dictionary<string, int?>(); } foreach (TestDoc doc in testDocs) { if (doc.content.Equals(searchToken)) { for (int j = 0; j < numDims; j++) { if (doc.dims[j] != null) { int? v = expectedCounts[j][doc.dims[j]]; if (v == null) { expectedCounts[j][doc.dims[j]] = 1; } else { expectedCounts[j][doc.dims[j]] = (int)v + 1; } } } } } IList<FacetResult> expected = new List<FacetResult>(); for (int i = 0; i < numDims; i++) { IList<LabelAndValue> labelValues = new List<LabelAndValue>(); int totCount = 0; foreach (KeyValuePair<string, int?> ent in expectedCounts[i]) { labelValues.Add(new LabelAndValue(ent.Key, ent.Value.Value)); totCount += ent.Value.Value; } SortLabelValues(labelValues); if (totCount > 0) { expected.Add(new FacetResult("dim" + i, new string[0], totCount, labelValues.ToArray(), labelValues.Count)); } } // Sort by highest value, tie break by value: SortFacetResults(expected); IList<FacetResult> actual = facets.GetAllDims(10); // Messy: fixup ties SortTies(actual); Assert.AreEqual(expected, actual); } IOUtils.Close(w, tw, searcher.IndexReader, tr, indexDir, taxoDir); }
public virtual void TestReallyNoNormsForDrillDown() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); iwc.SetSimilarity(new PerFieldSimilarityWrapperAnonymousInnerClassHelper(this)); TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, iwc); FacetsConfig config = new FacetsConfig(); Document doc = new Document(); doc.Add(NewTextField("field", "text", Field.Store.NO)); doc.Add(new FacetField("a", "path")); writer.AddDocument(config.Build(taxoWriter, doc)); IOUtils.Close(writer, taxoWriter, dir, taxoDir); }
public virtual void TestManyFacetsInOneDocument() { AssumeTrue("default Codec doesn't support huge BinaryDocValues", TestUtil.FieldSupportsHugeBinaryDocValues(FacetsConfig.DEFAULT_INDEX_FIELD_NAME)); Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, iwc); var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE); FacetsConfig config = new FacetsConfig(); config.SetMultiValued("dim", true); int numLabels = TestUtil.NextInt(Random(), 40000, 100000); Document doc = new Document(); doc.Add(NewTextField("field", "text", Field.Store.NO)); for (int i = 0; i < numLabels; i++) { doc.Add(new FacetField("dim", "" + i)); } writer.AddDocument(config.Build(taxoWriter, doc)); // NRT open IndexSearcher searcher = NewSearcher(writer.Reader); // NRT open var taxoReader = new DirectoryTaxonomyReader(taxoWriter); // Aggregate the facet counts: FacetsCollector c = new FacetsCollector(); // MatchAllDocsQuery is for "browsing" (counts facets // for all non-deleted docs in the index); normally // you'd use a "normal" query, and use MultiCollector to // wrap collecting the "normal" hits and also facets: searcher.Search(new MatchAllDocsQuery(), c); Facets facets = GetTaxonomyFacetCounts(taxoReader, config, c); FacetResult result = facets.GetTopChildren(int.MaxValue, "dim"); Assert.AreEqual(numLabels, result.LabelValues.Length); var allLabels = new HashSet<string>(); foreach (LabelAndValue labelValue in result.LabelValues) { allLabels.Add(labelValue.label); Assert.AreEqual(1, (int)labelValue.value); } Assert.AreEqual(numLabels, allLabels.Count); IOUtils.Close(searcher.IndexReader, taxoWriter, writer, taxoReader, dir, taxoDir); }
public virtual void TestMultiValuedHierarchy() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE); FacetsConfig config = new FacetsConfig(); config.SetHierarchical("a", true); config.SetMultiValued("a", true); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir); Document doc = new Document(); doc.Add(NewTextField("field", "text", Field.Store.NO)); doc.Add(new FacetField("a", "path", "x")); doc.Add(new FacetField("a", "path", "y")); writer.AddDocument(config.Build(taxoWriter, doc)); // NRT open IndexSearcher searcher = NewSearcher(writer.Reader); // NRT open var taxoReader = new DirectoryTaxonomyReader(taxoWriter); // Aggregate the facet counts: FacetsCollector c = new FacetsCollector(); // MatchAllDocsQuery is for "browsing" (counts facets // for all non-deleted docs in the index); normally // you'd use a "normal" query, and use MultiCollector to // wrap collecting the "normal" hits and also facets: searcher.Search(new MatchAllDocsQuery(), c); Facets facets = GetTaxonomyFacetCounts(taxoReader, config, c); try { facets.GetSpecificValue("a"); Fail("didn't hit expected exception"); } catch (System.ArgumentException) { // expected } FacetResult result = facets.GetTopChildren(10, "a"); Assert.AreEqual(1, result.LabelValues.Length); Assert.AreEqual(1, (int)result.LabelValues[0].value); IOUtils.Close(writer, taxoWriter, searcher.IndexReader, taxoReader, dir, taxoDir); }
public virtual void TestLabelWithDelimiter() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir); var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE); FacetsConfig config = new FacetsConfig(); config.SetMultiValued("dim", true); Document doc = new Document(); doc.Add(NewTextField("field", "text", Field.Store.NO)); doc.Add(new FacetField("dim", "test\u001Fone")); doc.Add(new FacetField("dim", "test\u001Etwo")); writer.AddDocument(config.Build(taxoWriter, doc)); // NRT open IndexSearcher searcher = NewSearcher(writer.Reader); // NRT open var taxoReader = new DirectoryTaxonomyReader(taxoWriter); FacetsCollector c = new FacetsCollector(); searcher.Search(new MatchAllDocsQuery(), c); Facets facets = GetTaxonomyFacetCounts(taxoReader, config, c); Assert.AreEqual(1, facets.GetSpecificValue("dim", "test\u001Fone")); Assert.AreEqual(1, facets.GetSpecificValue("dim", "test\u001Etwo")); FacetResult result = facets.GetTopChildren(10, "dim"); Assert.AreEqual("dim=dim path=[] value=-1 childCount=2\n test\u001Fone (1)\n test\u001Etwo (1)\n", result.ToString()); IOUtils.Close(writer, taxoWriter, searcher.IndexReader, taxoReader, dir, taxoDir); }
public virtual void TestGetFacetResultsTwice() { // LUCENE-4893: counts were multiplied as many times as getFacetResults was called. Store.Directory indexDir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); var taxoWriter = new DirectoryTaxonomyWriter(taxoDir); IndexWriter iw = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); FacetsConfig config = new FacetsConfig(); Document doc = new Document(); doc.Add(new FacetField("a", "1")); doc.Add(new FacetField("b", "1")); iw.AddDocument(config.Build(taxoWriter, doc)); DirectoryReader r = DirectoryReader.Open(iw, true); var taxoReader = new DirectoryTaxonomyReader(taxoWriter); FacetsCollector sfc = new FacetsCollector(); NewSearcher(r).Search(new MatchAllDocsQuery(), sfc); Facets facets = GetTaxonomyFacetCounts(taxoReader, config, sfc); IList<FacetResult> res1 = facets.GetAllDims(10); IList<FacetResult> res2 = facets.GetAllDims(10); Assert.AreEqual(res1, res2, "calling getFacetResults twice should return the .equals()=true result"); IOUtils.Close(taxoWriter, iw, taxoReader, taxoDir, r, indexDir); }
public virtual void TestDetectMultiValuedField() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir); FacetsConfig config = new FacetsConfig(); Document doc = new Document(); doc.Add(NewTextField("field", "text", Field.Store.NO)); doc.Add(new FacetField("a", "path")); doc.Add(new FacetField("a", "path2")); try { config.Build(taxoWriter, doc); Fail("did not hit expected exception"); } catch (System.ArgumentException) { // expected } IOUtils.Close(writer, taxoWriter, dir, taxoDir); }
public virtual void TestMixedRangeAndNonRangeTaxonomy() { Directory d = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random(), d, Similarity, TimeZone); Directory td = NewDirectory(); DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(td, OpenMode.CREATE); FacetsConfig config = new FacetsConfig(); for (long l = 0; l < 100; l++) { Document doc = new Document(); // For computing range facet counts: doc.Add(new NumericDocValuesField("field", l)); // For drill down by numeric range: doc.Add(new Int64Field("field", l, Field.Store.NO)); if ((l & 3) == 0) { doc.Add(new FacetField("dim", "a")); } else { doc.Add(new FacetField("dim", "b")); } w.AddDocument(config.Build(tw, doc)); } IndexReader r = w.Reader; var tr = new DirectoryTaxonomyReader(tw); IndexSearcher s = NewSearcher(r); if (VERBOSE) { Console.WriteLine("TEST: searcher=" + s); } DrillSideways ds = new DrillSidewaysAnonymousInnerClassHelper(this, s, config, tr); // First search, no drill downs: DrillDownQuery ddq = new DrillDownQuery(config); DrillSidewaysResult dsr = ds.Search(null, ddq, 10); Assert.AreEqual(100, dsr.Hits.TotalHits); Assert.AreEqual("dim=dim path=[] value=100 childCount=2\n b (75)\n a (25)\n", dsr.Facets.GetTopChildren(10, "dim").ToString()); Assert.AreEqual("dim=field path=[] value=21 childCount=5\n less than 10 (10)\n less than or equal to 10 (11)\n over 90 (9)\n 90 or above (10)\n over 1000 (0)\n", dsr.Facets.GetTopChildren(10, "field").ToString()); // Second search, drill down on dim=b: ddq = new DrillDownQuery(config); ddq.Add("dim", "b"); dsr = ds.Search(null, ddq, 10); Assert.AreEqual(75, dsr.Hits.TotalHits); Assert.AreEqual("dim=dim path=[] value=100 childCount=2\n b (75)\n a (25)\n", dsr.Facets.GetTopChildren(10, "dim").ToString()); Assert.AreEqual("dim=field path=[] value=16 childCount=5\n less than 10 (7)\n less than or equal to 10 (8)\n over 90 (7)\n 90 or above (8)\n over 1000 (0)\n", dsr.Facets.GetTopChildren(10, "field").ToString()); // Third search, drill down on "less than or equal to 10": ddq = new DrillDownQuery(config); ddq.Add("field", NumericRangeQuery.NewInt64Range("field", 0L, 10L, true, true)); dsr = ds.Search(null, ddq, 10); Assert.AreEqual(11, dsr.Hits.TotalHits); Assert.AreEqual("dim=dim path=[] value=11 childCount=2\n b (8)\n a (3)\n", dsr.Facets.GetTopChildren(10, "dim").ToString()); Assert.AreEqual("dim=field path=[] value=21 childCount=5\n less than 10 (10)\n less than or equal to 10 (11)\n over 90 (9)\n 90 or above (10)\n over 1000 (0)\n", dsr.Facets.GetTopChildren(10, "field").ToString()); IOUtils.Dispose(tw, tr, td, w, r, d); }
public virtual void TestRequireDimCount() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir); DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE); FacetsConfig config = new FacetsConfig(); config.SetRequireDimCount("dim", true); config.SetMultiValued("dim2", true); config.SetRequireDimCount("dim2", true); config.SetMultiValued("dim3", true); config.SetHierarchical("dim3", true); config.SetRequireDimCount("dim3", true); Document doc = new Document(); doc.Add(NewTextField("field", "text", Field.Store.NO)); doc.Add(new FacetField("dim", "a")); doc.Add(new FacetField("dim2", "a")); doc.Add(new FacetField("dim2", "b")); doc.Add(new FacetField("dim3", "a", "b")); doc.Add(new FacetField("dim3", "a", "c")); writer.AddDocument(config.Build(taxoWriter, doc)); // NRT open IndexSearcher searcher = NewSearcher(writer.Reader); // NRT open var taxoReader = new DirectoryTaxonomyReader(taxoWriter); FacetsCollector c = new FacetsCollector(); searcher.Search(new MatchAllDocsQuery(), c); Facets facets = GetTaxonomyFacetCounts(taxoReader, config, c); Assert.AreEqual(1, facets.GetTopChildren(10, "dim").Value); Assert.AreEqual(1, facets.GetTopChildren(10, "dim2").Value); Assert.AreEqual(1, facets.GetTopChildren(10, "dim3").Value); try { Assert.AreEqual(1, facets.GetSpecificValue("dim")); Fail("didn't hit expected exception"); } catch (System.ArgumentException) { // expected } Assert.AreEqual(1, facets.GetSpecificValue("dim2")); Assert.AreEqual(1, facets.GetSpecificValue("dim3")); IOUtils.Close(writer, taxoWriter, searcher.IndexReader, taxoReader, dir, taxoDir); }
public virtual void TestRandomDoubles() { Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); int numDocs = AtLeast(1000); double[] values = new double[numDocs]; double minValue = double.PositiveInfinity; double maxValue = double.NegativeInfinity; for (int i = 0; i < numDocs; i++) { Document doc = new Document(); double v = Random().NextDouble(); values[i] = v; doc.Add(new DoubleDocValuesField("field", v)); doc.Add(new DoubleField("field", v, Field.Store.NO)); w.AddDocument(doc); minValue = Math.Min(minValue, v); maxValue = Math.Max(maxValue, v); } IndexReader r = w.Reader; IndexSearcher s = NewSearcher(r); FacetsConfig config = new FacetsConfig(); int numIters = AtLeast(10); for (int iter = 0; iter < numIters; iter++) { if (VERBOSE) { Console.WriteLine("TEST: iter=" + iter); } int numRange = TestUtil.NextInt(Random(), 1, 5); DoubleRange[] ranges = new DoubleRange[numRange]; int[] expectedCounts = new int[numRange]; double minAcceptedValue = double.PositiveInfinity; double maxAcceptedValue = double.NegativeInfinity; for (int rangeID = 0; rangeID < numRange; rangeID++) { double min; if (rangeID > 0 && Random().Next(10) == 7) { // Use an existing boundary: DoubleRange prevRange = ranges[Random().Next(rangeID)]; if (Random().NextBoolean()) { min = prevRange.Min; } else { min = prevRange.Max; } } else { min = Random().NextDouble(); } double max; if (rangeID > 0 && Random().Next(10) == 7) { // Use an existing boundary: DoubleRange prevRange = ranges[Random().Next(rangeID)]; if (Random().NextBoolean()) { max = prevRange.Min; } else { max = prevRange.Max; } } else { max = Random().NextDouble(); } if (min > max) { double x = min; min = max; max = x; } bool minIncl; bool maxIncl; if (min == max) { minIncl = true; maxIncl = true; } else { minIncl = Random().NextBoolean(); maxIncl = Random().NextBoolean(); } ranges[rangeID] = new DoubleRange("r" + rangeID, min, minIncl, max, maxIncl); // Do "slow but hopefully correct" computation of // expected count: for (int i = 0; i < numDocs; i++) { bool accept = true; if (minIncl) { accept &= values[i] >= min; } else { accept &= values[i] > min; } if (maxIncl) { accept &= values[i] <= max; } else { accept &= values[i] < max; } if (accept) { expectedCounts[rangeID]++; minAcceptedValue = Math.Min(minAcceptedValue, values[i]); maxAcceptedValue = Math.Max(maxAcceptedValue, values[i]); } } } FacetsCollector sfc = new FacetsCollector(); s.Search(new MatchAllDocsQuery(), sfc); Filter fastMatchFilter; if (Random().NextBoolean()) { if (Random().NextBoolean()) { fastMatchFilter = NumericRangeFilter.NewDoubleRange("field", minValue, maxValue, true, true); } else { fastMatchFilter = NumericRangeFilter.NewDoubleRange("field", minAcceptedValue, maxAcceptedValue, true, true); } } else { fastMatchFilter = null; } ValueSource vs = new DoubleFieldSource("field"); Facets facets = new DoubleRangeFacetCounts("field", vs, sfc, fastMatchFilter, ranges); FacetResult result = facets.GetTopChildren(10, "field"); Assert.AreEqual(numRange, result.LabelValues.Length); for (int rangeID = 0; rangeID < numRange; rangeID++) { if (VERBOSE) { Console.WriteLine(" range " + rangeID + " expectedCount=" + expectedCounts[rangeID]); } LabelAndValue subNode = result.LabelValues[rangeID]; Assert.AreEqual("r" + rangeID, subNode.Label); Assert.AreEqual(expectedCounts[rangeID], (int)subNode.Value); DoubleRange range = ranges[rangeID]; // Test drill-down: DrillDownQuery ddq = new DrillDownQuery(config); if (Random().NextBoolean()) { if (Random().NextBoolean()) { ddq.Add("field", NumericRangeFilter.NewDoubleRange("field", range.Min, range.Max, range.MinInclusive, range.MaxInclusive)); } else { ddq.Add("field", NumericRangeQuery.NewDoubleRange("field", range.Min, range.Max, range.MinInclusive, range.MaxInclusive)); } } else { ddq.Add("field", range.GetFilter(fastMatchFilter, vs)); } Assert.AreEqual(expectedCounts[rangeID], s.Search(ddq, 10).TotalHits); } } IOUtils.Dispose(w, r, dir); }
public virtual void TestSegmentsWithoutCategoriesOrResults() { // tests the accumulator when there are segments with no results var indexDir = NewDirectory(); var taxoDir = NewDirectory(); IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); //iwc.MergePolicy = NoMergePolicy.INSTANCE; // prevent merges IndexWriter indexWriter = new IndexWriter(indexDir, iwc); var taxoWriter = new DirectoryTaxonomyWriter(taxoDir); FacetsConfig config = new FacetsConfig(); indexTwoDocs(taxoWriter, indexWriter, config, false); // 1st segment, no content, with categories indexTwoDocs(taxoWriter, indexWriter, null, true); // 2nd segment, with content, no categories indexTwoDocs(taxoWriter, indexWriter, config, true); // 3rd segment ok indexTwoDocs(taxoWriter, indexWriter, null, false); // 4th segment, no content, or categories indexTwoDocs(taxoWriter, indexWriter, null, true); // 5th segment, with content, no categories indexTwoDocs(taxoWriter, indexWriter, config, true); // 6th segment, with content, with categories indexTwoDocs(taxoWriter, indexWriter, null, true); // 7th segment, with content, no categories IOUtils.Close(indexWriter, taxoWriter); DirectoryReader indexReader = DirectoryReader.Open(indexDir); var taxoReader = new DirectoryTaxonomyReader(taxoDir); IndexSearcher indexSearcher = NewSearcher(indexReader); // search for "f:a", only segments 1 and 3 should match results Query q = new TermQuery(new Term("f", "a")); FacetsCollector sfc = new FacetsCollector(); indexSearcher.Search(q, sfc); Facets facets = GetTaxonomyFacetCounts(taxoReader, config, sfc); FacetResult result = facets.GetTopChildren(10, "A"); Assert.AreEqual(2, result.LabelValues.Length, "wrong number of children"); foreach (LabelAndValue labelValue in result.LabelValues) { Assert.AreEqual(2, (int)labelValue.value, "wrong weight for child " + labelValue.label); } IOUtils.Close(indexReader, taxoReader, indexDir, taxoDir); }
public virtual void TestNrt() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); // Don't allow tiny maxBufferedDocs; it can make this // test too slow: iwc.SetMaxBufferedDocs(Math.Max(500, iwc.MaxBufferedDocs)); // MockRandom/AlcololicMergePolicy are too slow: TieredMergePolicy tmp = new TieredMergePolicy(); tmp.FloorSegmentMB = .001; iwc.SetMergePolicy(tmp); IndexWriter w = new IndexWriter(dir, iwc); var tw = new DirectoryTaxonomyWriter(taxoDir); FacetsConfig config = new FacetsConfig(); config.SetMultiValued("field", true); AtomicBoolean stop = new AtomicBoolean(); // How many unique facets to index before stopping: int ordLimit = TEST_NIGHTLY ? 100000 : 6000; var indexer = new IndexerThread(w, config, tw, null, ordLimit, stop); var mgr = new SearcherTaxonomyManager(w, true, null, tw); var reopener = new ThreadAnonymousInnerClassHelper(this, stop, mgr); reopener.Name = "reopener"; reopener.Start(); indexer.Name = "indexer"; indexer.Start(); try { while (!stop.Get()) { SearcherAndTaxonomy pair = mgr.Acquire(); try { //System.out.println("search maxOrd=" + pair.taxonomyReader.getSize()); FacetsCollector sfc = new FacetsCollector(); pair.Searcher.Search(new MatchAllDocsQuery(), sfc); Facets facets = GetTaxonomyFacetCounts(pair.TaxonomyReader, config, sfc); FacetResult result = facets.GetTopChildren(10, "field"); if (pair.Searcher.IndexReader.NumDocs > 0) { //System.out.println(pair.taxonomyReader.getSize()); Assert.True(result.ChildCount > 0); Assert.True(result.LabelValues.Length > 0); } //if (VERBOSE) { //System.out.println("TEST: facets=" + FacetTestUtils.toString(results.get(0))); //} } finally { mgr.Release(pair); } } } finally { indexer.Join(); reopener.Join(); } if (VERBOSE) { Console.WriteLine("TEST: now stop"); } IOUtils.Close(mgr, tw, w, taxoDir, dir); }
public virtual void TestSeparateIndexedFields() { Store.Directory indexDir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); var taxoWriter = new DirectoryTaxonomyWriter(taxoDir); IndexWriter iw = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); FacetsConfig config = new FacetsConfig(); config.SetIndexFieldName("b", "$b"); for (int i = AtLeast(30); i > 0; --i) { Document doc = new Document(); doc.Add(new StringField("f", "v", Field.Store.NO)); doc.Add(new FacetField("a", "1")); doc.Add(new FacetField("b", "1")); iw.AddDocument(config.Build(taxoWriter, doc)); } DirectoryReader r = DirectoryReader.Open(iw, true); var taxoReader = new DirectoryTaxonomyReader(taxoWriter); FacetsCollector sfc = new FacetsCollector(); NewSearcher(r).Search(new MatchAllDocsQuery(), sfc); Facets facets1 = GetTaxonomyFacetCounts(taxoReader, config, sfc); Facets facets2 = GetTaxonomyFacetCounts(taxoReader, config, sfc, "$b"); Assert.AreEqual(r.MaxDoc, (int)facets1.GetTopChildren(10, "a").Value); Assert.AreEqual(r.MaxDoc, (int)facets2.GetTopChildren(10, "b").Value); IOUtils.Close(taxoWriter, iw, taxoReader, taxoDir, r, indexDir); }
private void indexTwoDocs(ITaxonomyWriter taxoWriter, IndexWriter indexWriter, FacetsConfig config, bool withContent) { for (int i = 0; i < 2; i++) { Document doc = new Document(); if (withContent) { doc.Add(new StringField("f", "a", Field.Store.NO)); } if (config != null) { doc.Add(new FacetField("A", Convert.ToString(i))); indexWriter.AddDocument(config.Build(taxoWriter, doc)); } else { indexWriter.AddDocument(doc); } } indexWriter.Commit(); }
public virtual void TestSparseFacets() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); // Writes facet ords to a separate directory from the // main index: var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir); FacetsConfig config = new FacetsConfig(); Document doc = new Document(); doc.Add(new FacetField("a", "foo1")); writer.AddDocument(config.Build(taxoWriter, doc)); if (Random().NextBoolean()) { writer.Commit(); } doc = new Document(); doc.Add(new FacetField("a", "foo2")); doc.Add(new FacetField("b", "bar1")); writer.AddDocument(config.Build(taxoWriter, doc)); if (Random().NextBoolean()) { writer.Commit(); } doc = new Document(); doc.Add(new FacetField("a", "foo3")); doc.Add(new FacetField("b", "bar2")); doc.Add(new FacetField("c", "baz1")); writer.AddDocument(config.Build(taxoWriter, doc)); // NRT open IndexSearcher searcher = NewSearcher(writer.Reader); // NRT open var taxoReader = new DirectoryTaxonomyReader(taxoWriter); FacetsCollector c = new FacetsCollector(); searcher.Search(new MatchAllDocsQuery(), c); Facets facets = GetTaxonomyFacetCounts(taxoReader, new FacetsConfig(), c); // Ask for top 10 labels for any dims that have counts: IList<FacetResult> results = facets.GetAllDims(10); Assert.AreEqual(3, results.Count); Assert.AreEqual("dim=a path=[] value=3 childCount=3\n foo1 (1)\n foo2 (1)\n foo3 (1)\n", results[0].ToString()); Assert.AreEqual("dim=b path=[] value=2 childCount=2\n bar1 (1)\n bar2 (1)\n", results[1].ToString()); Assert.AreEqual("dim=c path=[] value=1 childCount=1\n baz1 (1)\n", results[2].ToString()); IOUtils.Close(writer, taxoWriter, searcher.IndexReader, taxoReader, taxoDir, dir); }
/// <summary> /// Create <see cref="TaxonomyFacetSumSingleAssociations"/> against /// the default index field. /// </summary> public TaxonomyFacetSumSingleAssociations(TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc) : this(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, taxoReader, config, fc) { }
public virtual void TestWrongIndexFieldName() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); // Writes facet ords to a separate directory from the // main index: DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE); FacetsConfig config = new FacetsConfig(); config.SetIndexFieldName("a", "$facets2"); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir); Document doc = new Document(); doc.Add(new FacetField("a", "foo1")); writer.AddDocument(config.Build(taxoWriter, doc)); // NRT open IndexSearcher searcher = NewSearcher(writer.Reader); // NRT open var taxoReader = new DirectoryTaxonomyReader(taxoWriter); FacetsCollector c = new FacetsCollector(); searcher.Search(new MatchAllDocsQuery(), c); // Uses default $facets field: Facets facets; if (Random().NextBoolean()) { facets = new FastTaxonomyFacetCounts(taxoReader, config, c); } else { OrdinalsReader ordsReader = new DocValuesOrdinalsReader(); if (Random().NextBoolean()) { ordsReader = new CachedOrdinalsReader(ordsReader); } facets = new TaxonomyFacetCounts(ordsReader, taxoReader, config, c); } // Ask for top 10 labels for any dims that have counts: IList<FacetResult> results = facets.GetAllDims(10); Assert.True(results.Count == 0); try { facets.GetSpecificValue("a"); Fail("should have hit exc"); } catch (System.ArgumentException) { // expected } try { facets.GetTopChildren(10, "a"); Fail("should have hit exc"); } catch (System.ArgumentException) { // expected } IOUtils.Close(writer, taxoWriter, searcher.IndexReader, taxoReader, taxoDir, dir); }
public virtual void TestCountRoot() { // LUCENE-4882: FacetsAccumulator threw NPE if a FacetRequest was defined on CP.EMPTY Store.Directory indexDir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); var taxoWriter = new DirectoryTaxonomyWriter(taxoDir); IndexWriter iw = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); FacetsConfig config = new FacetsConfig(); for (int i = AtLeast(30); i > 0; --i) { Document doc = new Document(); doc.Add(new FacetField("a", "1")); doc.Add(new FacetField("b", "1")); iw.AddDocument(config.Build(taxoWriter, doc)); } DirectoryReader r = DirectoryReader.Open(iw, true); DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter); FacetsCollector sfc = new FacetsCollector(); NewSearcher(r).Search(new MatchAllDocsQuery(), sfc); Facets facets = GetTaxonomyFacetCounts(taxoReader, config, sfc); foreach (FacetResult result in facets.GetAllDims(10)) { Assert.AreEqual(r.NumDocs, (int)result.Value); } IOUtils.Close(taxoWriter, iw, taxoReader, taxoDir, r, indexDir); }
public virtual void TestBasic() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); // Writes facet ords to a separate directory from the // main index: var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE); FacetsConfig config = new FacetsConfig(); config.SetHierarchical("Publish Date", true); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir); Document doc = new Document(); doc.Add(new FacetField("Author", "Bob")); doc.Add(new FacetField("Publish Date", "2010", "10", "15")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new FacetField("Author", "Lisa")); doc.Add(new FacetField("Publish Date", "2010", "10", "20")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new FacetField("Author", "Lisa")); doc.Add(new FacetField("Publish Date", "2012", "1", "1")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new FacetField("Author", "Susan")); doc.Add(new FacetField("Publish Date", "2012", "1", "7")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new FacetField("Author", "Frank")); doc.Add(new FacetField("Publish Date", "1999", "5", "5")); writer.AddDocument(config.Build(taxoWriter, doc)); // NRT open IndexSearcher searcher = NewSearcher(writer.Reader); // NRT open var taxoReader = new DirectoryTaxonomyReader(taxoWriter); // Aggregate the facet counts: FacetsCollector c = new FacetsCollector(); // MatchAllDocsQuery is for "browsing" (counts facets // for all non-deleted docs in the index); normally // you'd use a "normal" query, and use MultiCollector to // wrap collecting the "normal" hits and also facets: searcher.Search(new MatchAllDocsQuery(), c); Facets facets = new FastTaxonomyFacetCounts(taxoReader, config, c); // Retrieve & verify results: Assert.AreEqual("dim=Publish Date path=[] value=5 childCount=3\n 2010 (2)\n 2012 (2)\n 1999 (1)\n", facets.GetTopChildren(10, "Publish Date").ToString()); Assert.AreEqual("dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Bob (1)\n Susan (1)\n Frank (1)\n", facets.GetTopChildren(10, "Author").ToString()); // Now user drills down on Publish Date/2010: DrillDownQuery q2 = new DrillDownQuery(config); q2.Add("Publish Date", "2010"); c = new FacetsCollector(); searcher.Search(q2, c); facets = new FastTaxonomyFacetCounts(taxoReader, config, c); Assert.AreEqual("dim=Author path=[] value=2 childCount=2\n Bob (1)\n Lisa (1)\n", facets.GetTopChildren(10, "Author").ToString()); Assert.AreEqual(1, facets.GetSpecificValue("Author", "Lisa")); Assert.Null(facets.GetTopChildren(10, "Non exitent dim")); // Smoke test PrintTaxonomyStats: ByteArrayOutputStream bos = new ByteArrayOutputStream(); PrintTaxonomyStats.PrintStats(taxoReader, Console.Out, true); string result = bos.ToString(); Assert.True(result.IndexOf("/Author: 4 immediate children; 5 total categories", StringComparison.Ordinal) != -1); Assert.True(result.IndexOf("/Publish Date: 3 immediate children; 12 total categories", StringComparison.Ordinal) != -1); // Make sure at least a few nodes of the tree came out: Assert.True(result.IndexOf(" /1999", StringComparison.Ordinal) != -1); Assert.True(result.IndexOf(" /2012", StringComparison.Ordinal) != -1); Assert.True(result.IndexOf(" /20", StringComparison.Ordinal) != -1); IOUtils.Close(writer, taxoWriter, searcher.IndexReader, taxoReader, taxoDir, dir); }
/// <summary> /// Create <see cref="FastTaxonomyFacetCounts"/>, which also /// counts all facet labels. /// </summary> public FastTaxonomyFacetCounts(TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc) : this(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, taxoReader, config, fc) { }
private void indexTwoDocs(TaxonomyWriter taxoWriter, IndexWriter indexWriter, FacetsConfig config, bool withContent) { for (int i = 0; i < 2; i++) { Document doc = new Document(); if (withContent) { doc.Add(new StringField("f", "a", Field.Store.NO)); } if (config != null) { doc.Add(new FacetField("A", Convert.ToString(i))); indexWriter.AddDocument(config.Build(taxoWriter, doc)); } else { indexWriter.AddDocument(doc); } } indexWriter.Commit(); }
/// <summary> /// Sole constructor. /// </summary> protected internal TaxonomyFacets(string indexFieldName, TaxonomyReader taxoReader, FacetsConfig config) { this.m_indexFieldName = indexFieldName; this.m_taxoReader = taxoReader; this.m_config = config; ParallelTaxonomyArrays pta = taxoReader.ParallelTaxonomyArrays; m_children = pta.Children; m_siblings = pta.Siblings; }
public virtual void TestSparseFacets() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); // Writes facet ords to a separate directory from the // main index: var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE); RandomIndexWriter writer = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, dir); FacetsConfig config = new FacetsConfig(); Document doc = new Document(); doc.Add(new FacetField("a", "foo1")); writer.AddDocument(config.Build(taxoWriter, doc)); if (Random.NextBoolean()) { writer.Commit(); } doc = new Document(); doc.Add(new FacetField("a", "foo2")); doc.Add(new FacetField("b", "bar1")); writer.AddDocument(config.Build(taxoWriter, doc)); if (Random.NextBoolean()) { writer.Commit(); } doc = new Document(); doc.Add(new FacetField("a", "foo3")); doc.Add(new FacetField("b", "bar2")); doc.Add(new FacetField("c", "baz1")); writer.AddDocument(config.Build(taxoWriter, doc)); // NRT open IndexSearcher searcher = NewSearcher(writer.GetReader()); // NRT open var taxoReader = new DirectoryTaxonomyReader(taxoWriter); FacetsCollector c = new FacetsCollector(); searcher.Search(new MatchAllDocsQuery(), c); Facets facets = GetTaxonomyFacetCounts(taxoReader, new FacetsConfig(), c); // Ask for top 10 labels for any dims that have counts: IList <FacetResult> results = facets.GetAllDims(10); Assert.AreEqual(3, results.Count); Assert.AreEqual("dim=a path=[] value=3 childCount=3\n foo1 (1)\n foo2 (1)\n foo3 (1)\n", results[0].ToString()); Assert.AreEqual("dim=b path=[] value=2 childCount=2\n bar1 (1)\n bar2 (1)\n", results[1].ToString()); Assert.AreEqual("dim=c path=[] value=1 childCount=1\n baz1 (1)\n", results[2].ToString()); IOUtils.Dispose(writer, taxoWriter, searcher.IndexReader, taxoReader, taxoDir, dir); }
public DrillSidewaysAnonymousInnerClassHelper2(TestRangeFacetCounts outerInstance, IndexSearcher indexSearcher, FacetsConfig facetsConfig, TaxonomyReader org, ValueSource valueSource, DoubleRange[] doubleRanges, Filter filter) : base(indexSearcher, facetsConfig, org) { this.outerInstance = outerInstance; this.vs = valueSource; this.ranges = doubleRanges; this.fastMatchFilter = filter; }
public virtual void TestWrongIndexFieldName() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); // Writes facet ords to a separate directory from the // main index: var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE); FacetsConfig config = new FacetsConfig(); config.SetIndexFieldName("a", "$facets2"); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); Document doc = new Document(); doc.Add(new Int32Field("num", 10, Field.Store.NO)); doc.Add(new FacetField("a", "foo1")); writer.AddDocument(config.Build(taxoWriter, doc)); // NRT open IndexSearcher searcher = NewSearcher(writer.Reader); writer.Dispose(); // NRT open var taxoReader = new DirectoryTaxonomyReader(taxoWriter); taxoWriter.Dispose(); FacetsCollector c = new FacetsCollector(); searcher.Search(new MatchAllDocsQuery(), c); TaxonomyFacetSumValueSource facets = new TaxonomyFacetSumValueSource(taxoReader, config, c, new Int32FieldSource("num")); // Ask for top 10 labels for any dims that have counts: IList <FacetResult> results = facets.GetAllDims(10); Assert.True(results.Count == 0); try { facets.GetSpecificValue("a"); Fail("should have hit exc"); } catch (System.ArgumentException) { // expected } try { facets.GetTopChildren(10, "a"); Fail("should have hit exc"); } catch (System.ArgumentException) { // expected } IOUtils.Dispose(searcher.IndexReader, taxoReader, dir, taxoDir); }
public DrillSidewaysAnonymousInnerClassHelper(TestRangeFacetCounts outerInstance, IndexSearcher s, FacetsConfig config, TaxonomyReader tr) : base(s, config, tr) { this.outerInstance = outerInstance; }
public virtual void TestConcurrency() { AtomicInt32 numDocs = new AtomicInt32(AtLeast(10000)); Directory indexDir = NewDirectory(); Directory taxoDir = NewDirectory(); ConcurrentDictionary <string, string> values = new ConcurrentDictionary <string, string>(); IndexWriter iw = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, null)); var tw = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE, NewTaxoWriterCache(numDocs.Get())); ThreadClass[] indexThreads = new ThreadClass[AtLeast(4)]; FacetsConfig config = new FacetsConfig(); for (int i = 0; i < 10; i++) { config.SetHierarchical("l1." + i, true); config.SetMultiValued("l1." + i, true); } for (int i = 0; i < indexThreads.Length; i++) { indexThreads[i] = new ThreadAnonymousInnerClassHelper(this, numDocs, values, iw, tw, config); } foreach (ThreadClass t in indexThreads) { t.Start(); } foreach (ThreadClass t in indexThreads) { t.Join(); } var tr = new DirectoryTaxonomyReader(tw); // +1 for root category if (values.Count + 1 != tr.Count) { foreach (string value in values.Keys) { FacetLabel label = new FacetLabel(FacetsConfig.StringToPath(value)); if (tr.GetOrdinal(label) == -1) { Console.WriteLine("FAIL: path=" + label + " not recognized"); } } Fail("mismatch number of categories"); } int[] parents = tr.ParallelTaxonomyArrays.Parents; foreach (string cat in values.Keys) { FacetLabel cp = new FacetLabel(FacetsConfig.StringToPath(cat)); Assert.True(tr.GetOrdinal(cp) > 0, "category not found " + cp); int level = cp.Length; int parentOrd = 0; // for root, parent is always virtual ROOT (ord=0) FacetLabel path = null; for (int i = 0; i < level; i++) { path = cp.Subpath(i + 1); int ord = tr.GetOrdinal(path); Assert.AreEqual(parentOrd, parents[ord], "invalid parent for cp=" + path); parentOrd = ord; // next level should have this parent } } IOUtils.Dispose(tw, iw, tr, taxoDir, indexDir); }
public virtual void TestSparseFacets() { AssumeTrue("Test requires SortedSetDV support", DefaultCodecSupportsSortedSet); Directory dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, dir); FacetsConfig config = new FacetsConfig(); Document doc = new Document(); doc.Add(new SortedSetDocValuesFacetField("a", "foo1")); writer.AddDocument(config.Build(doc)); if (Random.NextBoolean()) { writer.Commit(); } doc = new Document(); doc.Add(new SortedSetDocValuesFacetField("a", "foo2")); doc.Add(new SortedSetDocValuesFacetField("b", "bar1")); writer.AddDocument(config.Build(doc)); if (Random.NextBoolean()) { writer.Commit(); } doc = new Document(); doc.Add(new SortedSetDocValuesFacetField("a", "foo3")); doc.Add(new SortedSetDocValuesFacetField("b", "bar2")); doc.Add(new SortedSetDocValuesFacetField("c", "baz1")); writer.AddDocument(config.Build(doc)); // NRT open IndexSearcher searcher = NewSearcher(writer.GetReader()); writer.Dispose(); // Per-top-reader state: SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(searcher.IndexReader); FacetsCollector c = new FacetsCollector(); searcher.Search(new MatchAllDocsQuery(), c); SortedSetDocValuesFacetCounts facets = new SortedSetDocValuesFacetCounts(state, c); // Ask for top 10 labels for any dims that have counts: IList <FacetResult> results = facets.GetAllDims(10); Assert.AreEqual(3, results.Count); Assert.AreEqual("dim=a path=[] value=3 childCount=3\n foo1 (1)\n foo2 (1)\n foo3 (1)\n", results[0].ToString()); Assert.AreEqual("dim=b path=[] value=2 childCount=2\n bar1 (1)\n bar2 (1)\n", results[1].ToString()); Assert.AreEqual("dim=c path=[] value=1 childCount=1\n baz1 (1)\n", results[2].ToString()); searcher.IndexReader.Dispose(); dir.Dispose(); }
public ThreadAnonymousInnerClassHelper(TestConcurrentFacetedIndexing outerInstance, AtomicInt32 numDocs, ConcurrentDictionary <string, string> values, IndexWriter iw, Lucene.Net.Facet.Taxonomy.Directory.DirectoryTaxonomyWriter tw, FacetsConfig config) { this.outerInstance = outerInstance; this.numDocs = numDocs; this.values = values; this.iw = iw; this.tw = tw; this.config = config; }
public override int GetOrdinal(FacetLabel cp) { EnsureOpen(); if (cp.Length == 0) { return(ROOT_ORDINAL); } // First try to find the answer in the LRU cache: lock (ordinalCache) { IntClass res = ordinalCache.Get(cp); if (res != null && res.IntItem != null) { if ((int)res.IntItem.Value < indexReader.MaxDoc) { // Since the cache is shared with DTR instances allocated from // doOpenIfChanged, we need to ensure that the ordinal is one that // this DTR instance recognizes. return((int)res.IntItem.Value); } else { // if we get here, it means that the category was found in the cache, // but is not recognized by this TR instance. Therefore there's no // need to continue search for the path on disk, because we won't find // it there too. return(TaxonomyReader.INVALID_ORDINAL); } } } // If we're still here, we have a cache miss. We need to fetch the // value from disk, and then also put it in the cache: int ret = TaxonomyReader.INVALID_ORDINAL; DocsEnum docs = MultiFields.GetTermDocsEnum(indexReader, null, Consts.FULL, new BytesRef(FacetsConfig.PathToString(cp.Components, cp.Length)), 0); if (docs != null && docs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS) { ret = docs.DocID(); // we only store the fact that a category exists, not its inexistence. // This is required because the caches are shared with new DTR instances // that are allocated from doOpenIfChanged. Therefore, if we only store // information about found categories, we cannot accidently tell a new // generation of DTR that a category does not exist. lock (ordinalCache) { ordinalCache.Put(cp, new IntClass { IntItem = Convert.ToInt32(ret) }); } } return(ret); }
/// <summary> /// Look up the given category in the cache and/or the on-disk storage, /// returning the category's ordinal, or a negative number in case the /// category does not yet exist in the taxonomy. /// </summary> protected virtual int FindCategory(FacetLabel categoryPath) { lock (this) { // If we can find the category in the cache, or we know the cache is // complete, we can return the response directly from it int res = cache.Get(categoryPath); if (res >= 0 || cacheIsComplete) { return(res); } cacheMisses.IncrementAndGet(); // After a few cache misses, it makes sense to read all the categories // from disk and into the cache. The reason not to do this on the first // cache miss (or even when opening the writer) is that it will // significantly slow down the case when a taxonomy is opened just to // add one category. The idea only spending a long time on reading // after enough time was spent on cache misses is known as an "online // algorithm". PerhapsFillCache(); res = cache.Get(categoryPath); if (res >= 0 || cacheIsComplete) { // if after filling the cache from the info on disk, the category is in it // or the cache is complete, return whatever cache.get returned. return(res); } // if we get here, it means the category is not in the cache, and it is not // complete, and therefore we must look for the category on disk. // We need to get an answer from the on-disk index. InitReaderManager(); int doc = -1; DirectoryReader reader = readerManager.Acquire(); try { BytesRef catTerm = new BytesRef(FacetsConfig.PathToString(categoryPath.Components, categoryPath.Length)); TermsEnum termsEnum = null; // reuse DocsEnum docs = null; // reuse foreach (AtomicReaderContext ctx in reader.Leaves) { Terms terms = ctx.AtomicReader.GetTerms(Consts.FULL); if (terms != null) { termsEnum = terms.GetIterator(termsEnum); if (termsEnum.SeekExact(catTerm)) { // liveDocs=null because the taxonomy has no deletes docs = termsEnum.Docs(null, docs, 0); // freqs not required // if the term was found, we know it has exactly one document. doc = docs.NextDoc() + ctx.DocBase; break; } } } } finally { readerManager.Release(reader); } if (doc > 0) { AddToCache(categoryPath, doc); } return(doc); } }
public virtual void TestDirectory() { Store.Directory indexDir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); IndexWriter w = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); var tw = new DirectoryTaxonomyWriter(taxoDir); // first empty commit w.Commit(); tw.Commit(); var mgr = new SearcherTaxonomyManager(indexDir, taxoDir, null); FacetsConfig config = new FacetsConfig(); config.SetMultiValued("field", true); AtomicBoolean stop = new AtomicBoolean(); // How many unique facets to index before stopping: int ordLimit = TEST_NIGHTLY ? 100000 : 6000; var indexer = new IndexerThread(w, config, tw, mgr, ordLimit, stop); indexer.Start(); try { while (!stop.Get()) { SearcherAndTaxonomy pair = mgr.Acquire(); try { //System.out.println("search maxOrd=" + pair.taxonomyReader.getSize()); FacetsCollector sfc = new FacetsCollector(); pair.Searcher.Search(new MatchAllDocsQuery(), sfc); Facets facets = GetTaxonomyFacetCounts(pair.TaxonomyReader, config, sfc); FacetResult result = facets.GetTopChildren(10, "field"); if (pair.Searcher.IndexReader.NumDocs > 0) { //System.out.println(pair.taxonomyReader.getSize()); Assert.True(result.ChildCount > 0); Assert.True(result.LabelValues.Length > 0); } //if (VERBOSE) { //System.out.println("TEST: facets=" + FacetTestUtils.toString(results.get(0))); //} } finally { mgr.Release(pair); } } } finally { indexer.Join(); } if (VERBOSE) { Console.WriteLine("TEST: now stop"); } IOUtils.Close(mgr, tw, w, taxoDir, indexDir); }
/// <summary> /// Create <see cref="TaxonomyFacetSumIntAssociations"/> against /// the default index field. /// </summary> public TaxonomyFacetSumIntAssociations(TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc) : this(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, taxoReader, config, fc) { }
public virtual void TestBasic() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); // Writes facet ords to a separate directory from the // main index: var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE); FacetsConfig config = new FacetsConfig(); config.SetHierarchical("Publish Date", true); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); Document doc = new Document(); doc.Add(new FacetField("Author", "Bob")); doc.Add(new FacetField("Publish Date", "2010", "10", "15")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new FacetField("Author", "Lisa")); doc.Add(new FacetField("Publish Date", "2010", "10", "20")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new FacetField("Author", "Lisa")); doc.Add(new FacetField("Publish Date", "2012", "1", "1")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new FacetField("Author", "Susan")); doc.Add(new FacetField("Publish Date", "2012", "1", "7")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new FacetField("Author", "Frank")); doc.Add(new FacetField("Publish Date", "1999", "5", "5")); writer.AddDocument(config.Build(taxoWriter, doc)); // NRT open IndexSearcher searcher = NewSearcher(writer.Reader); // NRT open var taxoReader = new DirectoryTaxonomyReader(taxoWriter); // Aggregate the facet counts: FacetsCollector c = new FacetsCollector(); // MatchAllDocsQuery is for "browsing" (counts facets // for all non-deleted docs in the index); normally // you'd use a "normal" query, and use MultiCollector to // wrap collecting the "normal" hits and also facets: searcher.Search(new MatchAllDocsQuery(), c); Facets facets = new FastTaxonomyFacetCounts(taxoReader, config, c); // Retrieve & verify results: Assert.AreEqual("dim=Publish Date path=[] value=5 childCount=3\n 2010 (2)\n 2012 (2)\n 1999 (1)\n", facets.GetTopChildren(10, "Publish Date").ToString()); Assert.AreEqual("dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Bob (1)\n Susan (1)\n Frank (1)\n", facets.GetTopChildren(10, "Author").ToString()); // Now user drills down on Publish Date/2010: DrillDownQuery q2 = new DrillDownQuery(config); q2.Add("Publish Date", "2010"); c = new FacetsCollector(); searcher.Search(q2, c); facets = new FastTaxonomyFacetCounts(taxoReader, config, c); Assert.AreEqual("dim=Author path=[] value=2 childCount=2\n Bob (1)\n Lisa (1)\n", facets.GetTopChildren(10, "Author").ToString()); Assert.AreEqual(1, facets.GetSpecificValue("Author", "Lisa")); Assert.Null(facets.GetTopChildren(10, "Non exitent dim")); // Smoke test PrintTaxonomyStats: string result; using (ByteArrayOutputStream bos = new ByteArrayOutputStream()) { using (StreamWriter w = new StreamWriter(bos, Encoding.UTF8, 2048, true) { AutoFlush = true }) { PrintTaxonomyStats.PrintStats(taxoReader, w, true); } result = bos.ToString(); } Assert.True(result.IndexOf("/Author: 4 immediate children; 5 total categories", StringComparison.Ordinal) != -1); Assert.True(result.IndexOf("/Publish Date: 3 immediate children; 12 total categories", StringComparison.Ordinal) != -1); // Make sure at least a few nodes of the tree came out: Assert.True(result.IndexOf(" /1999", StringComparison.Ordinal) != -1); Assert.True(result.IndexOf(" /2012", StringComparison.Ordinal) != -1); Assert.True(result.IndexOf(" /20", StringComparison.Ordinal) != -1); IOUtils.Close(writer, taxoWriter, searcher.IndexReader, taxoReader, taxoDir, dir); }
public virtual void TestRandomLongs() { Directory dir = NewDirectory(); var w = new RandomIndexWriter(Random(), dir); int numDocs = AtLeast(1000); if (VERBOSE) { Console.WriteLine("TEST: numDocs=" + numDocs); } long[] values = new long[numDocs]; long minValue = long.MaxValue; long maxValue = long.MinValue; for (int i = 0; i < numDocs; i++) { Document doc = new Document(); long v = Random().NextLong(); values[i] = v; doc.Add(new NumericDocValuesField("field", v)); doc.Add(new LongField("field", v, Field.Store.NO)); w.AddDocument(doc); minValue = Math.Min(minValue, v); maxValue = Math.Max(maxValue, v); } IndexReader r = w.Reader; IndexSearcher s = NewSearcher(r); FacetsConfig config = new FacetsConfig(); int numIters = AtLeast(10); for (int iter = 0; iter < numIters; iter++) { if (VERBOSE) { Console.WriteLine("TEST: iter=" + iter); } int numRange = TestUtil.NextInt(Random(), 1, 100); LongRange[] ranges = new LongRange[numRange]; int[] expectedCounts = new int[numRange]; long minAcceptedValue = long.MaxValue; long maxAcceptedValue = long.MinValue; for (int rangeID = 0; rangeID < numRange; rangeID++) { long min; if (rangeID > 0 && Random().Next(10) == 7) { // Use an existing boundary: LongRange prevRange = ranges[Random().Next(rangeID)]; if (Random().NextBoolean()) { min = prevRange.min; } else { min = prevRange.max; } } else { min = Random().NextLong(); } long max; if (rangeID > 0 && Random().Next(10) == 7) { // Use an existing boundary: LongRange prevRange = ranges[Random().Next(rangeID)]; if (Random().NextBoolean()) { max = prevRange.min; } else { max = prevRange.max; } } else { max = Random().NextLong(); } if (min > max) { long x = min; min = max; max = x; } bool minIncl; bool maxIncl; if (min == max) { minIncl = true; maxIncl = true; } else { minIncl = Random().NextBoolean(); maxIncl = Random().NextBoolean(); } ranges[rangeID] = new LongRange("r" + rangeID, min, minIncl, max, maxIncl); if (VERBOSE) { Console.WriteLine(" range " + rangeID + ": " + ranges[rangeID]); } // Do "slow but hopefully correct" computation of // expected count: for (int i = 0; i < numDocs; i++) { bool accept = true; if (minIncl) { accept &= values[i] >= min; } else { accept &= values[i] > min; } if (maxIncl) { accept &= values[i] <= max; } else { accept &= values[i] < max; } if (accept) { expectedCounts[rangeID]++; minAcceptedValue = Math.Min(minAcceptedValue, values[i]); maxAcceptedValue = Math.Max(maxAcceptedValue, values[i]); } } } FacetsCollector sfc = new FacetsCollector(); s.Search(new MatchAllDocsQuery(), sfc); Filter fastMatchFilter; if (Random().NextBoolean()) { if (Random().NextBoolean()) { fastMatchFilter = NumericRangeFilter.NewLongRange("field", minValue, maxValue, true, true); } else { fastMatchFilter = NumericRangeFilter.NewLongRange("field", minAcceptedValue, maxAcceptedValue, true, true); } } else { fastMatchFilter = null; } ValueSource vs = new LongFieldSource("field"); Facets facets = new LongRangeFacetCounts("field", vs, sfc, fastMatchFilter, ranges); FacetResult result = facets.GetTopChildren(10, "field"); Assert.AreEqual(numRange, result.LabelValues.Length); for (int rangeID = 0; rangeID < numRange; rangeID++) { if (VERBOSE) { Console.WriteLine(" range " + rangeID + " expectedCount=" + expectedCounts[rangeID]); } LabelAndValue subNode = result.LabelValues[rangeID]; Assert.AreEqual("r" + rangeID, subNode.label); Assert.AreEqual(expectedCounts[rangeID], (int)subNode.value); LongRange range = ranges[rangeID]; // Test drill-down: DrillDownQuery ddq = new DrillDownQuery(config); if (Random().NextBoolean()) { if (Random().NextBoolean()) { ddq.Add("field", NumericRangeFilter.NewLongRange("field", range.min, range.max, range.minInclusive, range.maxInclusive)); } else { ddq.Add("field", NumericRangeQuery.NewLongRange("field", range.min, range.max, range.minInclusive, range.maxInclusive)); } } else { ddq.Add("field", range.GetFilter(fastMatchFilter, vs)); } Assert.AreEqual(expectedCounts[rangeID], s.Search(ddq, 10).TotalHits); } } IOUtils.Close(w, r, dir); }
public virtual void TestRandom() { string[] tokens = GetRandomTokens(10); Store.Directory indexDir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random(), indexDir, Similarity, TimeZone); var tw = new DirectoryTaxonomyWriter(taxoDir); FacetsConfig config = new FacetsConfig(); int numDocs = AtLeast(1000); int numDims = TestUtil.NextInt(Random(), 1, 7); IList <TestDoc> testDocs = GetRandomDocs(tokens, numDocs, numDims); foreach (TestDoc testDoc in testDocs) { Document doc = new Document(); doc.Add(NewStringField("content", testDoc.content, Field.Store.NO)); for (int j = 0; j < numDims; j++) { if (testDoc.dims[j] != null) { doc.Add(new FacetField("dim" + j, testDoc.dims[j])); } } w.AddDocument(config.Build(tw, doc)); } // NRT open IndexSearcher searcher = NewSearcher(w.Reader); // NRT open var tr = new DirectoryTaxonomyReader(tw); int iters = AtLeast(100); for (int iter = 0; iter < iters; iter++) { string searchToken = tokens[Random().Next(tokens.Length)]; if (VERBOSE) { Console.WriteLine("\nTEST: iter content=" + searchToken); } FacetsCollector fc = new FacetsCollector(); FacetsCollector.Search(searcher, new TermQuery(new Term("content", searchToken)), 10, fc); Facets facets = GetTaxonomyFacetCounts(tr, config, fc); // Slow, yet hopefully bug-free, faceting: var expectedCounts = new List <Dictionary <string, int?> >(); for (int i = 0; i < numDims; i++) { expectedCounts.Add(new Dictionary <string, int?>()); } foreach (TestDoc doc in testDocs) { if (doc.content.Equals(searchToken)) { for (int j = 0; j < numDims; j++) { if (doc.dims[j] != null) { int?v = expectedCounts[j].ContainsKey(doc.dims[j]) ? expectedCounts[j][doc.dims[j]] : null; if (v == null) { expectedCounts[j][doc.dims[j]] = 1; } else { expectedCounts[j][doc.dims[j]] = (int)v + 1; } } } } } List <FacetResult> expected = new List <FacetResult>(); for (int i = 0; i < numDims; i++) { List <LabelAndValue> labelValues = new List <LabelAndValue>(); int totCount = 0; foreach (KeyValuePair <string, int?> ent in expectedCounts[i]) { labelValues.Add(new LabelAndValue(ent.Key, ent.Value.Value)); totCount += ent.Value.Value; } SortLabelValues(labelValues); if (totCount > 0) { expected.Add(new FacetResult("dim" + i, new string[0], totCount, labelValues.ToArray(), labelValues.Count)); } } // Sort by highest value, tie break by value: SortFacetResults(expected); IList <FacetResult> actual = facets.GetAllDims(10); // Messy: fixup ties SortTies(actual); Assert.AreEqual(expected, actual); } IOUtils.Close(w, tw, searcher.IndexReader, tr, indexDir, taxoDir); }
public override int GetOrdinal(FacetLabel cp) { EnsureOpen(); if (cp.Length == 0) { return(ROOT_ORDINAL); } // First try to find the answer in the LRU cache: // LUCENENET: Despite LRUHashMap being thread-safe, we get much better performance // if reads are separated from writes. ordinalCacheLock.EnterReadLock(); try { if (ordinalCache.TryGetValue(cp, out Int32Class res)) { if (res < indexReader.MaxDoc) { // Since the cache is shared with DTR instances allocated from // doOpenIfChanged, we need to ensure that the ordinal is one that // this DTR instance recognizes. return(res); } else { // if we get here, it means that the category was found in the cache, // but is not recognized by this TR instance. Therefore there's no // need to continue search for the path on disk, because we won't find // it there too. return(TaxonomyReader.INVALID_ORDINAL); } } } finally { ordinalCacheLock.ExitReadLock(); } // If we're still here, we have a cache miss. We need to fetch the // value from disk, and then also put it in the cache: int ret = TaxonomyReader.INVALID_ORDINAL; DocsEnum docs = MultiFields.GetTermDocsEnum(indexReader, null, Consts.FULL, new BytesRef(FacetsConfig.PathToString(cp.Components, cp.Length)), 0); if (docs != null && docs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS) { ret = docs.DocID; // we only store the fact that a category exists, not its inexistence. // This is required because the caches are shared with new DTR instances // that are allocated from doOpenIfChanged. Therefore, if we only store // information about found categories, we cannot accidently tell a new // generation of DTR that a category does not exist. ordinalCacheLock.EnterWriteLock(); try { ordinalCache[cp] = ret; } finally { ordinalCacheLock.ExitWriteLock(); } } return(ret); }
/// <summary> /// Aggreggates float facet values from the provided /// <see cref="ValueSource"/>, pulling ordinals using <see cref="DocValuesOrdinalsReader"/> /// against the default indexed /// facet field <see cref="FacetsConfig.DEFAULT_INDEX_FIELD_NAME"/>. /// </summary> public TaxonomyFacetSumValueSource(TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc, ValueSource valueSource) : this(new DocValuesOrdinalsReader(FacetsConfig.DEFAULT_INDEX_FIELD_NAME), taxoReader, config, fc, valueSource) { }
private static void AddFacets(Document doc, FacetsConfig config, bool updateTermExpectedCounts) { IList<FacetField> docCategories = RandomCategories(Random()); foreach (FacetField ff in docCategories) { doc.Add(ff); string cp = ff.dim + "/" + ff.path[0]; allExpectedCounts[cp] = allExpectedCounts[cp] + 1; if (updateTermExpectedCounts) { termExpectedCounts[cp] = termExpectedCounts[cp] + 1; } } // add 1 to each NO_PARENTS dimension allExpectedCounts[CP_B] = allExpectedCounts[CP_B] + 1; allExpectedCounts[CP_C] = allExpectedCounts[CP_C] + 1; allExpectedCounts[CP_D] = allExpectedCounts[CP_D] + 1; if (updateTermExpectedCounts) { termExpectedCounts[CP_B] = termExpectedCounts[CP_B] + 1; termExpectedCounts[CP_C] = termExpectedCounts[CP_C] + 1; termExpectedCounts[CP_D] = termExpectedCounts[CP_D] + 1; } }
public virtual void TestHugeLabel() { Directory indexDir = NewDirectory(), taxoDir = NewDirectory(); IndexWriter indexWriter = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE, new Cl2oTaxonomyWriterCache(2, 1f, 1)); FacetsConfig config = new FacetsConfig(); // Add one huge label: string bigs = null; int ordinal = -1; int len = FacetLabel.MAX_CATEGORY_PATH_LENGTH - 4; // for the dimension and separator bigs = TestUtil.RandomSimpleString(Random(), len, len); FacetField ff = new FacetField("dim", bigs); FacetLabel cp = new FacetLabel("dim", bigs); ordinal = taxoWriter.AddCategory(cp); Document doc = new Document(); doc.Add(ff); indexWriter.AddDocument(config.Build(taxoWriter, doc)); // Add tiny ones to cause a re-hash for (int i = 0; i < 3; i++) { string s = TestUtil.RandomSimpleString(Random(), 1, 10); taxoWriter.AddCategory(new FacetLabel("dim", s)); doc = new Document(); doc.Add(new FacetField("dim", s)); indexWriter.AddDocument(config.Build(taxoWriter, doc)); } // when too large components were allowed to be added, this resulted in a new added category Assert.AreEqual(ordinal, taxoWriter.AddCategory(cp)); IOUtils.Close(indexWriter, taxoWriter); DirectoryReader indexReader = DirectoryReader.Open(indexDir); var taxoReader = new DirectoryTaxonomyReader(taxoDir); IndexSearcher searcher = new IndexSearcher(indexReader); DrillDownQuery ddq = new DrillDownQuery(new FacetsConfig()); ddq.Add("dim", bigs); Assert.AreEqual(1, searcher.Search(ddq, 10).TotalHits); IOUtils.Close(indexReader, taxoReader, indexDir, taxoDir); }
public virtual void TestSparseFacets() { Store.Directory dir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); // Writes facet ords to a separate directory from the // main index: var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); FacetsConfig config = new FacetsConfig(); Document doc = new Document(); doc.Add(new IntField("num", 10, Field.Store.NO)); doc.Add(new FacetField("a", "foo1")); writer.AddDocument(config.Build(taxoWriter, doc)); if (Random().NextBoolean()) { writer.Commit(); } doc = new Document(); doc.Add(new IntField("num", 20, Field.Store.NO)); doc.Add(new FacetField("a", "foo2")); doc.Add(new FacetField("b", "bar1")); writer.AddDocument(config.Build(taxoWriter, doc)); if (Random().NextBoolean()) { writer.Commit(); } doc = new Document(); doc.Add(new IntField("num", 30, Field.Store.NO)); doc.Add(new FacetField("a", "foo3")); doc.Add(new FacetField("b", "bar2")); doc.Add(new FacetField("c", "baz1")); writer.AddDocument(config.Build(taxoWriter, doc)); // NRT open IndexSearcher searcher = NewSearcher(writer.Reader); writer.Dispose(); // NRT open var taxoReader = new DirectoryTaxonomyReader(taxoWriter); taxoWriter.Dispose(); FacetsCollector c = new FacetsCollector(); searcher.Search(new MatchAllDocsQuery(), c); TaxonomyFacetSumValueSource facets = new TaxonomyFacetSumValueSource(taxoReader, new FacetsConfig(), c, new IntFieldSource("num")); // Ask for top 10 labels for any dims that have counts: IList <FacetResult> results = facets.GetAllDims(10); Assert.AreEqual(3, results.Count); Assert.AreEqual("dim=a path=[] value=60.0 childCount=3\n foo3 (30.0)\n foo2 (20.0)\n foo1 (10.0)\n", results[0].ToString()); Assert.AreEqual("dim=b path=[] value=50.0 childCount=2\n bar2 (30.0)\n bar1 (20.0)\n", results[1].ToString()); Assert.AreEqual("dim=c path=[] value=30.0 childCount=1\n baz1 (30.0)\n", results[2].ToString()); IOUtils.Close(searcher.IndexReader, taxoReader, dir, taxoDir); }
// we need to guarantee that if several threads call this concurrently, only // one executes it, and after it returns, the cache is updated and is either // complete or not. private void PerhapsFillCache() { lock (this) { if (cacheMisses < cacheMissesUntilFill) { return; } if (!shouldFillCache) { // we already filled the cache once, there's no need to re-fill it return; } shouldFillCache = false; InitReaderManager(); bool aborted = false; DirectoryReader reader = readerManager.Acquire(); try { TermsEnum termsEnum = null; DocsEnum docsEnum = null; foreach (AtomicReaderContext ctx in reader.Leaves) { Terms terms = ctx.AtomicReader.GetTerms(Consts.FULL); if (terms != null) // cannot really happen, but be on the safe side { termsEnum = terms.GetIterator(termsEnum); while (termsEnum.Next() != null) { if (!cache.IsFull) { BytesRef t = termsEnum.Term; // Since we guarantee uniqueness of categories, each term has exactly // one document. Also, since we do not allow removing categories (and // hence documents), there are no deletions in the index. Therefore, it // is sufficient to call next(), and then doc(), exactly once with no // 'validation' checks. FacetLabel cp = new FacetLabel(FacetsConfig.StringToPath(t.Utf8ToString())); docsEnum = termsEnum.Docs(null, docsEnum, DocsFlags.NONE); bool res = cache.Put(cp, docsEnum.NextDoc() + ctx.DocBase); Debug.Assert(!res, "entries should not have been evicted from the cache"); } else { // the cache is full and the next put() will evict entries from it, therefore abort the iteration. aborted = true; break; } } } if (aborted) { break; } } } finally { readerManager.Release(reader); } cacheIsComplete = !aborted; if (cacheIsComplete) { lock (this) { // everything is in the cache, so no need to keep readerManager open. // this block is executed in a sync block so that it works well with // initReaderManager called in parallel. readerManager.Dispose(); readerManager = null; initializedReaderManager = false; } } } }
public virtual void TestChildCount() { // LUCENE-4885: FacetResult.numValidDescendants was not set properly by FacetsAccumulator var indexDir = NewDirectory(); var taxoDir = NewDirectory(); var taxoWriter = new DirectoryTaxonomyWriter(taxoDir); IndexWriter iw = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); FacetsConfig config = new FacetsConfig(); for (int i = 0; i < 10; i++) { Document doc = new Document(); doc.Add(new FacetField("a", Convert.ToString(i))); iw.AddDocument(config.Build(taxoWriter, doc)); } DirectoryReader r = DirectoryReader.Open(iw, true); DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter); FacetsCollector sfc = new FacetsCollector(); NewSearcher(r).Search(new MatchAllDocsQuery(), sfc); Facets facets = GetTaxonomyFacetCounts(taxoReader, config, sfc); Assert.AreEqual(10, facets.GetTopChildren(2, "a").ChildCount); IOUtils.Close(taxoWriter, iw, taxoReader, taxoDir, r, indexDir); }
public virtual void TestConcurrency() { int ncats = AtLeast(100000); // add many categories int range = ncats * 3; // affects the categories selection AtomicInteger numCats = new AtomicInteger(ncats); Directory dir = NewDirectory(); var values = new ConcurrentDictionary <string, string>(); double d = Random().NextDouble(); TaxonomyWriterCache cache; if (d < 0.7) { // this is the fastest, yet most memory consuming cache = new Cl2oTaxonomyWriterCache(1024, 0.15f, 3); } else if (TEST_NIGHTLY && d > 0.98) { // this is the slowest, but tests the writer concurrency when no caching is done. // only pick it during NIGHTLY tests, and even then, with very low chances. cache = NO_OP_CACHE; } else { // this is slower than CL2O, but less memory consuming, and exercises finding categories on disk too. cache = new LruTaxonomyWriterCache(ncats / 10); } if (VERBOSE) { Console.WriteLine("TEST: use cache=" + cache); } var tw = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE, cache); ThreadClass[] addThreads = new ThreadClass[AtLeast(4)]; for (int z = 0; z < addThreads.Length; z++) { addThreads[z] = new ThreadAnonymousInnerClassHelper(this, range, numCats, values, tw); } foreach (var t in addThreads) { t.Start(); } foreach (var t in addThreads) { t.Join(); } tw.Dispose(); DirectoryTaxonomyReader dtr = new DirectoryTaxonomyReader(dir); // +1 for root category if (values.Count + 1 != dtr.Size) { foreach (string value in values.Keys) { FacetLabel label = new FacetLabel(FacetsConfig.StringToPath(value)); if (dtr.GetOrdinal(label) == -1) { Console.WriteLine("FAIL: path=" + label + " not recognized"); } } Fail("mismatch number of categories"); } int[] parents = dtr.ParallelTaxonomyArrays.Parents(); foreach (string cat in values.Keys) { FacetLabel cp = new FacetLabel(FacetsConfig.StringToPath(cat)); Assert.True(dtr.GetOrdinal(cp) > 0, "category not found " + cp); int level = cp.Length; int parentOrd = 0; // for root, parent is always virtual ROOT (ord=0) FacetLabel path = new FacetLabel(); for (int i = 0; i < level; i++) { path = cp.Subpath(i + 1); int ord = dtr.GetOrdinal(path); Assert.AreEqual(parentOrd, parents[ord], "invalid parent for cp=" + path); parentOrd = ord; // next level should have this parent } } IOUtils.Close(dtr, dir); }