// Regression test for LUCENE-5367: adding the very same built document twice
// used to fail. Checks that both additions succeed and are both counted.
public virtual void TestAddSameDocTwice()
{
    Directory indexDir = NewDirectory();
    Directory taxoDir = NewDirectory();
    var indexWriter = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
    var taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
    var facetsConfig = new FacetsConfig();

    var rawDoc = new Document();
    rawDoc.Add(new FacetField("a", "b"));
    Document builtDoc = facetsConfig.Build(taxoWriter, rawDoc);

    // Index the identical Document instance two times; this is the step that used to fail.
    for (int attempt = 0; attempt < 2; attempt++)
    {
        indexWriter.AddDocument(builtDoc);
    }
    IOUtils.Close(indexWriter, taxoWriter);

    DirectoryReader indexReader = DirectoryReader.Open(indexDir);
    var taxoReader = new DirectoryTaxonomyReader(taxoDir);
    IndexSearcher searcher = NewSearcher(indexReader);
    var collector = new FacetsCollector();
    searcher.Search(new MatchAllDocsQuery(), collector);

    Facets facets = GetTaxonomyFacetCounts(taxoReader, facetsConfig, collector);
    FacetResult topChildren = facets.GetTopChildren(10, "a");

    // One distinct child under "a", seen in both documents.
    Assert.AreEqual(1, topChildren.LabelValues.Length);
    Assert.AreEqual(2, topChildren.LabelValues[0].Value);

    IOUtils.Close(indexReader, taxoReader);
    IOUtils.Close(indexDir, taxoDir);
}
// Seeds an index with the shared test data using the unmodified facets
// configuration, then checks the counts and that ordinals exist in "$facets".
public virtual void TestDefault()
{
    Directory indexDir = NewDirectory();
    Directory taxoDir = NewDirectory();

    // Index writer backed by a whitespace-tokenizing mock analyzer.
    var indexWriter = new RandomIndexWriter(
        Random(),
        indexDir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false)));

    // Taxonomy writer, always starting from a fresh taxonomy.
    var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);

    var facetsConfig = Config;
    seedIndex(taxoWriter, indexWriter, facetsConfig);

    IndexReader indexReader = indexWriter.Reader;
    taxoWriter.Commit();

    // The taxonomy reader is opened from the directory, hence the commit above.
    var taxoReader = new DirectoryTaxonomyReader(taxoDir);

    IndexSearcher searcher = NewSearcher(indexReader);
    FacetsCollector collector = PerformSearch(taxoReader, indexReader, searcher);

    // Hand-verify the aggregated facet results.
    AssertCorrectResults(GetTaxonomyFacetCounts(taxoReader, facetsConfig, collector));
    assertOrdinalsExist("$facets", indexReader);

    IOUtils.Close(taxoReader, indexReader, indexWriter, taxoWriter, indexDir, taxoDir);
}
// One-time fixture setup: builds an index of 110 documents carrying int and
// float association facets and opens the readers used by the tests.
public override void BeforeClass()
{
    base.BeforeClass();
    dir = NewDirectory();
    taxoDir = NewDirectory();

    DirectoryTaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxoDir);

    // Ints and floats cannot share an indexed field, so each dimension gets its own.
    config = new FacetsConfig();
    config.SetIndexFieldName("int", "$facets.int");
    config.SetMultiValued("int", true);
    config.SetIndexFieldName("float", "$facets.float");
    config.SetMultiValued("float", true);

    RandomIndexWriter indexWriter = new RandomIndexWriter(Random, dir, Similarity, TimeZone);

    const int totalDocs = 110;
    for (int docNum = 0; docNum < totalDocs; docNum++)
    {
        var doc = new Document();

        // Every 11th document stays empty; empty documents used to send the
        // association aggregators into an infinite loop.
        bool leaveEmpty = docNum % 11 == 0;
        if (!leaveEmpty)
        {
            doc.Add(new Int32AssociationFacetField(2, "int", "a"));
            doc.Add(new SingleAssociationFacetField(0.5f, "float", "a"));

            // Even-numbered documents additionally carry the 'b' associations.
            bool alsoTagB = docNum % 2 == 0;
            if (alsoTagB)
            {
                doc.Add(new Int32AssociationFacetField(3, "int", "b"));
                doc.Add(new SingleAssociationFacetField(0.2f, "float", "b"));
            }
        }

        indexWriter.AddDocument(config.Build(taxonomyWriter, doc));
    }

    taxonomyWriter.Dispose();
    reader = indexWriter.GetReader();
    indexWriter.Dispose();
    taxoReader = new DirectoryTaxonomyReader(taxoDir);
}
// Exercises facet counting over an index whose batches differ in whether they
// carry searchable content and/or categories.
public virtual void TestSegmentsWithoutCategoriesOrResults()
{
    Directory indexDir = NewDirectory();
    Directory taxoDir = NewDirectory();

    // NOTE: the NoMergePolicy setting is disabled in this test, so merges are not prevented.
    IndexWriterConfig writerConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    var indexWriter = new IndexWriter(indexDir, writerConfig);
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
    var config = new FacetsConfig();

    // Arguments: (config => categories are indexed, bool => content is indexed).
    indexTwoDocs(taxoWriter, indexWriter, config, false); // 1st: categories, no content
    indexTwoDocs(taxoWriter, indexWriter, null, true);    // 2nd: content, no categories
    indexTwoDocs(taxoWriter, indexWriter, config, true);  // 3rd: content and categories
    indexTwoDocs(taxoWriter, indexWriter, null, false);   // 4th: neither
    indexTwoDocs(taxoWriter, indexWriter, null, true);    // 5th: content, no categories
    indexTwoDocs(taxoWriter, indexWriter, config, true);  // 6th: content and categories
    indexTwoDocs(taxoWriter, indexWriter, null, true);    // 7th: content, no categories
    IOUtils.Close(indexWriter, taxoWriter);

    DirectoryReader indexReader = DirectoryReader.Open(indexDir);
    DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
    IndexSearcher searcher = NewSearcher(indexReader);

    // Only documents matching "f:a" contribute to the counts.
    Query termQuery = new TermQuery(new Term("f", "a"));
    var collector = new FacetsCollector();
    searcher.Search(termQuery, collector);

    Facets facets = GetTaxonomyFacetCounts(taxoReader, config, collector);
    FacetResult result = facets.GetTopChildren(10, "A");
    Assert.AreEqual(2, result.LabelValues.Length, "wrong number of children");

    LabelAndValue[] children = result.LabelValues;
    for (int idx = 0; idx < children.Length; idx++)
    {
        LabelAndValue child = children[idx];
        Assert.AreEqual(2, (int)child.Value, "wrong weight for child " + child.Label);
    }

    IOUtils.Close(indexReader, taxoReader, indexDir, taxoDir);
}
// Checks that labels containing the control characters U+001F and U+001E
// survive indexing and come back unchanged from the facet counts.
public virtual void TestLabelWithDelimiter()
{
    Store.Directory dir = NewDirectory();
    Store.Directory taxoDir = NewDirectory();
    var indexWriter = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, dir);
    DirectoryTaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);

    var facetsConfig = new FacetsConfig();
    facetsConfig.SetMultiValued("dim", true);

    var document = new Document();
    document.Add(NewTextField("field", "text", Field.Store.NO));
    document.Add(new FacetField("dim", "test\u001Fone"));
    document.Add(new FacetField("dim", "test\u001Etwo"));
    indexWriter.AddDocument(facetsConfig.Build(taxonomyWriter, document));

    // Both readers are opened near-real-time, straight from their writers.
    IndexSearcher searcher = NewSearcher(indexWriter.GetReader());
    DirectoryTaxonomyReader taxonomyReader = new DirectoryTaxonomyReader(taxonomyWriter);

    var collector = new FacetsCollector();
    searcher.Search(new MatchAllDocsQuery(), collector);

    Facets facets = GetTaxonomyFacetCounts(taxonomyReader, facetsConfig, collector);
    Assert.AreEqual(1, facets.GetSpecificValue("dim", "test\u001Fone"));
    Assert.AreEqual(1, facets.GetSpecificValue("dim", "test\u001Etwo"));

    FacetResult topChildren = facets.GetTopChildren(10, "dim");
    Assert.AreEqual("dim=dim path=[] value=-1 childCount=2\n test\u001Fone (1)\n test\u001Etwo (1)\n", topChildren.ToString());

    IOUtils.Dispose(indexWriter, taxonomyWriter, searcher.IndexReader, taxonomyReader, dir, taxoDir);
}
// Routes the "Band" and "Composer" dimensions to their own index fields and
// checks the combined results plus the ordinals stored in each field.
public virtual void TestDifferentFieldsAndText()
{
    Directory indexDir = NewDirectory();
    Directory taxoDir = NewDirectory();

    // Index writer backed by a whitespace-tokenizing mock analyzer.
    var indexWriter = new RandomIndexWriter(
        Random,
        indexDir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random, MockTokenizer.WHITESPACE, false)));

    // Taxonomy writer, always starting from a fresh taxonomy.
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);

    FacetsConfig facetsConfig = Config;
    facetsConfig.SetIndexFieldName("Band", "$bands");
    facetsConfig.SetIndexFieldName("Composer", "$composers");
    seedIndex(taxoWriter, indexWriter, facetsConfig);

    IndexReader indexReader = indexWriter.GetReader();
    taxoWriter.Commit();

    // The taxonomy reader is opened from the directory, hence the commit above.
    DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);

    IndexSearcher searcher = NewSearcher(indexReader);
    FacetsCollector collector = PerformSearch(taxoReader, indexReader, searcher);

    // Per-dimension facets come from the dedicated fields; the last argument to
    // MultiFacets is the counter built without an explicit field name.
    Facets bandFacets = GetTaxonomyFacetCounts(taxoReader, facetsConfig, collector, "$bands");
    Facets composerFacets = GetTaxonomyFacetCounts(taxoReader, facetsConfig, collector, "$composers");
    IDictionary<string, Facets> facetsByDim = new Dictionary<string, Facets>
    {
        ["Band"] = bandFacets,
        ["Composer"] = composerFacets
    };
    Facets combined = new MultiFacets(facetsByDim, GetTaxonomyFacetCounts(taxoReader, facetsConfig, collector));

    // Hand-verify the aggregated facet results.
    AssertCorrectResults(combined);
    assertOrdinalsExist("$facets", indexReader);
    assertOrdinalsExist("$bands", indexReader);
    assertOrdinalsExist("$composers", indexReader);

    IOUtils.Dispose(taxoReader, indexReader, indexWriter, taxoWriter, indexDir, taxoDir);
}
/// <summary>
/// Builds the example index: two documents, each with a text field "c", a
/// numeric "popularity" doc value and one facet under dimension "A".
/// </summary>
private void Index()
{
    var writerConfig = new IndexWriterConfig(EXAMPLE_VERSION, new WhitespaceAnalyzer(EXAMPLE_VERSION));

    using (var indexWriter = new IndexWriter(indexDir, writerConfig))
    // Facet ordinals are written to a directory separate from the main index.
    using (var taxoWriter = new DirectoryTaxonomyWriter(taxoDir))
    {
        var first = new Document();
        first.Add(new TextField("c", "foo bar", Field.Store.NO));
        first.Add(new NumericDocValuesField("popularity", 5L));
        first.Add(new FacetField("A", "B"));
        indexWriter.AddDocument(config.Build(taxoWriter, first));

        var second = new Document();
        second.Add(new TextField("c", "foo foo bar", Field.Store.NO));
        second.Add(new NumericDocValuesField("popularity", 3L));
        second.Add(new FacetField("A", "C"));
        indexWriter.AddDocument(config.Build(taxoWriter, second));
    } // both writers are disposed on leaving the block
}
// Checks that summing document scores per facet yields maxScore * totalHits
// when every hit receives the same constant score.
public virtual void TestSumScoreAggregator()
{
    Store.Directory indexDir = NewDirectory();
    Store.Directory taxoDir = NewDirectory();

    var taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
    var indexWriter = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
    var config = new FacetsConfig();

    int docCount = AtLeast(30);
    for (int n = 0; n < docCount; n++)
    {
        var doc = new Document();

        // Only a random subset of the documents gets the "f" field.
        if (Random().NextBoolean())
        {
            doc.Add(new StringField("f", "v", Field.Store.NO));
        }
        doc.Add(new FacetField("dim", "a"));
        indexWriter.AddDocument(config.Build(taxoWriter, doc));
    }

    // Readers are opened near-real-time, directly from the writers.
    DirectoryReader indexReader = DirectoryReader.Open(indexWriter, true);
    var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

    // The collector is constructed with 'true' so scores are kept.
    var collector = new FacetsCollector(true);
    var boostedMatchAll = new ConstantScoreQuery(new MatchAllDocsQuery()) { Boost = 2.0f };

    TopDocs topDocs = FacetsCollector.Search(NewSearcher(indexReader), boostedMatchAll, 10, collector);
    Facets facets = new TaxonomyFacetSumValueSource(taxoReader, config, collector, new TaxonomyFacetSumValueSource.ScoreValueSource());

    int expected = (int)(topDocs.MaxScore * topDocs.TotalHits);
    Assert.AreEqual(expected, (int)facets.GetSpecificValue("dim", "a"));

    IOUtils.Close(indexWriter, taxoWriter, taxoReader, taxoDir, indexReader, indexDir);
}
/// <summary>
/// Builds the example index: five documents, each tagged with an "Author"
/// facet and a hierarchical "Publish Date" facet (year, month, day).
/// </summary>
private void Index()
{
    using IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(EXAMPLE_VERSION, new WhitespaceAnalyzer(EXAMPLE_VERSION)));

    // Facet ordinals are written to a directory separate from the main index.
    using DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

    Document[] documents =
    {
        new Document
        {
            new FacetField("Author", "Bob"),
            new FacetField("Publish Date", "2010", "10", "15")
        },
        new Document
        {
            new FacetField("Author", "Lisa"),
            new FacetField("Publish Date", "2010", "10", "20")
        },
        new Document
        {
            new FacetField("Author", "Lisa"),
            new FacetField("Publish Date", "2012", "1", "1")
        },
        new Document
        {
            new FacetField("Author", "Susan"),
            new FacetField("Publish Date", "2012", "1", "7")
        },
        new Document
        {
            new FacetField("Author", "Frank"),
            new FacetField("Publish Date", "1999", "5", "5")
        }
    };

    // Build and add in declaration order, one document at a time.
    foreach (Document document in documents)
    {
        indexWriter.AddDocument(config.Build(taxoWriter, document));
    }
}
// Building a document with a two-level facet path for dimension "a", using a
// default FacetsConfig, must be rejected with an ArgumentException.
public virtual void TestDetectHierarchicalField()
{
    Store.Directory dir = NewDirectory();
    Store.Directory taxoDir = NewDirectory();
    DirectoryTaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE);
    RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
    var facetsConfig = new FacetsConfig();

    var document = new Document();
    document.Add(NewTextField("field", "text", Field.Store.NO));
    document.Add(new FacetField("a", "path", "other"));

    bool rejected = false;
    try
    {
        facetsConfig.Build(taxonomyWriter, document);
    }
    catch (System.ArgumentException)
    {
        // This is the outcome the test expects.
        rejected = true;
    }

    if (!rejected)
    {
        Fail("did not hit expected exception");
    }

    IOUtils.Close(indexWriter, taxonomyWriter, dir, taxoDir);
}
// Building a document with a two-level facet path for dimension "a", using a
// default FacetsConfig, must be rejected with an illegal-argument exception.
public virtual void TestDetectHierarchicalField()
{
    Store.Directory dir = NewDirectory();
    Store.Directory taxoDir = NewDirectory();
    DirectoryTaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);
    RandomIndexWriter indexWriter = new RandomIndexWriter(Random, dir);
    var facetsConfig = new FacetsConfig();

    var document = new Document();
    document.Add(NewTextField("field", "text", Field.Store.NO));
    document.Add(new FacetField("a", "path", "other"));

    try
    {
        facetsConfig.Build(taxonomyWriter, document);
        fail("did not hit expected exception");
    }
    catch (Exception expected) when (expected.IsIllegalArgumentException())
    {
        // This is the outcome the test expects; nothing further to check.
    }

    IOUtils.Dispose(indexWriter, taxonomyWriter, dir, taxoDir);
}
// A document mixing an int association facet and a float association facet,
// built with a default FacetsConfig, must be rejected with an ArgumentException.
public virtual void TestMixedTypesInSameIndexField()
{
    Store.Directory dir = NewDirectory();
    Store.Directory taxoDir = NewDirectory();
    TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxoDir);
    var facetsConfig = new FacetsConfig();
    var indexWriter = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

    var document = new Document();
    document.Add(new IntAssociationFacetField(14, "a", "x"));
    document.Add(new FloatAssociationFacetField(55.0f, "b", "y"));

    bool rejected = false;
    try
    {
        indexWriter.AddDocument(facetsConfig.Build(taxonomyWriter, document));
    }
    catch (System.ArgumentException)
    {
        // This is the outcome the test expects.
        rejected = true;
    }

    if (!rejected)
    {
        Fail("did not hit expected exception");
    }

    IOUtils.Close(indexWriter, taxonomyWriter, dir, taxoDir);
}
// One-time fixture setup. LUCENENET specific: renamed from
// BeforeClassCountingFacetsAggregatorTest() to ensure calling order.
public override void BeforeClass()
{
    base.BeforeClass();
    indexDir = NewDirectory();
    taxoDir = NewDirectory();

    // The index is built from four batches, in this order:
    //   1. no categories, but matching results
    //   2. categories, but no results
    //   3. categories and results
    //   4. categories, but only some results
    IndexWriterConfig writerConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));

    // Merging is switched off so the test controls the index segments.
    writerConfig.MergePolicy = NoMergePolicy.COMPOUND_FILES;

    var indexWriter = new IndexWriter(indexDir, writerConfig);
    ITaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxoDir);

    allExpectedCounts = newCounts();
    termExpectedCounts = newCounts();

    IndexDocsNoFacets(indexWriter);                                                    // batch 1
    IndexDocsWithFacetsNoTerms(indexWriter, taxonomyWriter, allExpectedCounts);        // batch 2
    IndexDocsWithFacetsAndTerms(indexWriter, taxonomyWriter, allExpectedCounts);       // batch 3
    IndexDocsWithFacetsAndSomeTerms(indexWriter, taxonomyWriter, allExpectedCounts);   // batch 4

    IOUtils.Dispose(indexWriter, taxonomyWriter);
}
// A document mixing an Int32 association facet and a Single association facet,
// built with a default FacetsConfig, must be rejected with an
// illegal-argument exception.
public virtual void TestMixedTypesInSameIndexField()
{
    Store.Directory dir = NewDirectory();
    Store.Directory taxoDir = NewDirectory();
    ITaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxoDir);
    var facetsConfig = new FacetsConfig();
    var indexWriter = new RandomIndexWriter(Random, dir);

    var document = new Document();
    document.Add(new Int32AssociationFacetField(14, "a", "x"));
    document.Add(new SingleAssociationFacetField(55.0f, "b", "y"));

    try
    {
        indexWriter.AddDocument(facetsConfig.Build(taxonomyWriter, document));
        fail("did not hit expected exception");
    }
    catch (Exception expected) when (expected.IsIllegalArgumentException())
    {
        // This is the outcome the test expects; nothing further to check.
    }

    IOUtils.Dispose(indexWriter, taxonomyWriter, dir, taxoDir);
}
// Indexes dimension "a" into the custom field "$facets2", then aggregates
// without naming that field. The aggregation must report no dimensions, and
// asking for "a" explicitly must throw an ArgumentException.
public virtual void TestWrongIndexFieldName()
{
    Store.Directory dir = NewDirectory();
    Store.Directory taxoDir = NewDirectory();

    // Writes facet ords to a separate directory from the
    // main index:
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE);

    FacetsConfig config = new FacetsConfig();
    config.SetIndexFieldName("a", "$facets2");
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

    Document doc = new Document();
    doc.Add(new FacetField("a", "foo1"));
    writer.AddDocument(config.Build(taxoWriter, doc));

    // NRT open
    IndexSearcher searcher = NewSearcher(writer.Reader);

    // NRT open
    var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

    FacetsCollector c = new FacetsCollector();
    searcher.Search(new MatchAllDocsQuery(), c);

    // No index field name is passed below, so "$facets2" is never named.
    // One of the available counting implementations is picked at random.
    Facets facets;
    if (Random().NextBoolean())
    {
        facets = new FastTaxonomyFacetCounts(taxoReader, config, c);
    }
    else
    {
        OrdinalsReader ordsReader = new DocValuesOrdinalsReader();
        if (Random().NextBoolean())
        {
            ordsReader = new CachedOrdinalsReader(ordsReader);
        }
        facets = new TaxonomyFacetCounts(ordsReader, taxoReader, config, c);
    }

    // Ask for top 10 labels for any dims that have counts:
    IList<FacetResult> results = facets.GetAllDims(10);

    // AreEqual reports the actual count on failure; a bare boolean assert would not.
    Assert.AreEqual(0, results.Count);

    try
    {
        facets.GetSpecificValue("a");
        Fail("should have hit exc");
    }
    catch (System.ArgumentException)
    {
        // expected
    }

    try
    {
        facets.GetTopChildren(10, "a");
        Fail("should have hit exc");
    }
    catch (System.ArgumentException)
    {
        // expected
    }

    IOUtils.Close(writer, taxoWriter, searcher.IndexReader, taxoReader, taxoDir, dir);
}
// Indexes five documents with an int field "num" and an "Author" facet, then
// checks that summing "num" per author gives the expected ranking.
public virtual void TestBasic()
{
    Store.Directory dir = NewDirectory();
    Store.Directory taxoDir = NewDirectory();

    // Writes facet ords to a separate directory from the
    // main index:
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE);

    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
    FacetsConfig config = new FacetsConfig();

    // A fresh Document is created for every (num, Author) pair.
    Document doc = new Document();
    doc.Add(new IntField("num", 10, Field.Store.NO));
    doc.Add(new FacetField("Author", "Bob"));
    writer.AddDocument(config.Build(taxoWriter, doc));

    doc = new Document();
    doc.Add(new IntField("num", 20, Field.Store.NO));
    doc.Add(new FacetField("Author", "Lisa"));
    writer.AddDocument(config.Build(taxoWriter, doc));

    doc = new Document();
    doc.Add(new IntField("num", 30, Field.Store.NO));
    doc.Add(new FacetField("Author", "Lisa"));
    writer.AddDocument(config.Build(taxoWriter, doc));

    doc = new Document();
    doc.Add(new IntField("num", 40, Field.Store.NO));
    doc.Add(new FacetField("Author", "Susan"));
    writer.AddDocument(config.Build(taxoWriter, doc));

    doc = new Document();
    doc.Add(new IntField("num", 45, Field.Store.NO));
    doc.Add(new FacetField("Author", "Frank"));
    writer.AddDocument(config.Build(taxoWriter, doc));

    // NRT open
    IndexSearcher searcher = NewSearcher(writer.Reader);
    writer.Dispose();

    // NRT open
    var taxoReader = new DirectoryTaxonomyReader(taxoWriter);
    taxoWriter.Dispose();

    // Aggregate the facet counts:
    FacetsCollector c = new FacetsCollector();

    // MatchAllDocsQuery is for "browsing" (counts facets
    // for all non-deleted docs in the index); normally
    // you'd use a "normal" query and one of the
    // Facets.search utility methods:
    searcher.Search(new MatchAllDocsQuery(), c);

    TaxonomyFacetSumValueSource facets = new TaxonomyFacetSumValueSource(taxoReader, new FacetsConfig(), c, new IntFieldSource("num"));

    // Retrieve & verify results. The expected text must stay on a single source
    // line: a regular string literal cannot contain a raw line break.
    Assert.AreEqual("dim=Author path=[] value=145.0 childCount=4\n Lisa (50.0)\n Frank (45.0)\n Susan (40.0)\n Bob (10.0)\n", facets.GetTopChildren(10, "Author").ToString());

    taxoReader.Dispose();
    searcher.IndexReader.Dispose();
    dir.Dispose();
    taxoDir.Dispose();
}
// Checks that a second taxonomy writer cannot be opened on a directory while
// the first writer still holds it, and that it can once the lock is removed.
public virtual void TestWriterLock()
{
    // RAMDirectory is used explicitly because the native fslock implementation
    // does not tolerate what this test does.
    RAMDirectory indexDir = new RAMDirectory();
    DirectoryTaxonomyWriter firstWriter = new DirectoryTaxonomyWriter(indexDir);
    firstWriter.AddCategory(new FacetLabel("hi", "there"));
    firstWriter.Commit();

    // The first writer is deliberately left open so the directory stays locked.

    // Confirm the first writer's commit is visible to a reader.
    DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(indexDir);
    Assert.AreEqual(2, taxoReader.GetOrdinal(new FacetLabel("hi", "there")));

    // Opening a second writer now is expected to raise LockObtainFailedException.
    try
    {
        Assert.Null(new DirectoryTaxonomyWriter(indexDir));
        Fail("should have failed to write in locked directory");
    }
    catch (LockObtainFailedException)
    {
        // This is the expected outcome.
    }

    // With the lock removed, a new writer can be opened and written to.
    DirectoryTaxonomyWriter.Unlock(indexDir);
    DirectoryTaxonomyWriter secondWriter = new DirectoryTaxonomyWriter(indexDir);
    secondWriter.AddCategory(new FacetLabel("hey"));
    secondWriter.Dispose();

    // The refreshed reader must see what the second writer added.
    var refreshedReader = TaxonomyReader.OpenIfChanged(taxoReader);
    Assert.NotNull(refreshedReader);
    taxoReader.Dispose();
    taxoReader = refreshedReader;
    Assert.AreEqual(3, taxoReader.GetOrdinal(new FacetLabel("hey")));

    taxoReader.Dispose();
    firstWriter.Dispose();
    indexDir.Dispose();
}
// Indexes three documents with an increasing number of dimensions ("a", then
// "a"+"b", then "a"+"b"+"c") and checks the per-dimension counts.
public virtual void TestSparseFacets()
{
    Store.Directory dir = NewDirectory();
    Store.Directory taxoDir = NewDirectory();

    // Facet ordinals are written to a directory separate from the main index.
    DirectoryTaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);
    var indexWriter = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, dir);
    var facetsConfig = new FacetsConfig();

    var first = new Document();
    first.Add(new FacetField("a", "foo1"));
    indexWriter.AddDocument(facetsConfig.Build(taxonomyWriter, first));

    // Randomly commit between documents.
    if (Random.NextBoolean())
    {
        indexWriter.Commit();
    }

    var second = new Document();
    second.Add(new FacetField("a", "foo2"));
    second.Add(new FacetField("b", "bar1"));
    indexWriter.AddDocument(facetsConfig.Build(taxonomyWriter, second));

    if (Random.NextBoolean())
    {
        indexWriter.Commit();
    }

    var third = new Document();
    third.Add(new FacetField("a", "foo3"));
    third.Add(new FacetField("b", "bar2"));
    third.Add(new FacetField("c", "baz1"));
    indexWriter.AddDocument(facetsConfig.Build(taxonomyWriter, third));

    // Both readers are opened near-real-time, straight from their writers.
    IndexSearcher searcher = NewSearcher(indexWriter.GetReader());
    DirectoryTaxonomyReader taxonomyReader = new DirectoryTaxonomyReader(taxonomyWriter);

    var collector = new FacetsCollector();
    searcher.Search(new MatchAllDocsQuery(), collector);

    Facets facets = GetTaxonomyFacetCounts(taxonomyReader, new FacetsConfig(), collector);

    // Top 10 labels for every dimension that has counts.
    IList<FacetResult> allDims = facets.GetAllDims(10);
    Assert.AreEqual(3, allDims.Count);
    Assert.AreEqual("dim=a path=[] value=3 childCount=3\n foo1 (1)\n foo2 (1)\n foo3 (1)\n", allDims[0].ToString());
    Assert.AreEqual("dim=b path=[] value=2 childCount=2\n bar1 (1)\n bar2 (1)\n", allDims[1].ToString());
    Assert.AreEqual("dim=c path=[] value=1 childCount=1\n baz1 (1)\n", allDims[2].ToString());

    IOUtils.Dispose(indexWriter, taxonomyWriter, searcher.IndexReader, taxonomyReader, taxoDir, dir);
}
// Searches through a SearcherTaxonomyManager built on live writers while an
// indexer thread adds facets and a reopener thread runs, until 'stop' is set.
public virtual void TestNrt()
{
    Store.Directory dir = NewDirectory();
    Store.Directory taxoDir = NewDirectory();
    IndexWriterConfig writerConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

    // A tiny maxBufferedDocs can make this test too slow, so enforce a floor of 500.
    writerConfig.SetMaxBufferedDocs(Math.Max(500, writerConfig.MaxBufferedDocs));

    // The mock-random and alcoholic merge policies are too slow here; use a tiered one.
    var mergePolicy = new TieredMergePolicy { FloorSegmentMB = .001 };
    writerConfig.SetMergePolicy(mergePolicy);

    var indexWriter = new IndexWriter(dir, writerConfig);
    var taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
    var config = new FacetsConfig();
    config.SetMultiValued("field", true);
    var stop = new AtomicBoolean();

    // Number of unique facets to index before stopping.
    int ordLimit = TEST_NIGHTLY ? 100000 : 6000;

    var indexer = new IndexerThread(indexWriter, config, taxoWriter, null, ordLimit, stop);
    var manager = new SearcherTaxonomyManager(indexWriter, true, null, taxoWriter);
    var reopener = new ThreadAnonymousInnerClassHelper(this, stop, manager);

    reopener.Name = "reopener";
    reopener.Start();
    indexer.Name = "indexer";
    indexer.Start();

    try
    {
        while (!stop.Get())
        {
            SearcherAndTaxonomy pair = manager.Acquire();
            try
            {
                var collector = new FacetsCollector();
                pair.searcher.Search(new MatchAllDocsQuery(), collector);
                Facets facets = GetTaxonomyFacetCounts(pair.taxonomyReader, config, collector);
                FacetResult result = facets.GetTopChildren(10, "field");

                // Once any document is visible there must be facet children as well.
                if (pair.searcher.IndexReader.NumDocs > 0)
                {
                    Assert.True(result.ChildCount > 0);
                    Assert.True(result.LabelValues.Length > 0);
                }
            }
            finally
            {
                // Always hand the acquired pair back to the manager.
                manager.Release(pair);
            }
        }
    }
    finally
    {
        // Wait for both background threads, even if a search failed.
        indexer.Join();
        reopener.Join();
    }

    if (VERBOSE)
    {
        Console.WriteLine("TEST: now stop");
    }

    IOUtils.Close(manager, taxoWriter, indexWriter, taxoDir, dir);
}
// Checks that a directory-based taxonomy reader only sees writer changes after
// the writer commits AND the reader is reopened, and that the parents array is
// correct after each refresh.
public virtual void TestSeparateReaderAndWriter()
{
    var indexDir = NewDirectory();
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(indexDir);
    taxoWriter.Commit();
    DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(indexDir);

    // An empty taxonomy has size 1: just the root.
    Assert.AreEqual(1, taxoReader.Count);

    taxoWriter.AddCategory(new FacetLabel("Author"));
    Assert.AreEqual(1, taxoReader.Count); // still only the root

    // Reopening is not enough yet, because the writer has not committed.
    Assert.Null(TaxonomyReader.OpenIfChanged(taxoReader));
    Assert.AreEqual(1, taxoReader.Count); // still only the root

    taxoWriter.Commit();
    Assert.AreEqual(1, taxoReader.Count); // committed, but this reader is not refreshed

    var refreshed = TaxonomyReader.OpenIfChanged(taxoReader);
    Assert.NotNull(refreshed);
    taxoReader.Dispose();
    taxoReader = refreshed;

    int author = 1;
    try
    {
        Assert.AreEqual(TaxonomyReader.ROOT_ORDINAL, taxoReader.ParallelTaxonomyArrays.Parents[author]);
    }
    catch (System.IndexOutOfRangeException)
    {
        Fail("After category addition, commit() and refresh(), getParent for " + author + " should NOT throw exception");
    }

    // Finally there are two categories.
    Assert.AreEqual(2, taxoReader.Count);

    // Add one more category and verify, after commit and refresh, that its
    // parent is correct (the reader must update its prefetched parent vector)
    // and that the earlier information was not damaged.
    taxoWriter.AddCategory(new FacetLabel("Author", "Richard Dawkins"));
    int dawkins = 2;
    taxoWriter.Commit();

    refreshed = TaxonomyReader.OpenIfChanged(taxoReader);
    Assert.NotNull(refreshed);
    taxoReader.Dispose();
    taxoReader = refreshed;

    int[] parentOf = taxoReader.ParallelTaxonomyArrays.Parents;
    Assert.AreEqual(author, parentOf[dawkins]);
    Assert.AreEqual(TaxonomyReader.ROOT_ORDINAL, parentOf[author]);
    Assert.AreEqual(TaxonomyReader.INVALID_ORDINAL, parentOf[TaxonomyReader.ROOT_ORDINAL]);
    Assert.AreEqual(3, taxoReader.Count);

    taxoWriter.Dispose();
    taxoReader.Dispose();
    indexDir.Dispose();
}
// Regression test: re-adding existing categories through a reopened writer and
// then committing used to end in an AlreadyClosedException.
public virtual void TestWriterTwice3()
{
    var indexDir = NewDirectory();

    // Step 1: create the taxonomy and fill it.
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(indexDir);
    FillTaxonomy(taxoWriter);
    taxoWriter.Dispose();

    // Step 2: reopen the same taxonomy and add the same categories again.
    // After a few categories the writer stops looking each one up on disk,
    // reads them all into memory and closes its reader. The bug was that it
    // closed the reader without setting the reader reference to null.
    taxoWriter = new DirectoryTaxonomyWriter(indexDir);
    FillTaxonomy(taxoWriter);

    // One genuinely new category, so that the commit has something to do.
    taxoWriter.AddCategory(new FacetLabel("hi"));

    // Step 3: commit. A writer with an open reader reopens it after a commit.
    // Here the reader was already closed but not nulled, so the writer tried
    // to reopen it and failed.
    taxoWriter.Commit();

    Assert.AreEqual(ExpectedCategories.Length + 1, taxoWriter.Count);
    taxoWriter.Dispose();
    indexDir.Dispose();
}
// Filling the taxonomy twice through the same writer must not create any
// extra categories.
public virtual void TestWriterTwice()
{
    var indexDir = NewDirectory();
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(indexDir);

    FillTaxonomy(taxoWriter);

    // Second pass: the same categories are added again; FillTaxonomy itself
    // checks that the same ordinal paths come back rather than new ones.
    FillTaxonomy(taxoWriter);

    // The category count must be unchanged by the second pass.
    Assert.AreEqual(ExpectedCategories.Length, taxoWriter.Count);

    taxoWriter.Dispose();
    indexDir.Dispose();
}
// Filling the taxonomy, closing the writer, then filling it again through a
// newly opened writer must not grow the number of categories.
public virtual void TestWriterTwice2()
{
    var indexDir = NewDirectory();

    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(indexDir);
    FillTaxonomy(taxoWriter);
    taxoWriter.Dispose();

    taxoWriter = new DirectoryTaxonomyWriter(indexDir);

    // Second pass over the same categories: the same ordinals are expected and
    // the taxonomy must not have grown.
    FillTaxonomy(taxoWriter);
    Assert.AreEqual(ExpectedCategories.Length, taxoWriter.Count);

    taxoWriter.Dispose();
    indexDir.Dispose();
}
// Fills a new taxonomy and checks that the writer reports the expected size.
public virtual void TestWriter()
{
    var indexDir = NewDirectory();
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(indexDir);

    FillTaxonomy(taxoWriter);

    // The writer's Count must match the number of expected categories.
    Assert.AreEqual(ExpectedCategories.Length, taxoWriter.Count);

    taxoWriter.Dispose();
    indexDir.Dispose();
}
// Checks that a taxonomy reader opened directly from a writer (near-real-time)
// sees a category added afterwards once it is reopened, without a commit.
public virtual void TestNrt()
{
    var dir = NewDirectory();
    var writer = new DirectoryTaxonomyWriter(dir);
    var reader = new DirectoryTaxonomyReader(writer);

    FacetLabel cp = new FacetLabel("a");
    writer.AddCategory(cp);

    var newReader = TaxonomyReader.OpenIfChanged(reader);
    Assert.NotNull(newReader, "expected a new instance");
    Assert.AreEqual(2, newReader.Count);

    // Compare by value. A reference comparison (AreNotSame) on two boxed ints
    // would pass regardless of the ordinal returned.
    Assert.AreNotEqual(TaxonomyReader.INVALID_ORDINAL, newReader.GetOrdinal(cp));

    reader.Dispose();
    reader = newReader;

    writer.Dispose();
    reader.Dispose();
    dir.Dispose();
}
// Runs CheckPaths after every fill, both on the original writer and on a
// writer reopened over the same directory.
public virtual void TestWriterCheckPaths2()
{
    var indexDir = NewDirectory();

    // First writer: fill and check, twice.
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(indexDir);
    FillTaxonomy(taxoWriter);
    CheckPaths(taxoWriter);
    FillTaxonomy(taxoWriter);
    CheckPaths(taxoWriter);
    taxoWriter.Dispose();

    // Reopened writer: check straight away, then fill and check again.
    taxoWriter = new DirectoryTaxonomyWriter(indexDir);
    CheckPaths(taxoWriter);
    FillTaxonomy(taxoWriter);
    CheckPaths(taxoWriter);
    taxoWriter.Dispose();

    indexDir.Dispose();
}
// After the taxonomy behind an NRT SearcherTaxonomyManager is replaced,
// MaybeRefresh must throw InvalidOperationException.
public virtual void TestReplaceTaxonomyNrt()
{
    Store.Directory dir = NewDirectory();
    Store.Directory taxoDir = NewDirectory();
    var indexWriter = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
    var taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

    // A second taxonomy, created and closed at once, used as the replacement.
    Store.Directory replacementDir = NewDirectory();
    var replacementWriter = new DirectoryTaxonomyWriter(replacementDir);
    replacementWriter.Dispose();

    var manager = new SearcherTaxonomyManager(indexWriter, true, null, taxoWriter);

    indexWriter.AddDocument(new Document());
    taxoWriter.ReplaceTaxonomy(replacementDir);
    replacementDir.Dispose();

    bool refreshRejected = false;
    try
    {
        manager.MaybeRefresh();
    }
    catch (InvalidOperationException)
    {
        // This is the outcome the test expects.
        refreshRejected = true;
    }

    if (!refreshRejected)
    {
        Fail("should have hit exception");
    }

    IOUtils.Close(manager, taxoWriter, indexWriter, taxoDir, dir);
}
// Step-by-step check of the ordinals AddCategory returns and of the writer's
// Count after each addition.
public virtual void TestWriterSimpler()
{
    var indexDir = NewDirectory();
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(indexDir);

    // A new taxonomy holds just the root.
    Assert.AreEqual(1, taxoWriter.Count);

    // A new top-level category gets ordinal 1.
    Assert.AreEqual(1, taxoWriter.AddCategory(new FacetLabel("a")));
    Assert.AreEqual(2, taxoWriter.Count);

    // Re-adding it returns the same ordinal and adds nothing.
    Assert.AreEqual(1, taxoWriter.AddCategory(new FacetLabel("a")));
    Assert.AreEqual(2, taxoWriter.Count);

    // A different top-level category gets a new ordinal.
    Assert.AreEqual(2, taxoWriter.AddCategory(new FacetLabel("b")));
    Assert.AreEqual(3, taxoWriter.Count);

    // A child of an existing category adds exactly one ordinal.
    Assert.AreEqual(3, taxoWriter.AddCategory(new FacetLabel("a", "c")));
    Assert.AreEqual(4, taxoWriter.Count);

    // Re-adding that second-level category changes nothing.
    Assert.AreEqual(3, taxoWriter.AddCategory(new FacetLabel("a", "c")));
    Assert.AreEqual(4, taxoWriter.Count);

    // A two-component path with both components new adds two categories.
    Assert.AreEqual(5, taxoWriter.AddCategory(new FacetLabel("d", "e")));
    Assert.AreEqual(6, taxoWriter.Count);

    // The parent "d" was created first, so it holds the lower ordinal.
    Assert.AreEqual(4, taxoWriter.AddCategory(new FacetLabel("d")));

    // Same idea, but below a category that already exists.
    Assert.AreEqual(7, taxoWriter.AddCategory(new FacetLabel("b", "d", "e")));
    Assert.AreEqual(8, taxoWriter.Count);

    // And below two existing levels: only the leaf is new.
    Assert.AreEqual(8, taxoWriter.AddCategory(new FacetLabel("b", "d", "f")));
    Assert.AreEqual(9, taxoWriter.Count);

    taxoWriter.Dispose();
    indexDir.Dispose();
}
// Searches through a directory-based SearcherTaxonomyManager while an indexer
// thread adds facets, until 'stop' is set.
public virtual void TestDirectory()
{
    Store.Directory indexDir = NewDirectory();
    Store.Directory taxoDir = NewDirectory();
    var indexWriter = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
    var taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

    // Initial empty commit on both writers, made before the manager is
    // constructed over the two directories.
    indexWriter.Commit();
    taxoWriter.Commit();

    var manager = new SearcherTaxonomyManager(indexDir, taxoDir, null);
    var config = new FacetsConfig();
    config.SetMultiValued("field", true);
    var stop = new AtomicBoolean();

    // Number of unique facets to index before stopping.
    int ordLimit = TEST_NIGHTLY ? 100000 : 6000;

    var indexer = new IndexerThread(indexWriter, config, taxoWriter, manager, ordLimit, stop);
    indexer.Start();

    try
    {
        while (!stop.Get())
        {
            SearcherAndTaxonomy pair = manager.Acquire();
            try
            {
                var collector = new FacetsCollector();
                pair.searcher.Search(new MatchAllDocsQuery(), collector);
                Facets facets = GetTaxonomyFacetCounts(pair.taxonomyReader, config, collector);
                FacetResult result = facets.GetTopChildren(10, "field");

                // Once any document is visible there must be facet children as well.
                if (pair.searcher.IndexReader.NumDocs > 0)
                {
                    Assert.True(result.ChildCount > 0);
                    Assert.True(result.LabelValues.Length > 0);
                }
            }
            finally
            {
                // Always hand the acquired pair back to the manager.
                manager.Release(pair);
            }
        }
    }
    finally
    {
        // Wait for the indexer thread, even if a search failed.
        indexer.Join();
    }

    if (VERBOSE)
    {
        Console.WriteLine("TEST: now stop");
    }

    IOUtils.Close(manager, taxoWriter, indexWriter, taxoDir, indexDir);
}
/// <summary>
/// Checks that a taxonomy created by a writer that was opened and immediately disposed
/// contains exactly the root category, as seen by both the writer and a reader.
/// </summary>
public virtual void TestRootOnly()
{
    var taxoDir = NewDirectory();

    // Writer side: the root is present as soon as the taxonomy is opened.
    var writer = new DirectoryTaxonomyWriter(taxoDir);
    Assert.AreEqual(1, writer.Count);
    writer.Dispose();

    // Reader side: ordinal 0 is the empty path, has no parent, and is what the empty label resolves to.
    var reader = new DirectoryTaxonomyReader(taxoDir);
    Assert.AreEqual(1, reader.Count);
    Assert.AreEqual(0, reader.GetPath(0).Length);
    Assert.AreEqual(TaxonomyReader.INVALID_ORDINAL, reader.ParallelTaxonomyArrays.Parents[0]);
    Assert.AreEqual(0, reader.GetOrdinal(new FacetLabel()));

    reader.Dispose(true);
    taxoDir.Dispose();
}
/// <summary>
/// Builds a small taxonomy on a <c>SlowRAMDirectory</c>, adds <paramref name="numCategories"/>
/// children below "a/b" that the already-open reader must not see, then refreshes the reader
/// (with a directory delay enabled) while a helper thread runs against it, and finally asserts
/// that the helper thread recorded no exception.
/// </summary>
/// <param name="abPath">Category added after "a/0"; forwarded to the helper thread.</param>
/// <param name="abOrd">Forwarded to the helper thread.</param>
/// <param name="abYoungChildBase1">Forwarded to the helper thread.</param>
/// <param name="abYoungChildBase2">Forwarded to the helper thread.</param>
/// <param name="retry">Retry number; forwarded to the helper thread and used in the failure message.</param>
/// <param name="numCategories">How many "a/b/&lt;i&gt;" categories to add after the reader was opened.</param>
private void AssertConsistentYoungestChild(FacetLabel abPath, int abOrd, int abYoungChildBase1, int abYoungChildBase2, int retry, int numCategories)
{
    var indexDir = new SlowRAMDirectory(-1, null); // no slowness for initialization
    var tw = new DirectoryTaxonomyWriter(indexDir);
    tw.AddCategory(new FacetLabel("a", "0"));
    tw.AddCategory(abPath);
    tw.Commit();

    var tr = new DirectoryTaxonomyReader(indexDir);
    for (int i = 0; i < numCategories; i++)
    {
        var cp = new FacetLabel("a", "b", Convert.ToString(i));
        tw.AddCategory(cp);
        Assert.AreEqual(TaxonomyReader.INVALID_ORDINAL, tr.GetOrdinal(cp), "Ordinal of " + cp + " must be invalid until Taxonomy Reader was refreshed");
    }
    tw.Dispose();

    // Single-element arrays let the helper thread hand its results back to this method.
    var stop = new AtomicBoolean(false);
    Exception[] error = new Exception[] { null };
    int[] retrieval = new int[] { 0 };

    var thread = new ThreadAnonymousInnerClassHelper(this, abPath, abOrd, abYoungChildBase1, abYoungChildBase2, retry, tr, stop, error, retrieval);
    thread.Start();

    indexDir.SleepMillis = 1; // some delay for refresh
    var newTaxoReader = TaxonomyReader.OpenIfChanged(tr);
    if (newTaxoReader != null)
    {
        newTaxoReader.Dispose();
    }

    stop.Set(true);
    thread.Join();

    // Fixed the misspelled "Unexpcted" in the failure message.
    Assert.Null(error[0], "Unexpected exception at retry " + retry + " retrieval " + retrieval[0] + ": \n" + stackTraceStr(error[0]));

    tr.Dispose();
}
/// <summary>
/// Variant of the root-only check in which the writer is committed but still open while a
/// reader on the same directory is inspected.
/// </summary>
public virtual void TestRootOnly2()
{
    var taxoDir = NewDirectory();
    var writer = new DirectoryTaxonomyWriter(taxoDir);
    writer.Commit();

    var reader = new DirectoryTaxonomyReader(taxoDir);

    // Only the root exists: empty path, no parent, and the empty label maps to ordinal 0.
    Assert.AreEqual(1, reader.Count);
    Assert.AreEqual(0, reader.GetPath(0).Length);
    Assert.AreEqual(TaxonomyReader.INVALID_ORDINAL, reader.ParallelTaxonomyArrays.Parents[0]);
    Assert.AreEqual(0, reader.GetOrdinal(new FacetLabel()));

    writer.Dispose();
    reader.Dispose(true);
    taxoDir.Dispose();
}
/// <summary>
/// Checks when a category added through a writer becomes visible to a separately opened
/// reader: not after the add, not after <c>OpenIfChanged</c> without a commit, not after the
/// commit alone, but only on the new reader returned by <c>OpenIfChanged</c> after a commit.
/// </summary>
public virtual void TestSeparateReaderAndWriter2()
{
    var taxoDir = NewDirectory();
    var writer = new DirectoryTaxonomyWriter(taxoDir);
    writer.Commit();
    var reader = new DirectoryTaxonomyReader(taxoDir);

    FacetLabel author = new FacetLabel("Author");

    // Empty taxonomy: root only, "Author" unknown.
    Assert.AreEqual(1, reader.Count);
    Assert.AreEqual(TaxonomyReader.INVALID_ORDINAL, reader.GetOrdinal(author));

    writer.AddCategory(author);

    // Added but neither committed nor refreshed: the reader is unchanged.
    Assert.AreEqual(TaxonomyReader.INVALID_ORDINAL, reader.GetOrdinal(author));
    Assert.AreEqual(1, reader.Count);

    // A refresh attempt without a preceding commit yields no new reader.
    Assert.Null(TaxonomyReader.OpenIfChanged(reader));
    Assert.AreEqual(TaxonomyReader.INVALID_ORDINAL, reader.GetOrdinal(author));
    Assert.AreEqual(1, reader.Count);

    writer.Commit();

    // Committed, but the old reader instance still shows the old state.
    Assert.AreEqual(TaxonomyReader.INVALID_ORDINAL, reader.GetOrdinal(author));
    Assert.AreEqual(1, reader.Count);

    // Now the refresh must produce a new reader; swap it in for the old one.
    var refreshed = TaxonomyReader.OpenIfChanged(reader);
    Assert.NotNull(refreshed);
    reader.Dispose();
    reader = refreshed;

    Assert.AreEqual(1, reader.GetOrdinal(author));
    Assert.AreEqual(2, reader.Count);

    writer.Dispose();
    reader.Dispose();
    taxoDir.Dispose();
}
/// <summary>
/// Indexes one document whose dimension "a" is configured to use index field "$facets2",
/// then aggregates with <c>TaxonomyFacetSumValueSource</c> and checks that no dimensions are
/// reported and that asking for dimension "a" throws <see cref="System.ArgumentException"/>.
/// </summary>
public virtual void TestWrongIndexFieldName()
{
    Store.Directory dir = NewDirectory();
    Store.Directory taxoDir = NewDirectory();

    // Writes facet ords to a separate directory from the main index:
    var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);

    FacetsConfig config = new FacetsConfig();
    config.SetIndexFieldName("a", "$facets2");

    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

    Document doc = new Document();
    doc.Add(new Int32Field("num", 10, Field.Store.NO));
    doc.Add(new FacetField("a", "foo1"));
    writer.AddDocument(config.Build(taxoWriter, doc));

    // NRT open
    IndexSearcher searcher = NewSearcher(writer.Reader);
    writer.Dispose();

    // NRT open
    var taxoReader = new DirectoryTaxonomyReader(taxoWriter);
    taxoWriter.Dispose();

    FacetsCollector c = new FacetsCollector();
    searcher.Search(new MatchAllDocsQuery(), c);

    TaxonomyFacetSumValueSource facets = new TaxonomyFacetSumValueSource(taxoReader, config, c, new Int32FieldSource("num"));

    // Ask for top 10 labels for any dims that have counts.
    // AreEqual (rather than True(... == 0)) reports the actual count when the check fails.
    IList<FacetResult> results = facets.GetAllDims(10);
    Assert.AreEqual(0, results.Count);

    try
    {
        facets.GetSpecificValue("a");
        Fail("should have hit exc");
    }
    catch (System.ArgumentException)
    {
        // expected
    }

    try
    {
        facets.GetTopChildren(10, "a");
        Fail("should have hit exc");
    }
    catch (System.ArgumentException)
    {
        // expected
    }

    IOUtils.Dispose(searcher.IndexReader, taxoReader, dir, taxoDir);
}
/// <summary>
/// Randomized check of <c>TaxonomyFacetSumValueSource</c>: indexes random documents that carry
/// a random float "value" and up to <c>numDims</c> facet dimensions, then, for a number of
/// random term queries, compares the facet sums against sums computed by brute force over the
/// in-memory test documents.
/// </summary>
public virtual void TestRandom()
{
    string[] tokens = GetRandomTokens(10);
    Store.Directory indexDir = NewDirectory();
    Store.Directory taxoDir = NewDirectory();

    RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, indexDir);
    var tw = new DirectoryTaxonomyWriter(taxoDir);
    FacetsConfig config = new FacetsConfig();
    int numDocs = AtLeast(1000);
    int numDims = TestUtil.NextInt32(Random, 1, 7);
    IList<TestDoc> testDocs = GetRandomDocs(tokens, numDocs, numDims);

    foreach (TestDoc testDoc in testDocs)
    {
        Document doc = new Document();
        doc.Add(NewStringField("content", testDoc.content, Field.Store.NO));

        // The value is stored on the test doc too, so the brute-force pass below can sum it.
        testDoc.value = Random.NextSingle();
        doc.Add(new SingleDocValuesField("value", testDoc.value));

        for (int j = 0; j < numDims; j++)
        {
            if (testDoc.dims[j] != null)
            {
                doc.Add(new FacetField("dim" + j, testDoc.dims[j]));
            }
        }
        w.AddDocument(config.Build(tw, doc));
    }

    // NRT open
    IndexSearcher searcher = NewSearcher(w.GetReader());

    // NRT open
    var tr = new DirectoryTaxonomyReader(tw);

    ValueSource values = new SingleFieldSource("value");

    int iters = AtLeast(100);
    for (int iter = 0; iter < iters; iter++)
    {
        string searchToken = tokens[Random.Next(tokens.Length)];
        if (Verbose)
        {
            Console.WriteLine("\nTEST: iter content=" + searchToken);
        }

        FacetsCollector fc = new FacetsCollector();
        FacetsCollector.Search(searcher, new TermQuery(new Term("content", searchToken)), 10, fc);
        Facets facets = new TaxonomyFacetSumValueSource(tr, config, fc, values);

        // Slow, yet hopefully bug-free, faceting: one label -> sum map per dimension.
        var expectedValues = new List<Dictionary<string, float?>>(numDims);
        for (int i = 0; i < numDims; i++)
        {
            expectedValues.Add(new Dictionary<string, float?>());
        }

        foreach (TestDoc doc in testDocs)
        {
            if (!doc.content.Equals(searchToken, StringComparison.Ordinal))
            {
                continue;
            }

            for (int j = 0; j < numDims; j++)
            {
                string label = doc.dims[j];
                if (label == null)
                {
                    continue;
                }

                // First occurrence (or a null entry) starts the sum; later ones add to it.
                if (!expectedValues[j].TryGetValue(label, out float? v) || v == null)
                {
                    expectedValues[j][label] = doc.value;
                }
                else
                {
                    expectedValues[j][label] = (float)v + doc.value;
                }
            }
        }

        List<FacetResult> expected = new List<FacetResult>();
        for (int i = 0; i < numDims; i++)
        {
            List<LabelAndValue> labelValues = new List<LabelAndValue>();
            float totValue = 0;
            foreach (KeyValuePair<string, float?> ent in expectedValues[i])
            {
                labelValues.Add(new LabelAndValue(ent.Key, ent.Value.Value));
                totValue += ent.Value.Value;
            }
            SortLabelValues(labelValues);

            // Dimensions whose total is not positive are left out of the expected results.
            if (totValue > 0)
            {
                expected.Add(new FacetResult("dim" + i, new string[0], totValue, labelValues.ToArray(), labelValues.Count));
            }
        }

        // Bring the expected results into a deterministic order using the shared helper.
        SortFacetResults(expected);

        IList<FacetResult> actual = facets.GetAllDims(10);

        // Messy: fixup ties
        SortTies(actual);

        if (Verbose)
        {
            // List<T>.ToString() only prints the type name; join the elements so the
            // verbose output actually shows the facet results being compared.
            Console.WriteLine("expected=\n" + string.Join("\n", expected));
            Console.WriteLine("actual=\n" + string.Join("\n", actual));
        }

        AssertFloatValuesEquals(expected, actual);
    }

    IOUtils.Dispose(w, tw, searcher.IndexReader, tr, indexDir, taxoDir);
}
/// <summary>
/// Indexes three documents with "num" values 10, 20 and 30 that carry one, two and three
/// facet dimensions respectively, then checks the per-dimension sums of "num" reported by
/// <c>TaxonomyFacetSumValueSource</c>.
/// </summary>
public virtual void TestSparseFacets()
{
    Store.Directory indexDirectory = NewDirectory();
    Store.Directory taxoDirectory = NewDirectory();

    // Facet ordinals live in their own directory, separate from the main index.
    var taxoWriter = new DirectoryTaxonomyWriter(taxoDirectory, IndexWriterConfig.OpenMode_e.CREATE);

    RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), indexDirectory, Similarity, TimeZone);
    FacetsConfig facetsConfig = new FacetsConfig();

    // Document 1: dimension "a" only.
    Document first = new Document();
    first.Add(new IntField("num", 10, Field.Store.NO));
    first.Add(new FacetField("a", "foo1"));
    indexWriter.AddDocument(facetsConfig.Build(taxoWriter, first));

    // Randomly commit between documents.
    if (Random().NextBoolean())
    {
        indexWriter.Commit();
    }

    // Document 2: dimensions "a" and "b".
    Document second = new Document();
    second.Add(new IntField("num", 20, Field.Store.NO));
    second.Add(new FacetField("a", "foo2"));
    second.Add(new FacetField("b", "bar1"));
    indexWriter.AddDocument(facetsConfig.Build(taxoWriter, second));

    if (Random().NextBoolean())
    {
        indexWriter.Commit();
    }

    // Document 3: dimensions "a", "b" and "c".
    Document third = new Document();
    third.Add(new IntField("num", 30, Field.Store.NO));
    third.Add(new FacetField("a", "foo3"));
    third.Add(new FacetField("b", "bar2"));
    third.Add(new FacetField("c", "baz1"));
    indexWriter.AddDocument(facetsConfig.Build(taxoWriter, third));

    // Near-real-time searcher, opened from the writer before it is disposed.
    IndexSearcher searcher = NewSearcher(indexWriter.Reader);
    indexWriter.Dispose();

    // Near-real-time taxonomy reader, opened from the taxonomy writer.
    var taxoReader = new DirectoryTaxonomyReader(taxoWriter);
    taxoWriter.Dispose();

    FacetsCollector collector = new FacetsCollector();
    searcher.Search(new MatchAllDocsQuery(), collector);

    TaxonomyFacetSumValueSource facets = new TaxonomyFacetSumValueSource(taxoReader, new FacetsConfig(), collector, new IntFieldSource("num"));

    // Top 10 labels for every dimension that has values.
    IList<FacetResult> allDims = facets.GetAllDims(10);
    Assert.AreEqual(3, allDims.Count);

    Assert.AreEqual("dim=a path=[] value=60.0 childCount=3\n foo3 (30.0)\n foo2 (20.0)\n foo1 (10.0)\n", allDims[0].ToString());
    Assert.AreEqual("dim=b path=[] value=50.0 childCount=2\n bar2 (30.0)\n bar1 (20.0)\n", allDims[1].ToString());
    Assert.AreEqual("dim=c path=[] value=30.0 childCount=1\n baz1 (30.0)\n", allDims[2].ToString());

    IOUtils.Close(searcher.IndexReader, taxoReader, indexDirectory, taxoDirectory);
}
/// <summary>
/// Randomized check of taxonomy facet counts: indexes random documents with up to
/// <c>numDims</c> facet dimensions, then, for a number of random term queries, compares the
/// counts from <c>GetTaxonomyFacetCounts</c> against counts computed by brute force over the
/// in-memory test documents.
/// </summary>
public virtual void TestRandom()
{
    string[] tokens = GetRandomTokens(10);
    Store.Directory indexDir = NewDirectory();
    Store.Directory taxoDir = NewDirectory();

    RandomIndexWriter w = new RandomIndexWriter(Random(), indexDir, Similarity, TimeZone);
    var tw = new DirectoryTaxonomyWriter(taxoDir);
    FacetsConfig config = new FacetsConfig();
    int numDocs = AtLeast(1000);
    int numDims = TestUtil.NextInt(Random(), 1, 7);
    IList<TestDoc> testDocs = GetRandomDocs(tokens, numDocs, numDims);

    foreach (TestDoc testDoc in testDocs)
    {
        Document doc = new Document();
        doc.Add(NewStringField("content", testDoc.content, Field.Store.NO));
        for (int j = 0; j < numDims; j++)
        {
            if (testDoc.dims[j] != null)
            {
                doc.Add(new FacetField("dim" + j, testDoc.dims[j]));
            }
        }
        w.AddDocument(config.Build(tw, doc));
    }

    // NRT open
    IndexSearcher searcher = NewSearcher(w.Reader);

    // NRT open
    var tr = new DirectoryTaxonomyReader(tw);

    int iters = AtLeast(100);
    for (int iter = 0; iter < iters; iter++)
    {
        string searchToken = tokens[Random().Next(tokens.Length)];
        if (VERBOSE)
        {
            Console.WriteLine("\nTEST: iter content=" + searchToken);
        }

        FacetsCollector fc = new FacetsCollector();
        FacetsCollector.Search(searcher, new TermQuery(new Term("content", searchToken)), 10, fc);
        Facets facets = GetTaxonomyFacetCounts(tr, config, fc);

        // Slow, yet hopefully bug-free, faceting: one label -> count map per dimension.
        var expectedCounts = new List<Dictionary<string, int>>();
        for (int i = 0; i < numDims; i++)
        {
            expectedCounts.Add(new Dictionary<string, int>());
        }

        foreach (TestDoc doc in testDocs)
        {
            if (!doc.content.Equals(searchToken, StringComparison.Ordinal))
            {
                continue;
            }

            for (int j = 0; j < numDims; j++)
            {
                string label = doc.dims[j];
                if (label == null)
                {
                    continue;
                }

                // Single lookup: a missing label leaves 'seen' at 0, so the first hit stores 1.
                int seen;
                expectedCounts[j].TryGetValue(label, out seen);
                expectedCounts[j][label] = seen + 1;
            }
        }

        List<FacetResult> expected = new List<FacetResult>();
        for (int i = 0; i < numDims; i++)
        {
            List<LabelAndValue> labelValues = new List<LabelAndValue>();
            int totCount = 0;
            foreach (KeyValuePair<string, int> ent in expectedCounts[i])
            {
                labelValues.Add(new LabelAndValue(ent.Key, ent.Value));
                totCount += ent.Value;
            }
            SortLabelValues(labelValues);

            // Dimensions with no matching documents are left out of the expected results.
            if (totCount > 0)
            {
                expected.Add(new FacetResult("dim" + i, new string[0], totCount, labelValues.ToArray(), labelValues.Count));
            }
        }

        // Bring the expected results into a deterministic order using the shared helper.
        SortFacetResults(expected);

        IList<FacetResult> actual = facets.GetAllDims(10);

        // Messy: fixup ties
        SortTies(actual);

        Assert.AreEqual(expected, actual);
    }

    IOUtils.Close(w, tw, searcher.IndexReader, tr, indexDir, taxoDir);
}
/// <summary>
/// Checks that a directory-based <c>SearcherTaxonomyManager</c> picks up a taxonomy that was
/// swapped in via <c>ReplaceTaxonomy</c>: the taxonomy reader size goes from 1 to 3 after
/// the replacement is committed and the manager is refreshed.
/// </summary>
public virtual void TestReplaceTaxonomyDirectory()
{
    Store.Directory indexDir = NewDirectory();
    Store.Directory taxoDir = NewDirectory();
    IndexWriter indexWriter = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
    var taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
    indexWriter.Commit();
    taxoWriter.Commit();

    // Build the replacement taxonomy (containing "a/b") in a second directory.
    Store.Directory replacementDir = NewDirectory();
    var replacementWriter = new DirectoryTaxonomyWriter(replacementDir);
    replacementWriter.AddCategory(new FacetLabel("a", "b"));
    replacementWriter.Dispose();

    var manager = new SearcherTaxonomyManager(indexDir, taxoDir, null);

    // Before the swap the managed taxonomy has size 1.
    SearcherAndTaxonomy acquired = manager.Acquire();
    try
    {
        Assert.AreEqual(1, acquired.taxonomyReader.Size);
    }
    finally
    {
        manager.Release(acquired);
    }

    indexWriter.AddDocument(new Document());
    taxoWriter.ReplaceTaxonomy(replacementDir);
    replacementDir.Dispose();
    indexWriter.Commit();
    taxoWriter.Commit();

    manager.MaybeRefresh();

    // After commit and refresh the replaced taxonomy (size 3) is visible.
    acquired = manager.Acquire();
    try
    {
        Assert.AreEqual(3, acquired.taxonomyReader.Size);
    }
    finally
    {
        manager.Release(acquired);
    }

    IOUtils.Close(manager, taxoWriter, indexWriter, taxoDir, indexDir);
}
/// <summary>
/// One-time fixture setup: creates the shared index and taxonomy directories, resets the
/// expected-count tables, and indexes four batches of documents through the helper methods
/// (no facets; facets without terms; facets with terms; facets with some terms).
/// </summary>
public static void BeforeClassCountingFacetsAggregatorTest()
{
    indexDir = NewDirectory();
    taxoDir = NewDirectory();

    // Intended index layout:
    //   1. segment with no categories, but matching results
    //   2. segment with categories, but no results
    //   3. segment with categories and results
    //   4. segment with categories, but only some results
    IndexWriterConfig writerConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    //writerConfig.MergePolicy = NoMergePolicy.INSTANCE; // prevent merges, so we can control the index segments
    IndexWriter docWriter = new IndexWriter(indexDir, writerConfig);
    TaxonomyWriter categoryWriter = new DirectoryTaxonomyWriter(taxoDir);

    allExpectedCounts = newCounts();
    termExpectedCounts = newCounts();

    // 1. no categories
    IndexDocsNoFacets(docWriter);

    // 2. categories, no content
    IndexDocsWithFacetsNoTerms(docWriter, categoryWriter, allExpectedCounts);

    // 3. categories and content
    IndexDocsWithFacetsAndTerms(docWriter, categoryWriter, allExpectedCounts);

    // 4. categories and some content
    IndexDocsWithFacetsAndSomeTerms(docWriter, categoryWriter, allExpectedCounts);

    IOUtils.Close(docWriter, categoryWriter);
}
/// <summary>
/// End-to-end check of <c>FastTaxonomyFacetCounts</c>: indexes five documents with an
/// "Author" and a hierarchical "Publish Date" facet, verifies the top children for both
/// dimensions, verifies the counts after drilling down on Publish Date/2010, and finally
/// smoke-tests the output of <c>PrintTaxonomyStats</c>.
/// </summary>
public virtual void TestBasic()
{
    Store.Directory indexDirectory = NewDirectory();
    Store.Directory taxoDirectory = NewDirectory();

    // Facet ordinals live in their own directory, separate from the main index.
    var taxoWriter = new DirectoryTaxonomyWriter(taxoDirectory, IndexWriterConfig.OpenMode_e.CREATE);

    FacetsConfig config = new FacetsConfig();
    config.SetHierarchical("Publish Date", true);

    RandomIndexWriter writer = new RandomIndexWriter(Random(), indexDirectory, Similarity, TimeZone);

    // One row per document: author, then year / month / day of the publish date.
    string[][] books =
    {
        new[] { "Bob", "2010", "10", "15" },
        new[] { "Lisa", "2010", "10", "20" },
        new[] { "Lisa", "2012", "1", "1" },
        new[] { "Susan", "2012", "1", "7" },
        new[] { "Frank", "1999", "5", "5" },
    };
    foreach (string[] book in books)
    {
        Document bookDoc = new Document();
        bookDoc.Add(new FacetField("Author", book[0]));
        bookDoc.Add(new FacetField("Publish Date", book[1], book[2], book[3]));
        writer.AddDocument(config.Build(taxoWriter, bookDoc));
    }

    // Near-real-time searcher and taxonomy reader, opened straight from the writers.
    IndexSearcher searcher = NewSearcher(writer.Reader);
    var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

    // Browse everything: MatchAllDocsQuery feeds every non-deleted document to the
    // collector. A real application would normally run its own query and wrap hit
    // collection and facet collection together in a MultiCollector.
    FacetsCollector collector = new FacetsCollector();
    searcher.Search(new MatchAllDocsQuery(), collector);

    Facets facets = new FastTaxonomyFacetCounts(taxoReader, config, collector);

    // Top children for both dimensions over the whole index.
    Assert.AreEqual("dim=Publish Date path=[] value=5 childCount=3\n 2010 (2)\n 2012 (2)\n 1999 (1)\n", facets.GetTopChildren(10, "Publish Date").ToString());
    Assert.AreEqual("dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Bob (1)\n Susan (1)\n Frank (1)\n", facets.GetTopChildren(10, "Author").ToString());

    // Drill down on Publish Date/2010 and count again.
    DrillDownQuery drillDown = new DrillDownQuery(config);
    drillDown.Add("Publish Date", "2010");
    collector = new FacetsCollector();
    searcher.Search(drillDown, collector);
    facets = new FastTaxonomyFacetCounts(taxoReader, config, collector);

    Assert.AreEqual("dim=Author path=[] value=2 childCount=2\n Bob (1)\n Lisa (1)\n", facets.GetTopChildren(10, "Author").ToString());
    Assert.AreEqual(1, facets.GetSpecificValue("Author", "Lisa"));
    Assert.Null(facets.GetTopChildren(10, "Non exitent dim"));

    // Smoke test PrintTaxonomyStats: capture what it prints into a string.
    string stats;
    using (ByteArrayOutputStream buffer = new ByteArrayOutputStream())
    {
        using (StreamWriter statsWriter = new StreamWriter(buffer, Encoding.UTF8, 2048, true) { AutoFlush = true })
        {
            PrintTaxonomyStats.PrintStats(taxoReader, statsWriter, true);
        }
        stats = buffer.ToString();
    }

    // The dimension summaries first, then a few nodes of the printed tree.
    string[] expectedFragments =
    {
        "/Author: 4 immediate children; 5 total categories",
        "/Publish Date: 3 immediate children; 12 total categories",
        " /1999",
        " /2012",
        " /20",
    };
    foreach (string fragment in expectedFragments)
    {
        Assert.True(stats.IndexOf(fragment, StringComparison.Ordinal) >= 0);
    }

    IOUtils.Close(writer, taxoWriter, searcher.IndexReader, taxoReader, taxoDirectory, indexDirectory);
}
/// <summary>
/// Same scenario as the default-field test, but with the "Author" dimension redirected to
/// the "$author" index field: facets for "Author" are read from "$author", everything else
/// from the default field, combined through <c>MultiFacets</c>.
/// </summary>
public virtual void TestCustom()
{
    Directory indexDir = NewDirectory();
    Directory taxoDir = NewDirectory();

    // Index writer.
    RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false)));

    // Taxonomy writer.
    var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);

    FacetsConfig config = Config;
    config.SetIndexFieldName("Author", "$author");
    seedIndex(taxoWriter, indexWriter, config);

    IndexReader indexReader = indexWriter.Reader;
    taxoWriter.Commit();

    // Taxonomy reader over the committed taxonomy, and a searcher over the index reader.
    var taxoReader = new DirectoryTaxonomyReader(taxoDir);
    IndexSearcher searcher = NewSearcher(indexReader);

    FacetsCollector collector = PerformSearch(taxoReader, indexReader, searcher);

    // "Author" comes from its own index field; all other dimensions use the default one.
    IDictionary<string, Facets> perDimension = new Dictionary<string, Facets>
    {
        { "Author", GetTaxonomyFacetCounts(taxoReader, config, collector, "$author") }
    };
    Facets combined = new MultiFacets(perDimension, GetTaxonomyFacetCounts(taxoReader, config, collector));

    // Hand-check the results, then make sure ordinals were written to both fields.
    AssertCorrectResults(combined);
    assertOrdinalsExist("$facets", indexReader);
    assertOrdinalsExist("$author", indexReader);

    IOUtils.Close(taxoReader, indexReader, indexWriter, taxoWriter, indexDir, taxoDir);
}
/// <summary>
/// Add text to the existing index. One document is written per <see cref="AddTextData"/> entry,
/// containing the facet field, the lower-cased name ("textname"), the lower-cased complete text
/// ("textcomplete") and one "facetcontent" field per non-empty word of the text.
/// </summary>
/// <param name="writer">The index writer.</param>
/// <param name="facetWriter">The facet index writer.</param>
/// <param name="addTextData">The text data to add, grouped by the facet field each group belongs to.
/// Entries whose array is null or empty are skipped.</param>
/// <param name="config">The facet configuration information.</param>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="writer"/>, <paramref name="facetWriter"/> or <paramref name="addTextData"/> is null.
/// </exception>
/// <remarks>
/// Both writers are committed whenever the accumulated text length exceeds an internal
/// threshold, and once more at the end.
/// </remarks>
public void AddText(Lucene.Net.Index.IndexWriter writer, DirectoryTaxonomyWriter facetWriter, Dictionary<FacetField, AddTextData[]> addTextData, FacetsConfig config)
{
    // These three are always dereferenced below; fail with a descriptive exception
    // instead of a NullReferenceException from somewhere inside the method.
    if (writer == null)
    {
        throw new ArgumentNullException(nameof(writer));
    }
    if (facetWriter == null)
    {
        throw new ArgumentNullException(nameof(facetWriter));
    }
    if (addTextData == null)
    {
        throw new ArgumentNullException(nameof(addTextData));
    }

    // Commit once this many characters of text have been added since the last commit.
    const long maxTextLengthBeforeCommit = 30000000L;
    long totalTextLength = 0;

    // Field types that never change are built once and shared by all documents.
    FieldType nameFieldType = new Lucene.Net.Documents.FieldType()
    {
        Indexed = true,
        Tokenized = false,
        Stored = true,
        IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS,
    };
    FieldType textFieldType = new Lucene.Net.Documents.FieldType()
    {
        Indexed = true,
        Tokenized = false,
        Stored = false,
        IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS,
    };

    // For each text facet.
    foreach (KeyValuePair<FacetField, AddTextData[]> item in addTextData)
    {
        // Nothing to add for this facet.
        if (item.Value == null || item.Value.Length == 0)
        {
            continue;
        }

        // For each text.
        foreach (AddTextData data in item.Value)
        {
            // Whether the complete text is stored is decided per entry, so each document
            // gets its own field type rather than mutating one that earlier fields still
            // reference.
            FieldType completeFieldType = new Lucene.Net.Documents.FieldType()
            {
                Indexed = true,
                Tokenized = false,
                Stored = data.StoreText,
                IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS,
            };

            // Create the document.
            // NOTE(review): ToLower() is culture-sensitive; confirm that the query side
            // lower-cases the same way before switching to ToLowerInvariant().
            Lucene.Net.Documents.Document document = new Lucene.Net.Documents.Document();
            Lucene.Net.Documents.Field textName = new Field("textname", data.Name.ToLower(), nameFieldType);
            Lucene.Net.Documents.Field textComplete = new Field("textcomplete", data.Text.ToLower(), completeFieldType);

            document.Add(item.Key);
            document.Add(textName);
            document.Add(textComplete);

            // Split the text into words.
            string[] words = data.Text.Words();

            // If words exist.
            if (words != null && words.Length > 0)
            {
                // Add one field per word.
                foreach (string rawWord in words)
                {
                    // Format the word.
                    string word = rawWord.ToLower().RemovePunctuationFromStartAndEnd();

                    // Words that are empty after formatting are not indexed.
                    if (!String.IsNullOrEmpty(word))
                    {
                        Lucene.Net.Documents.Field textData = new Field("facetcontent", word, textFieldType);
                        document.Add(textData);
                    }
                }
            }

            // Add the document.
            writer.AddDocument(config.Build(facetWriter, document));

            // Commit after a set amount of text (measured in characters), not per document.
            totalTextLength += data.Text.Length;
            if (totalTextLength > maxTextLengthBeforeCommit)
            {
                // Commit the index.
                writer.Commit();
                facetWriter.Commit();
                totalTextLength = 0;
            }
        }
    }

    // Commit the index.
    writer.Commit();
    facetWriter.Commit();
}
/// <summary>
/// Fixture setup for the drill-down query tests: randomizes the configuration of dimensions
/// "a" and "b", indexes 100 documents whose content terms and facets depend on the document
/// number, and opens the shared index reader and taxonomy reader.
/// </summary>
public void BeforeClassDrillDownQueryTest()
{
    dir = NewDirectory();
    Random rnd = Random();
    RandomIndexWriter indexWriter = new RandomIndexWriter(rnd, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(rnd, MockTokenizer.KEYWORD, false)));

    taxoDir = NewDirectory();
    ITaxonomyWriter categoryWriter = new DirectoryTaxonomyWriter(taxoDir);
    config = new FacetsConfig();

    // Randomize the per-dimension configuration of "a" ...
    config.SetHierarchical("a", Random().NextBoolean());
    config.SetMultiValued("a", Random().NextBoolean());
    if (Random().NextBoolean())
    {
        config.SetIndexFieldName("a", "$a");
    }
    config.SetRequireDimCount("a", true);

    // ... and of "b".
    config.SetHierarchical("b", Random().NextBoolean());
    config.SetMultiValued("b", Random().NextBoolean());
    if (Random().NextBoolean())
    {
        config.SetIndexFieldName("b", "$b");
    }
    config.SetRequireDimCount("b", true);

    for (int docNum = 0; docNum < 100; docNum++)
    {
        Document doc = new Document();

        // Every 2nd document (50 in total) contains "foo".
        if (docNum % 2 == 0)
        {
            doc.Add(new TextField("content", "foo", Field.Store.NO));
        }

        // Every 3rd document (33) contains "bar".
        if (docNum % 3 == 0)
        {
            doc.Add(new TextField("content", "bar", Field.Store.NO));
        }

        // Every 4th document (25) has facet "a", with a randomly chosen value.
        if (docNum % 4 == 0)
        {
            string aValue = rnd.NextBoolean() ? "1" : "2";
            doc.Add(new FacetField("a", aValue));
        }

        // Every 5th document (20) has facet b/1.
        if (docNum % 5 == 0)
        {
            doc.Add(new FacetField("b", "1"));
        }

        indexWriter.AddDocument(config.Build(categoryWriter, doc));
    }

    categoryWriter.Dispose();
    reader = indexWriter.Reader;
    indexWriter.Dispose();

    taxo = new DirectoryTaxonomyReader(taxoDir);
}
/// <summary>
/// Add documents. For every file in <paramref name="files"/> that exists, one index document
/// is written containing the facet field, the file's relative path ("path"), its last write
/// time ("modified") and one "facetcontent" field per non-empty word of the extracted content.
/// </summary>
/// <param name="writer">The index writer.</param>
/// <param name="facetWriter">The facet index writer.</param>
/// <param name="directoryInfo">The directory information where all the files that are to be added are located;
/// its root path is stripped from each file name to build the "path" field.</param>
/// <param name="files">The list of files that are to be added. Files that do not exist are skipped.</param>
/// <param name="documents">The supported documents search filter, used to indicate what files are to be added.
/// Also supplies the tokenize/store settings for content and the size threshold between commits.</param>
/// <param name="facetField">The facet field information.</param>
/// <param name="config">The facet configuration information.</param>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="files"/> or <paramref name="documents"/> is null.
/// </exception>
public void AddDocuments(Lucene.Net.Index.IndexWriter writer, DirectoryTaxonomyWriter facetWriter, DirectoryInfo directoryInfo, string[] files, SupportedDocumentExtension documents, FacetField facetField, FacetsConfig config)
{
    // Both are dereferenced unconditionally below; fail with a descriptive exception
    // instead of a NullReferenceException.
    if (files == null)
    {
        throw new ArgumentNullException(nameof(files));
    }
    if (documents == null)
    {
        throw new ArgumentNullException(nameof(documents));
    }

    FieldType pathFieldType = new Lucene.Net.Documents.FieldType()
    {
        Indexed = true,
        Tokenized = false,
        Stored = true,
        IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS,
    };

    FieldType contentFieldType = new Lucene.Net.Documents.FieldType()
    {
        Indexed = true,
        Tokenized = documents.TokenizeContent,
        Stored = documents.StoreContent,
        IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS,
    };

    // For each file.
    foreach (string filePath in files)
    {
        // Files that do not exist are skipped.
        if (!File.Exists(filePath))
        {
            continue;
        }

        Lucene.Net.Documents.Document document = new Lucene.Net.Documents.Document();

        // No catch block: exceptions propagate unchanged (the previous catch only
        // re-threw), and the finally still closes the opened document.
        try
        {
            FileInfo fileInfo = new FileInfo(filePath);

            // Path relative to the directory root, lower-cased once, with forward slashes.
            string file = filePath.Replace(directoryInfo.Root.FullName, "").ToLower();

            Lucene.Net.Documents.Field path = new Field("path", file.Replace("\\", "/"), pathFieldType);
            Lucene.Net.Documents.Field modified = new Field("modified", fileInfo.LastWriteTime.ToShortDateString() + " " + fileInfo.LastWriteTime.ToShortTimeString(), pathFieldType);

            // Add the fields.
            document.Add(facetField);
            document.Add(path);
            document.Add(modified);

            // Open the document and extract its content.
            OpenDocument(filePath);
            string content = Nequeo.Xml.Document.ExtractContent(_xDocument);

            // If content exists.
            if (!String.IsNullOrEmpty(content))
            {
                // Split the content into words.
                string[] words = content.Words();

                // If words exist.
                if (words != null && words.Length > 0)
                {
                    // Add one field per word.
                    foreach (string rawWord in words)
                    {
                        // Format the word.
                        string word = rawWord.ToLower().RemovePunctuationFromStartAndEnd();

                        // Words that are empty after formatting are not indexed.
                        if (!String.IsNullOrEmpty(word))
                        {
                            Lucene.Net.Documents.Field contentField = new Field("facetcontent", word, contentFieldType);
                            document.Add(contentField);
                        }
                    }
                }
            }

            // Add the document.
            writer.AddDocument(config.Build(facetWriter, document));
            _document.Close();

            // Commit once the accumulated file size exceeds the configured maximum.
            documents.TotalDocumentSize += fileInfo.Length;
            if (documents.TotalDocumentSize > documents.MaxDocumentSizePerCommit)
            {
                // Commit the index.
                writer.Commit();
                facetWriter.Commit();
                documents.TotalDocumentSize = 0;
            }
        }
        finally
        {
            CloseDocument();
        }
    }
}
/// <summary>
/// Indexes 100 documents carrying a numeric "field" (values 0..99) and a taxonomy facet
/// "dim" ("a" for every 4th value, "b" otherwise), then runs three <c>DrillSideways</c>
/// searches — no drill-down, drill-down on dim=b, and drill-down on the numeric range
/// 0..10 — and checks hit totals and the facet output for both "dim" and "field".
/// </summary>
public virtual void TestMixedRangeAndNonRangeTaxonomy()
{
    Directory indexDir = NewDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), indexDir, Similarity, TimeZone);
    Directory taxoDir = NewDirectory();
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);

    FacetsConfig config = new FacetsConfig();

    for (long value = 0; value < 100; value++)
    {
        Document doc = new Document();

        // Doc values: used to compute the range facet counts.
        doc.Add(new NumericDocValuesField("field", value));

        // Indexed numeric field: used to drill down by numeric range.
        doc.Add(new Int64Field("field", value, Field.Store.NO));

        // Multiples of four go to "a", everything else to "b".
        string dimValue = (value & 3) == 0 ? "a" : "b";
        doc.Add(new FacetField("dim", dimValue));

        indexWriter.AddDocument(config.Build(taxoWriter, doc));
    }

    IndexReader indexReader = indexWriter.Reader;
    var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

    IndexSearcher searcher = NewSearcher(indexReader);
    if (VERBOSE)
    {
        Console.WriteLine("TEST: searcher=" + searcher);
    }

    DrillSideways drillSideways = new DrillSidewaysAnonymousInnerClassHelper(this, searcher, config, taxoReader);

    // Search 1: no drill-downs at all.
    DrillDownQuery query = new DrillDownQuery(config);
    DrillSidewaysResult result = drillSideways.Search(null, query, 10);

    Assert.AreEqual(100, result.Hits.TotalHits);
    Assert.AreEqual("dim=dim path=[] value=100 childCount=2\n b (75)\n a (25)\n", result.Facets.GetTopChildren(10, "dim").ToString());
    Assert.AreEqual("dim=field path=[] value=21 childCount=5\n less than 10 (10)\n less than or equal to 10 (11)\n over 90 (9)\n 90 or above (10)\n over 1000 (0)\n", result.Facets.GetTopChildren(10, "field").ToString());

    // Search 2: drill down on dim=b.
    query = new DrillDownQuery(config);
    query.Add("dim", "b");
    result = drillSideways.Search(null, query, 10);

    Assert.AreEqual(75, result.Hits.TotalHits);
    Assert.AreEqual("dim=dim path=[] value=100 childCount=2\n b (75)\n a (25)\n", result.Facets.GetTopChildren(10, "dim").ToString());
    Assert.AreEqual("dim=field path=[] value=16 childCount=5\n less than 10 (7)\n less than or equal to 10 (8)\n over 90 (7)\n 90 or above (8)\n over 1000 (0)\n", result.Facets.GetTopChildren(10, "field").ToString());

    // Search 3: drill down on the "less than or equal to 10" range.
    query = new DrillDownQuery(config);
    query.Add("field", NumericRangeQuery.NewInt64Range("field", 0L, 10L, true, true));
    result = drillSideways.Search(null, query, 10);

    Assert.AreEqual(11, result.Hits.TotalHits);
    Assert.AreEqual("dim=dim path=[] value=11 childCount=2\n b (8)\n a (3)\n", result.Facets.GetTopChildren(10, "dim").ToString());
    Assert.AreEqual("dim=field path=[] value=21 childCount=5\n less than 10 (10)\n less than or equal to 10 (11)\n over 90 (9)\n 90 or above (10)\n over 1000 (0)\n", result.Facets.GetTopChildren(10, "field").ToString());

    IOUtils.Close(taxoWriter, taxoReader, taxoDir, indexWriter, indexReader, indexDir);
}
/// <summary>
/// Fills a taxonomy through <c>FillTaxonomy</c>, reopens it with a reader and checks its
/// size, the ordinal-to-path and path-to-ordinal mappings against <c>ExpectedCategories</c>,
/// and the results for invalid ordinals and unknown categories.
/// </summary>
public virtual void TestReaderBasic()
{
    var taxoDir = NewDirectory();
    var writer = new DirectoryTaxonomyWriter(taxoDir);
    FillTaxonomy(writer);
    writer.Dispose();

    var reader = new DirectoryTaxonomyReader(taxoDir);

    // Size must match the number of expected categories.
    Assert.AreEqual(ExpectedCategories.Length, reader.Count);

    // Round trip: ordinal => category => ordinal.
    for (int ord = 0; ord < reader.Count; ord++)
    {
        Assert.AreEqual(ord, reader.GetOrdinal(reader.GetPath(ord)));
    }

    // Ordinal => category, compared with the expected table (root excluded).
    for (int ord = 1; ord < reader.Count; ord++)
    {
        FacetLabel wanted = new FacetLabel(ExpectedCategories[ord]);
        FacetLabel found = reader.GetPath(ord);
        if (wanted.Equals(found))
        {
            continue;
        }
        Fail("For ordinal " + ord + " expected category " + Showcat(wanted) + ", but got " + Showcat(found));
    }

    // Invalid ordinals have no path.
    Assert.Null(reader.GetPath(-1));
    Assert.Null(reader.GetPath(reader.Count));
    Assert.Null(reader.GetPath(TaxonomyReader.INVALID_ORDINAL));

    // Category => ordinal, compared with the position in the expected table.
    for (int index = 1; index < ExpectedCategories.Length; index++)
    {
        int wantedOrdinal = index;
        int foundOrdinal = reader.GetOrdinal(new FacetLabel(ExpectedCategories[index]));
        if (wantedOrdinal == foundOrdinal)
        {
            continue;
        }
        Fail("For category " + Showcat(ExpectedCategories[index]) + " expected ordinal " + wantedOrdinal + ", but got " + foundOrdinal);
    }

    // Unknown categories map to the invalid ordinal.
    Assert.AreEqual(TaxonomyReader.INVALID_ORDINAL, reader.GetOrdinal(new FacetLabel("non-existant")));
    Assert.AreEqual(TaxonomyReader.INVALID_ORDINAL, reader.GetOrdinal(new FacetLabel("Author", "Jules Verne")));

    reader.Dispose();
    taxoDir.Dispose();
}
/// <summary>
/// Initializes a new instance of the <see cref="LuceneIndex" /> class.
/// </summary>
/// <param name="indexPath">The path to the directory that will contain the Lucene index files.</param>
/// <param name="schema">The schema.</param>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="indexPath"/> is null, empty or whitespace, or <paramref name="schema"/> is null.
/// </exception>
/// <exception cref="System.InvalidOperationException">
/// The directory at <paramref name="indexPath"/> already exists and <paramref name="schema"/> reports <c>IsDefault()</c>.
/// </exception>
/// <remarks>
/// The numeric app settings ("IndexWriter.RAMBufferSizeMB", "IndexSearcher.RefreshIntervalSeconds",
/// "IndexWriter.CommitIntervalSeconds") are parsed with the invariant culture so that the built-in
/// defaults (for example "0.5") mean the same thing regardless of the machine's regional settings.
/// They are parsed before any directory or writer is opened, so a malformed setting fails fast
/// without leaving Lucene resources open.
/// </remarks>
public LuceneIndex(string indexPath, Schema schema)
{
    if (String.IsNullOrWhiteSpace(indexPath))
    {
        throw new ArgumentNullException(nameof(indexPath));
    }

    if (schema == null)
    {
        throw new ArgumentNullException(nameof(schema));
    }

    IndexPath = indexPath;
    Schema = schema;

    if (System.IO.Directory.Exists(IndexPath))
    {
        if (Schema.IsDefault())
        {
            throw new InvalidOperationException($"There is an existing index on '{IndexPath}'.");
        }
    }
    else
    {
        System.IO.Directory.CreateDirectory(IndexPath);
    }

    // Configuration values are machine-readable: always parse them culture-independently.
    // Parsing with the current culture would turn the default "0.5" into 5 on cultures
    // that use '.' as a group separator.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    _ramBufferSizeMB = Double.Parse(ConfigurationManager.AppSettings["IndexWriter.RAMBufferSizeMB"] ?? "128", invariant);
    _refreshIntervalSeconds = Double.Parse(ConfigurationManager.AppSettings["IndexSearcher.RefreshIntervalSeconds"] ?? "0.5", invariant);
    _commitIntervalSeconds = Double.Parse(ConfigurationManager.AppSettings["IndexWriter.CommitIntervalSeconds"] ?? "60", invariant);

    _indexDirectory = new MMapDirectory(Paths.get(IndexPath));

    // The taxonomy index lives in a "taxonomy" sub-directory of the main index.
    var taxonomyIndexPath = System.IO.Path.Combine(IndexPath, "taxonomy");
    if (!System.IO.Directory.Exists(taxonomyIndexPath))
    {
        System.IO.Directory.CreateDirectory(taxonomyIndexPath);
    }

    _taxonomyDirectory = new MMapDirectory(Paths.get(taxonomyIndexPath));

    _compositeAnalyzer = new CompositeAnalyzer(Schema);

    var config = new IndexWriterConfig(_compositeAnalyzer)
        .SetOpenMode(IndexWriterConfigOpenMode.CREATE_OR_APPEND)
        .SetRAMBufferSizeMB(_ramBufferSizeMB)
        .SetCommitOnClose(true);

    _indexWriter = new IndexWriter(_indexDirectory, config);
    _taxonomyWriter = new DirectoryTaxonomyWriter(_taxonomyDirectory, IndexWriterConfigOpenMode.CREATE_OR_APPEND);
    _searcherTaxonomyManager = new SearcherTaxonomyManager(_indexWriter, true, null, _taxonomyWriter);
    _facetBuilder = new LuceneFacetBuilder(_taxonomyWriter);

    _writeAllowedFlag = new ManualResetEventSlim(true);

    // Periodic background work: each timer first fires after one interval, then repeats at that interval.
    _refreshTimer = new Timer(o => Refresh(), null, TimeSpan.FromSeconds(_refreshIntervalSeconds), TimeSpan.FromSeconds(_refreshIntervalSeconds));
    _commitTimer = new Timer(o => Commit(), null, TimeSpan.FromSeconds(_commitIntervalSeconds), TimeSpan.FromSeconds(_commitIntervalSeconds));
}
/// <summary>
/// One-time fixture setup: builds a 100-document index with randomized
/// per-dimension facet configuration for dimensions "a" and "b", then opens
/// the index reader and taxonomy reader used by the tests.
/// </summary>
public override void BeforeClass() // LUCENENET specific - renamed from BeforeClassDrillDownQueryTest() to ensure calling order
{
    base.BeforeClass();

    dir = NewDirectory();
    Random r = Random;
    var keywordAnalyzer = new MockAnalyzer(r, MockTokenizer.KEYWORD, false);
    RandomIndexWriter writer = new RandomIndexWriter(r, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, keywordAnalyzer));

    taxoDir = NewDirectory();
    ITaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
    config = new FacetsConfig();

    // Randomize the per-dim config. Both dimensions get the same treatment,
    // "a" first and then "b", each drawing three random booleans in order.
    foreach (string dim in new[] { "a", "b" })
    {
        config.SetHierarchical(dim, Random.NextBoolean());
        config.SetMultiValued(dim, Random.NextBoolean());
        if (Random.NextBoolean())
        {
            config.SetIndexFieldName(dim, "$" + dim);
        }
        config.SetRequireDimCount(dim, true);
    }

    for (int docId = 0; docId < 100; docId++)
    {
        var doc = new Document();

        // Every 2nd document (50 total) contains "foo".
        if (docId % 2 == 0)
        {
            doc.Add(new TextField("content", "foo", Field.Store.NO));
        }

        // Every 3rd document (33 total) contains "bar".
        if (docId % 3 == 0)
        {
            doc.Add(new TextField("content", "bar", Field.Store.NO));
        }

        // Every 4th document (25 total) gets a/1 or a/2, chosen at random.
        if (docId % 4 == 0)
        {
            string aValue = r.NextBoolean() ? "1" : "2";
            doc.Add(new FacetField("a", aValue));
        }

        // Every 5th document (20 total) gets b/1.
        if (docId % 5 == 0)
        {
            doc.Add(new FacetField("b", "1"));
        }

        writer.AddDocument(config.Build(taxoWriter, doc));
    }

    taxoWriter.Dispose();
    reader = writer.GetReader();
    writer.Dispose();

    taxo = new DirectoryTaxonomyReader(taxoDir);
}
/// <summary>
/// Verifies the reader's parents array: the root's parent is the invalid
/// ordinal, and every other ordinal's parent is a valid category whose path
/// equals the child's path minus its last component.
/// </summary>
public virtual void TestReaderParent()
{
    var taxoDir = NewDirectory();
    var writer = new DirectoryTaxonomyWriter(taxoDir);
    FillTaxonomy(writer);
    writer.Dispose();

    var reader = new DirectoryTaxonomyReader(taxoDir);
    int[] parents = reader.ParallelTaxonomyArrays.Parents;

    // The root category has no parent.
    Assert.AreEqual(TaxonomyReader.INVALID_ORDINAL, parents[0]);

    // Every non-root category must point at its real parent.
    for (int ordinal = 1; ordinal < reader.Count; ordinal++)
    {
        FacetLabel me = reader.GetPath(ordinal);
        int parentOrdinal = parents[ordinal];
        FacetLabel parent = reader.GetPath(parentOrdinal);

        if (parent == null)
        {
            Fail("Parent of " + ordinal + " is " + parentOrdinal + ", but this is not a valid category.");
        }

        // Compare against the parent derived from the label itself.
        FacetLabel parentByPath = me.Subpath(me.Length - 1);
        if (!parentByPath.Equals(parent))
        {
            Fail("Got parent " + parentOrdinal + " for ordinal " + ordinal + " but categories are " + Showcat(parent) + " and " + Showcat(me) + " respectively.");
        }
    }

    reader.Dispose();
    taxoDir.Dispose();
}
/// <summary>
/// Add documents to the existing index.
/// </summary>
/// <param name="directoryIndexInfo">The directory information where the index files are located.</param>
/// <param name="directoryFacetInfo">The directory information where the facet files are to be placed.</param>
/// <param name="facetData">The complete facet information used to build the index information.
/// When null, the method does nothing.</param>
/// <remarks>
/// Both the index writer and the taxonomy writer are opened in APPEND mode and are committed
/// after the documents have been added. All writers and directories opened here are disposed
/// before the method returns, even if adding documents or disposing an earlier resource fails.
/// </remarks>
public void AddMultiFacetDocuments(DirectoryInfo directoryIndexInfo, DirectoryInfo directoryFacetInfo, FacetData facetData)
{
    // Nothing to add: do not open (or lock) either directory.
    if (facetData == null)
    {
        return;
    }

    Lucene.Net.Index.IndexWriter writer = null;
    DirectoryTaxonomyWriter facetWriter = null;
    Lucene.Net.Store.Directory directory = null;
    Lucene.Net.Store.Directory directoryFacet = null;

    try
    {
        // Create the analyzer.
        SimpleAnalyzer simpleAnalyzer = new Analyzer.SimpleAnalyzer();
        StandardAnalyzer standardAnalyzer = new Analyzer.StandardAnalyzer(simpleAnalyzer);

        // Create the index writer.
        directory = FSDirectory.Open(directoryIndexInfo);
        IndexWriterConfig indexConfig = new IndexWriterConfig(Lucene.Net.Util.LuceneVersion.LUCENE_48, standardAnalyzer);

        // APPEND: add to the index that is already there.
        // NOTE(review): presumably this fails when no index exists yet - confirm against Lucene.NET.
        indexConfig.SetOpenMode(IndexWriterConfig.OpenMode_e.APPEND);
        writer = new IndexWriter(directory, indexConfig);

        // Create the facet writer, also appending to the existing taxonomy.
        directoryFacet = FSDirectory.Open(directoryFacetInfo);
        facetWriter = new DirectoryTaxonomyWriter(directoryFacet, IndexWriterConfig.OpenMode_e.APPEND);

        // Create the facet filter and let it add the documents.
        FacetFilter filter = new FacetFilter();
        filter.AddDocuments(writer, facetWriter, facetData);

        // Commit the index, then the taxonomy.
        writer.Commit();
        facetWriter.Commit();
    }
    finally
    {
        // Dispose in the original order (writers first, then directories). The nesting
        // guarantees that a failure while disposing one resource does not skip the rest.
        try
        {
            if (writer != null)
            {
                writer.Dispose();
            }
        }
        finally
        {
            try
            {
                if (facetWriter != null)
                {
                    facetWriter.Dispose();
                }
            }
            finally
            {
                try
                {
                    if (directory != null)
                    {
                        directory.Dispose();
                    }
                }
                finally
                {
                    if (directoryFacet != null)
                    {
                        directoryFacet.Dispose();
                    }
                }
            }
        }
    }
}
/// <summary>
/// Runs the writer-parent check against a writer that is still open: the
/// taxonomy is filled and committed (not disposed) before the reader is
/// opened, and both are handed to <c>CheckWriterParent</c>.
/// </summary>
public virtual void TestWriterParent2()
{
    var taxoDir = NewDirectory();

    // Fill the taxonomy and commit, keeping the writer open for the check.
    DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(taxoDir);
    FillTaxonomy(writer);
    writer.Commit();

    DirectoryTaxonomyReader reader = new DirectoryTaxonomyReader(taxoDir);

    CheckWriterParent(reader, writer);

    // Release the writer first, then the reader, then the directory.
    writer.Dispose();
    reader.Dispose();
    taxoDir.Dispose();
}
/// <summary>
/// Add text to the existing index.
/// </summary>
/// <remarks>
/// For every entry in <paramref name="addFileData"/>, each supported document format that is
/// flagged on the entry (html, pdf, rtf, txt, xml, docx - checked in that order) is indexed by
/// its own filter, using the files in the entry's directory that match that format's search patterns.
/// </remarks>
/// <param name="writer">The index writer.</param>
/// <param name="facetWriter">The facet index writer.</param>
/// <param name="addFileData">The file data to add.</param>
/// <param name="config">The facet configuration information.</param>
public void AddFile(Lucene.Net.Index.IndexWriter writer, DirectoryTaxonomyWriter facetWriter,
    Dictionary<FacetField, FileFacetModel> addFileData, FacetsConfig config)
{
    Nequeo.IO.Directory fileFinder = new Nequeo.IO.Directory();

    // For each file facet.
    foreach (KeyValuePair<FacetField, FileFacetModel> entry in addFileData)
    {
        FacetField facetField = entry.Key;
        FileFacetModel model = entry.Value;
        var sourceDirectory = model.DirectoryInfo;
        var documents = model.Documents;

        // Html documents.
        if (documents.SupportedDocuments.HasFlag(SupportedDocuments.Html))
        {
            string[] htmlFiles = fileFinder.GetFiles(sourceDirectory.FullName, documents.GetFormattedSearchPatterns(SupportedDocuments.Html));
            new HtmlFilter().AddDocuments(writer, facetWriter, sourceDirectory, htmlFiles, documents, facetField, config);
        }

        // Pdf documents.
        if (documents.SupportedDocuments.HasFlag(SupportedDocuments.Pdf))
        {
            string[] pdfFiles = fileFinder.GetFiles(sourceDirectory.FullName, documents.GetFormattedSearchPatterns(SupportedDocuments.Pdf));
            new PdfFilter().AddDocuments(writer, facetWriter, sourceDirectory, pdfFiles, documents, facetField, config);
        }

        // Rtf documents.
        if (documents.SupportedDocuments.HasFlag(SupportedDocuments.Rtf))
        {
            string[] rtfFiles = fileFinder.GetFiles(sourceDirectory.FullName, documents.GetFormattedSearchPatterns(SupportedDocuments.Rtf));
            new RtfFilter().AddDocuments(writer, facetWriter, sourceDirectory, rtfFiles, documents, facetField, config);
        }

        // Txt documents.
        if (documents.SupportedDocuments.HasFlag(SupportedDocuments.Txt))
        {
            string[] txtFiles = fileFinder.GetFiles(sourceDirectory.FullName, documents.GetFormattedSearchPatterns(SupportedDocuments.Txt));
            new TxtFilter().AddDocuments(writer, facetWriter, sourceDirectory, txtFiles, documents, facetField, config);
        }

        // Xml documents.
        if (documents.SupportedDocuments.HasFlag(SupportedDocuments.Xml))
        {
            string[] xmlFiles = fileFinder.GetFiles(sourceDirectory.FullName, documents.GetFormattedSearchPatterns(SupportedDocuments.Xml));
            new XmlFilter().AddDocuments(writer, facetWriter, sourceDirectory, xmlFiles, documents, facetField, config);
        }

        // Docx documents (handled by the MS document filter).
        if (documents.SupportedDocuments.HasFlag(SupportedDocuments.Docx))
        {
            string[] docxFiles = fileFinder.GetFiles(sourceDirectory.FullName, documents.GetFormattedSearchPatterns(SupportedDocuments.Docx));
            new MSDocFilter().AddDocuments(writer, facetWriter, sourceDirectory, docxFiles, documents, facetField, config);
        }
    }
}
/// <summary>
/// Checks the reader's children/siblings arrays against the expected
/// categories: for each category, following its youngest child and then the
/// older-sibling chain must visit exactly its expected children, from the
/// highest ordinal to the lowest, and then end at -1.
/// </summary>
public virtual void TestChildrenArrays()
{
    var taxoDir = NewDirectory();
    var writer = new DirectoryTaxonomyWriter(taxoDir);
    FillTaxonomy(writer);
    writer.Dispose();

    var reader = new DirectoryTaxonomyReader(taxoDir);
    ParallelTaxonomyArrays arrays = reader.ParallelTaxonomyArrays;

    int[] youngestChildArray = arrays.Children;
    Assert.AreEqual(reader.Count, youngestChildArray.Length);
    int[] olderSiblingArray = arrays.Siblings;
    Assert.AreEqual(reader.Count, olderSiblingArray.Length);

    for (int parentOrd = 0; parentOrd < ExpectedCategories.Length; parentOrd++)
    {
        var parentPath = ExpectedCategories[parentOrd];

        // Collect the expected children, scanning from the last category
        // down so the list is already in youngest-to-oldest order.
        var expectedChildren = new List<int>();
        for (int candidate = ExpectedCategories.Length - 1; candidate >= 0; candidate--)
        {
            var candidatePath = ExpectedCategories[candidate];
            if (candidatePath.Length != parentPath.Length + 1)
            {
                continue; // not exactly one component longer, so can't be a child
            }

            // A child shares every component of the parent's path.
            int matched = 0;
            while (matched < parentPath.Length && candidatePath[matched].Equals(parentPath[matched]))
            {
                matched++;
            }

            if (matched == parentPath.Length)
            {
                expectedChildren.Add(candidate);
            }
        }

        if (expectedChildren.Count == 0)
        {
            // Childless categories must have no youngest child.
            Assert.AreEqual(TaxonomyReader.INVALID_ORDINAL, youngestChildArray[parentOrd]);
            continue;
        }

        // Walk youngest child -> older siblings and compare step by step.
        int child = youngestChildArray[parentOrd];
        Assert.AreEqual(expectedChildren[0], child);
        for (int n = 1; n < expectedChildren.Count; n++)
        {
            child = olderSiblingArray[child];
            // If child were INVALID_ORDINAL here the assertion fails anyway,
            // so no separate stop condition is needed.
            Assert.AreEqual(expectedChildren[n], child);
        }

        // The chain must end here; anything else means there are extra children.
        Assert.AreEqual(-1, olderSiblingArray[child]);
    }

    reader.Dispose();
    taxoDir.Dispose();
}
/// <summary>
/// Indexes at least 10000 documents faceted by <c>i % 10</c>, then checks
/// <see cref="RandomSamplingFacetsCollector"/>: a query with no hits yields
/// empty matching docs, a collector sized to all documents matches the exact
/// counts, and a 10% sample yields amortized counts whose mean and spread are
/// statistically plausible.
/// </summary>
public virtual void TestRandomSampling()
{
    Directory dir = NewDirectory();
    Directory taxoDir = NewDirectory();

    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
    RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, dir);

    FacetsConfig config = new FacetsConfig();

    int numDocs = AtLeast(10000);
    for (int i = 0; i < numDocs; i++)
    {
        string parity = (i % 2 == 0) ? "even" : "odd";
        string bucket = Convert.ToString(i % 10, CultureInfo.InvariantCulture);

        Document doc = new Document();
        doc.Add(new StringField("EvenOdd", parity, Store.NO));
        doc.Add(new FacetField("iMod10", bucket));
        writer.AddDocument(config.Build(taxoWriter, doc));
    }
    Random random = Random;

    // NRT open
    IndexSearcher searcher = NewSearcher(writer.GetReader());
    var taxoReader = new DirectoryTaxonomyReader(taxoWriter);
    IOUtils.Dispose(writer, taxoWriter);

    // --- Empty result set -------------------------------------------------
    RandomSamplingFacetsCollector noHitsCollector = new RandomSamplingFacetsCollector(numDocs / 10, random.NextInt64());

    // Searching must not divide by zero ...
    searcher.Search(new TermQuery(new Term("EvenOdd", "NeverMatches")), noHitsCollector);

    // ... and must still produce a (non-null) list of matching docs,
    Assert.IsNotNull(noHitsCollector.GetMatchingDocs());

    // none of which contains any hit.
    foreach (MatchingDocs matching in noHitsCollector.GetMatchingDocs())
    {
        Assert.AreEqual(0, matching.TotalHits);
    }

    // --- Real search ------------------------------------------------------
    // Select half of the documents.
    TermQuery query = new TermQuery(new Term("EvenOdd", "even"));

    // Only the even (i % 10) values are hits, so there are 5 facet values
    // (0, 2, 4, 6 and 8). There is a REAL small chance that sampling misses
    // one of them: 0.8 (chance not to take a value) ^ 2000 * 5 (any can be
    // missing) ~ 10^-193, so that is probably not going to happen.
    int maxNumChildren = 5;

    RandomSamplingFacetsCollector random100Percent = new RandomSamplingFacetsCollector(numDocs, random.NextInt64()); // no sampling
    RandomSamplingFacetsCollector random10Percent = new RandomSamplingFacetsCollector(numDocs / 10, random.NextInt64()); // 10 % of total docs, 20% of the hits

    FacetsCollector fc = new FacetsCollector();

    searcher.Search(query, MultiCollector.Wrap(fc, random100Percent, random10Percent));

    FastTaxonomyFacetCounts random10FacetCounts = new FastTaxonomyFacetCounts(taxoReader, config, random10Percent);
    FastTaxonomyFacetCounts random100FacetCounts = new FastTaxonomyFacetCounts(taxoReader, config, random100Percent);
    FastTaxonomyFacetCounts exactFacetCounts = new FastTaxonomyFacetCounts(taxoReader, config, fc);

    FacetResult random10Result = random10Percent.AmortizeFacetCounts(random10FacetCounts.GetTopChildren(10, "iMod10"), config, searcher);
    FacetResult random100Result = random100FacetCounts.GetTopChildren(10, "iMod10");
    FacetResult exactResult = exactFacetCounts.GetTopChildren(10, "iMod10");

    // A collector sized to all documents must agree with the exact counts.
    Assert.AreEqual(random100Result, exactResult);

    // We should have five children, but there is a small chance we have
    // fewer (see above) ...
    Assert.IsTrue(random10Result.ChildCount <= maxNumChildren);
    // ... and there must be at least one.
    Assert.IsTrue(random10Result.ChildCount >= 1);

    // Sampling is random, so the results vary per run; judge them
    // statistically via the mean and the standard deviation.
    int sum = 0;
    foreach (LabelAndValue entry in random10Result.LabelValues)
    {
        sum += (int)entry.Value;
    }
    float mu = (float)sum / (float)maxNumChildren;

    float variance = 0;
    foreach (LabelAndValue entry in random10Result.LabelValues)
    {
        variance += (float)Math.Pow((mu - (int)entry.Value), 2);
    }
    variance = variance / maxNumChildren;
    float sigma = (float)Math.Sqrt(variance);

    // We query only half the documents and have 5 categories, so the
    // average number of docs per category is the total divided by 5*2.
    float targetMu = numDocs / (5.0f * 2.0f);

    // The average should be in range and the standard deviation not too great.
    Assert.IsTrue(sigma < 200);
    Assert.IsTrue(targetMu - 3 * sigma < mu && mu < targetMu + 3 * sigma);

    IOUtils.Dispose(searcher.IndexReader, taxoReader, dir, taxoDir);
}
/// <summary>
/// Checks structural invariants of the children/siblings arrays using only
/// the parents array: youngest children are real children, older siblings
/// have lower ordinals and the same parent, and both arrays point at exactly
/// the nearest candidate (no children skipped at the start, middle or end
/// of a chain).
/// </summary>
public virtual void TestChildrenArraysInvariants()
{
    var taxoDir = NewDirectory();
    var writer = new DirectoryTaxonomyWriter(taxoDir);
    FillTaxonomy(writer);
    writer.Dispose();

    var reader = new DirectoryTaxonomyReader(taxoDir);
    ParallelTaxonomyArrays arrays = reader.ParallelTaxonomyArrays;

    int[] children = arrays.Children;
    Assert.AreEqual(reader.Count, children.Length);
    int[] olderSiblingArray = arrays.Siblings;
    Assert.AreEqual(reader.Count, olderSiblingArray.Length);

    int[] parents = reader.ParallelTaxonomyArrays.Parents;

    // Invariant 1: the "youngest child" of a category really is its child.
    for (int ord = 0; ord < reader.Count; ord++)
    {
        int youngest = children[ord];
        if (youngest == TaxonomyReader.INVALID_ORDINAL)
        {
            continue;
        }
        Assert.AreEqual(ord, parents[youngest]);
    }

    // Invariant 2: the "older sibling" is indeed older, i.e. has a lower
    // ordinal (INVALID_ORDINAL qualifies, being lower than any ordinal).
    for (int ord = 0; ord < reader.Count; ord++)
    {
        Assert.True(olderSiblingArray[ord] < ord, "olderSiblingArray[" + ord + "] should be <" + ord);
    }

    // Invariant 3: the "older sibling" shares the category's parent.
    for (int ord = 0; ord < reader.Count; ord++)
    {
        int sibling = olderSiblingArray[ord];
        if (sibling != TaxonomyReader.INVALID_ORDINAL)
        {
            Assert.AreEqual(parents[ord], parents[sibling]);
        }
    }

    // The remaining checks are slightly more complex (less "invariant-like").

    // The "youngest child" must be the child with the highest ordinal, so
    // that the first children in the chain are not missed.
    for (int ord = 0; ord < reader.Count; ord++)
    {
        int expectedYoungest = TaxonomyReader.INVALID_ORDINAL; // stays invalid when no child exists
        for (int candidate = reader.Count - 1; candidate > ord; candidate--)
        {
            if (parents[candidate] == ord)
            {
                expectedYoungest = candidate; // first hit from the top is the youngest
                break;
            }
        }
        Assert.AreEqual(expectedYoungest, children[ord]);
    }

    // The "older sibling" must be the nearest older one - not a too old one
    // or -1 - so no children in the middle or end of the chain are missed.
    for (int ord = 0; ord < reader.Count; ord++)
    {
        int expectedSibling = TaxonomyReader.INVALID_ORDINAL; // stays invalid when no sibling exists
        for (int candidate = ord - 1; candidate >= 0; candidate--)
        {
            if (parents[candidate] == parents[ord])
            {
                expectedSibling = candidate; // first hit going down is the youngest older sibling
                break;
            }
        }
        Assert.AreEqual(expectedSibling, olderSiblingArray[ord]);
    }

    reader.Dispose();
    taxoDir.Dispose();
}
/// <summary>
/// Verifies that the children/siblings arrays of an open reader stay
/// unchanged while the writer adds and commits categories, and that a reader
/// obtained through <c>OpenIfChanged</c> exposes arrays grown to cover the
/// new categories.
/// </summary>
public virtual void TestChildrenArraysGrowth()
{
    var taxoDir = NewDirectory();
    var writer = new DirectoryTaxonomyWriter(taxoDir);
    writer.AddCategory(new FacetLabel("hi", "there"));
    writer.Commit();

    var reader = new DirectoryTaxonomyReader(taxoDir);
    ParallelTaxonomyArrays arrays = reader.ParallelTaxonomyArrays;

    // Initial state: three ordinals, chained as a parent -> child path.
    Assert.AreEqual(3, reader.Count);
    Assert.AreEqual(3, arrays.Siblings.Length);
    Assert.AreEqual(3, arrays.Children.Length);
    Assert.True(Arrays.Equals(new[] { 1, 2, -1 }, arrays.Children));
    Assert.True(Arrays.Equals(new[] { -1, -1, -1 }, arrays.Siblings));

    // Grow the taxonomy behind the reader's back.
    writer.AddCategory(new FacetLabel("hi", "ho"));
    writer.AddCategory(new FacetLabel("hello"));
    writer.Commit();

    // Before refresh, nothing changed: we get exactly the same arrays object.
    ParallelTaxonomyArrays arraysBeforeRefresh = reader.ParallelTaxonomyArrays;
    Assert.AreSame(arraysBeforeRefresh, arrays);
    Assert.AreEqual(3, reader.Count);
    Assert.AreEqual(3, arrays.Siblings.Length);
    Assert.AreEqual(3, arrays.Children.Length);

    // After the refresh, the new categories show up.
    var refreshed = TaxonomyReader.OpenIfChanged(reader);
    Assert.NotNull(refreshed);
    reader.Dispose();
    reader = refreshed;

    arrays = reader.ParallelTaxonomyArrays;
    Assert.AreEqual(5, reader.Count);
    Assert.AreEqual(5, arrays.Siblings.Length);
    Assert.AreEqual(5, arrays.Children.Length);
    Assert.True(Arrays.Equals(new[] { 4, 3, -1, -1, -1 }, arrays.Children));
    Assert.True(Arrays.Equals(new[] { -1, -1, -1, 2, 1 }, arrays.Siblings));

    writer.Dispose();
    reader.Dispose();
    taxoDir.Dispose();
}
/// <summary>
/// Creates near-real-time searcher and taxonomy reader
/// from the corresponding writers.
/// </summary>
/// <param name="writer">Index writer from which the index reader is opened.</param>
/// <param name="applyAllDeletes">Passed through to <c>DirectoryReader.Open</c> when opening the index reader.</param>
/// <param name="searcherFactory">Factory used to create the searcher; when <c>null</c>, a new <see cref="SearcherFactory"/> is used.</param>
/// <param name="taxoWriter">Taxonomy writer from which the taxonomy reader is opened and whose epoch is recorded.</param>
public SearcherTaxonomyManager(IndexWriter writer, bool applyAllDeletes, SearcherFactory searcherFactory, DirectoryTaxonomyWriter taxoWriter)
{
    // Fall back to a default factory when the caller did not supply one.
    this.searcherFactory = searcherFactory ?? new SearcherFactory();
    this.taxoWriter = taxoWriter;

    // Open the taxonomy reader first, then the index reader and its searcher.
    var nrtTaxoReader = new DirectoryTaxonomyReader(taxoWriter);
    var nrtIndexReader = DirectoryReader.Open(writer, applyAllDeletes);
    var nrtSearcher = SearcherManager.GetSearcher(this.searcherFactory, nrtIndexReader);
    Current = new SearcherAndTaxonomy(nrtSearcher, nrtTaxoReader);

    // Remember the taxonomy epoch seen at construction time.
    this.taxoEpoch = taxoWriter.TaxonomyEpoch;
}
/// <summary>
/// Computes the youngest child of category a/b at two points - after the
/// first committed chunk and after many more children were added - and then
/// repeatedly runs <c>AssertConsistentYoungestChild</c> with those baselines.
/// </summary>
public virtual void TestTaxonomyReaderRefreshRaces()
{
    // Compute the base child arrays: after the first chunk, and after the other.
    var baseDir = NewDirectory();
    var baseWriter = new DirectoryTaxonomyWriter(baseDir);

    baseWriter.AddCategory(new FacetLabel("a", "0"));
    FacetLabel abPath = new FacetLabel("a", "b");
    baseWriter.AddCategory(abPath);
    baseWriter.Commit();

    var trBase = new DirectoryTaxonomyReader(baseDir);

    // Baseline 1: youngest child of a/b right after the first commit.
    ParallelTaxonomyArrays firstArrays = trBase.ParallelTaxonomyArrays;
    int abOrd = trBase.GetOrdinal(abPath);
    int abYoungChildBase1 = firstArrays.Children[abOrd];

    // Add many children under a/b, then close the writer.
    int numCategories = AtLeast(800);
    for (int child = 0; child < numCategories; child++)
    {
        baseWriter.AddCategory(new FacetLabel("a", "b", Convert.ToString(child)));
    }
    baseWriter.Dispose();

    // Swap in the refreshed reader.
    var refreshedReader = TaxonomyReader.OpenIfChanged(trBase);
    Assert.NotNull(refreshedReader);
    trBase.Dispose();
    trBase = refreshedReader;

    // Baseline 2: youngest child of a/b once all children are visible.
    ParallelTaxonomyArrays secondArrays = trBase.ParallelTaxonomyArrays;
    int abYoungChildBase2 = secondArrays.Children[abOrd];

    int numRetries = AtLeast(50);
    for (int attempt = 0; attempt < numRetries; attempt++)
    {
        AssertConsistentYoungestChild(abPath, abOrd, abYoungChildBase1, abYoungChildBase2, attempt, numCategories);
    }

    trBase.Dispose();
    baseDir.Dispose();
}