public virtual void TestWriterParent2() { var indexDir = NewDirectory(); var tw = new DirectoryTaxonomyWriter(indexDir); FillTaxonomy(tw); tw.Commit(); var tr = new DirectoryTaxonomyReader(indexDir); CheckWriterParent(tr, tw); tw.Dispose(); tr.Dispose(); indexDir.Dispose(); }
public virtual void TestWriterTwice3() { var indexDir = NewDirectory(); // First, create and fill the taxonomy var tw = new DirectoryTaxonomyWriter(indexDir); FillTaxonomy(tw); tw.Dispose(); // Now, open the same taxonomy and add the same categories again. // After a few categories, the LuceneTaxonomyWriter implementation // will stop looking for each category on disk, and rather read them // all into memory and close it's reader. The bug was that it closed // the reader, but forgot that it did (because it didn't set the reader // reference to null). tw = new DirectoryTaxonomyWriter(indexDir); FillTaxonomy(tw); // Add one new category, just to make commit() do something: tw.AddCategory(new FacetLabel("hi")); // Do a commit(). Here was a bug - if tw had a reader open, it should // be reopened after the commit. However, in our case the reader should // not be open (as explained above) but because it was not set to null, // we forgot that, tried to reopen it, and got an AlreadyClosedException. tw.Commit(); Assert.AreEqual(ExpectedCategories.Length + 1, tw.Size); tw.Dispose(); indexDir.Dispose(); }
public virtual void TestTaxonomyReaderRefreshRaces() { // compute base child arrays - after first chunk, and after the other var indexDirBase = NewDirectory(); var twBase = new DirectoryTaxonomyWriter(indexDirBase); twBase.AddCategory(new FacetLabel("a", "0")); FacetLabel abPath = new FacetLabel("a", "b"); twBase.AddCategory(abPath); twBase.Commit(); var trBase = new DirectoryTaxonomyReader(indexDirBase); ParallelTaxonomyArrays ca1 = trBase.ParallelTaxonomyArrays; int abOrd = trBase.GetOrdinal(abPath); int abYoungChildBase1 = ca1.Children()[abOrd]; int numCategories = AtLeast(800); for (int i = 0; i < numCategories; i++) { twBase.AddCategory(new FacetLabel("a", "b", Convert.ToString(i))); } twBase.Dispose(); var newTaxoReader = TaxonomyReader.OpenIfChanged(trBase); Assert.NotNull(newTaxoReader); trBase.Dispose(); trBase = newTaxoReader; ParallelTaxonomyArrays ca2 = trBase.ParallelTaxonomyArrays; int abYoungChildBase2 = ca2.Children()[abOrd]; int numRetries = AtLeast(50); for (int retry = 0; retry < numRetries; retry++) { AssertConsistentYoungestChild(abPath, abOrd, abYoungChildBase1, abYoungChildBase2, retry, numCategories); } trBase.Dispose(); indexDirBase.Dispose(); }
public virtual void TestWriterLock() { // native fslock impl gets angry if we use it, so use RAMDirectory explicitly. var indexDir = new RAMDirectory(); var tw = new DirectoryTaxonomyWriter(indexDir); tw.AddCategory(new FacetLabel("hi", "there")); tw.Commit(); // we deliberately not close the write now, and keep it open and // locked. // Verify that the writer worked: var tr = new DirectoryTaxonomyReader(indexDir); Assert.AreEqual(2, tr.GetOrdinal(new FacetLabel("hi", "there"))); // Try to open a second writer, with the first one locking the directory. // We expect to get a LockObtainFailedException. try { Assert.Null(new DirectoryTaxonomyWriter(indexDir)); Fail("should have failed to write in locked directory"); } catch (LockObtainFailedException) { // this is what we expect to happen. } // Remove the lock, and now the open should succeed, and we can // write to the new writer. DirectoryTaxonomyWriter.Unlock(indexDir); var tw2 = new DirectoryTaxonomyWriter(indexDir); tw2.AddCategory(new FacetLabel("hey")); tw2.Dispose(); // See that the writer indeed wrote: var newtr = TaxonomyReader.OpenIfChanged(tr); Assert.NotNull(newtr); tr.Dispose(); tr = newtr; Assert.AreEqual(3, tr.GetOrdinal(new FacetLabel("hey"))); tr.Dispose(); tw.Dispose(); indexDir.Dispose(); }
public virtual void TestSeparateReaderAndWriter() { var indexDir = NewDirectory(); var tw = new DirectoryTaxonomyWriter(indexDir); tw.Commit(); var tr = new DirectoryTaxonomyReader(indexDir); Assert.AreEqual(1, tr.Size); // the empty taxonomy has size 1 (the root) tw.AddCategory(new FacetLabel("Author")); Assert.AreEqual(1, tr.Size); // still root only... Assert.Null(TaxonomyReader.OpenIfChanged(tr)); // this is not enough, because tw.Commit() hasn't been done yet Assert.AreEqual(1, tr.Size); // still root only... tw.Commit(); Assert.AreEqual(1, tr.Size); // still root only... var newTaxoReader = TaxonomyReader.OpenIfChanged(tr); Assert.NotNull(newTaxoReader); tr.Dispose(); tr = newTaxoReader; int author = 1; try { Assert.AreEqual(TaxonomyReader.ROOT_ORDINAL, tr.ParallelTaxonomyArrays.Parents()[author]); // ok } catch (System.IndexOutOfRangeException) { Fail("After category addition, commit() and refresh(), getParent for " + author + " should NOT throw exception"); } Assert.AreEqual(2, tr.Size); // finally, see there are two categories // now, add another category, and verify that after commit and refresh // the parent of this category is correct (this requires the reader // to correctly update its prefetched parent vector), and that the // old information also wasn't ruined: tw.AddCategory(new FacetLabel("Author", "Richard Dawkins")); int dawkins = 2; tw.Commit(); newTaxoReader = TaxonomyReader.OpenIfChanged(tr); Assert.NotNull(newTaxoReader); tr.Dispose(); tr = newTaxoReader; int[] parents = tr.ParallelTaxonomyArrays.Parents(); Assert.AreEqual(author, parents[dawkins]); Assert.AreEqual(TaxonomyReader.ROOT_ORDINAL, parents[author]); Assert.AreEqual(TaxonomyReader.INVALID_ORDINAL, parents[TaxonomyReader.ROOT_ORDINAL]); Assert.AreEqual(3, tr.Size); tw.Dispose(); tr.Dispose(); indexDir.Dispose(); }
public virtual void TestSeparateReaderAndWriter2() { var indexDir = NewDirectory(); var tw = new DirectoryTaxonomyWriter(indexDir); tw.Commit(); var tr = new DirectoryTaxonomyReader(indexDir); // Test getOrdinal(): FacetLabel author = new FacetLabel("Author"); Assert.AreEqual(1, tr.Size); // the empty taxonomy has size 1 (the root) Assert.AreEqual(TaxonomyReader.INVALID_ORDINAL, tr.GetOrdinal(author)); tw.AddCategory(author); // before commit and refresh, no change: Assert.AreEqual(TaxonomyReader.INVALID_ORDINAL, tr.GetOrdinal(author)); Assert.AreEqual(1, tr.Size); // still root only... Assert.Null(TaxonomyReader.OpenIfChanged(tr)); // this is not enough, because tw.Commit() hasn't been done yet Assert.AreEqual(TaxonomyReader.INVALID_ORDINAL, tr.GetOrdinal(author)); Assert.AreEqual(1, tr.Size); // still root only... tw.Commit(); // still not enough before refresh: Assert.AreEqual(TaxonomyReader.INVALID_ORDINAL, tr.GetOrdinal(author)); Assert.AreEqual(1, tr.Size); // still root only... var newTaxoReader = TaxonomyReader.OpenIfChanged(tr); Assert.NotNull(newTaxoReader); tr.Dispose(); tr = newTaxoReader; Assert.AreEqual(1, tr.GetOrdinal(author)); Assert.AreEqual(2, tr.Size); tw.Dispose(); tr.Dispose(); indexDir.Dispose(); }
public virtual void TestChildrenArraysGrowth() { var indexDir = NewDirectory(); var tw = new DirectoryTaxonomyWriter(indexDir); tw.AddCategory(new FacetLabel("hi", "there")); tw.Commit(); var tr = new DirectoryTaxonomyReader(indexDir); ParallelTaxonomyArrays ca = tr.ParallelTaxonomyArrays; Assert.AreEqual(3, tr.Size); Assert.AreEqual(3, ca.Siblings().Length); Assert.AreEqual(3, ca.Children().Length); Assert.True(Arrays.Equals(new int[] { 1, 2, -1 }, ca.Children())); Assert.True(Arrays.Equals(new int[] { -1, -1, -1 }, ca.Siblings())); tw.AddCategory(new FacetLabel("hi", "ho")); tw.AddCategory(new FacetLabel("hello")); tw.Commit(); // Before refresh, nothing changed.. ParallelTaxonomyArrays newca = tr.ParallelTaxonomyArrays; Assert.AreSame(newca, ca); // we got exactly the same object Assert.AreEqual(3, tr.Size); Assert.AreEqual(3, ca.Siblings().Length); Assert.AreEqual(3, ca.Children().Length); // After the refresh, things change: var newtr = TaxonomyReader.OpenIfChanged(tr); Assert.NotNull(newtr); tr.Dispose(); tr = newtr; ca = tr.ParallelTaxonomyArrays; Assert.AreEqual(5, tr.Size); Assert.AreEqual(5, ca.Siblings().Length); Assert.AreEqual(5, ca.Children().Length); Assert.True(Arrays.Equals(new int[] { 4, 3, -1, -1, -1 }, ca.Children())); Assert.True(Arrays.Equals(new int[] { -1, -1, -1, 2, 1 }, ca.Siblings())); tw.Dispose(); tr.Dispose(); indexDir.Dispose(); }
public virtual void TestRootOnly2() { var indexDir = NewDirectory(); var tw = new DirectoryTaxonomyWriter(indexDir); tw.Commit(); var tr = new DirectoryTaxonomyReader(indexDir); Assert.AreEqual(1, tr.Size); Assert.AreEqual(0, tr.GetPath(0).Length); Assert.AreEqual(TaxonomyReader.INVALID_ORDINAL, tr.ParallelTaxonomyArrays.Parents()[0]); Assert.AreEqual(0, tr.GetOrdinal(new FacetLabel())); tw.Dispose(); tr.Dispose(true); indexDir.Dispose(); }
private void AssertConsistentYoungestChild(FacetLabel abPath, int abOrd, int abYoungChildBase1, int abYoungChildBase2, int retry, int numCategories) { var indexDir = new SlowRAMDirectory(-1, null); // no slowness for intialization var tw = new DirectoryTaxonomyWriter(indexDir); tw.AddCategory(new FacetLabel("a", "0")); tw.AddCategory(abPath); tw.Commit(); var tr = new DirectoryTaxonomyReader(indexDir); for (int i = 0; i < numCategories; i++) { var cp = new FacetLabel("a", "b", Convert.ToString(i)); tw.AddCategory(cp); Assert.AreEqual(TaxonomyReader.INVALID_ORDINAL, tr.GetOrdinal(cp), "Ordinal of " + cp + " must be invalid until Taxonomy Reader was refreshed"); } tw.Dispose(); var stop = new AtomicBoolean(false); Exception[] error = new Exception[] { null }; int[] retrieval = new int[] { 0 }; var thread = new ThreadAnonymousInnerClassHelper(this, abPath, abOrd, abYoungChildBase1, abYoungChildBase2, retry, tr, stop, error, retrieval); thread.Start(); indexDir.SleepMillis = 1; // some delay for refresh var newTaxoReader = TaxonomyReader.OpenIfChanged(tr); if (newTaxoReader != null) { newTaxoReader.Dispose(); } stop.Set(true); thread.Join(); Assert.Null(error[0], "Unexpcted exception at retry " + retry + " retrieval " + retrieval[0] + ": \n" + stackTraceStr(error[0])); tr.Dispose(); }
public virtual void TestCustom() { Directory indexDir = NewDirectory(); Directory taxoDir = NewDirectory(); // create and open an index writer RandomIndexWriter iw = new RandomIndexWriter(Random(), indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false))); // create and open a taxonomy writer var tw = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE); FacetsConfig config = Config; config.SetIndexFieldName("Author", "$author"); seedIndex(tw, iw, config); IndexReader ir = iw.Reader; tw.Commit(); // prepare index reader and taxonomy. var tr = new DirectoryTaxonomyReader(taxoDir); // prepare searcher to search against IndexSearcher searcher = NewSearcher(ir); FacetsCollector sfc = PerformSearch(tr, ir, searcher); IDictionary<string, Facets> facetsMap = new Dictionary<string, Facets>(); facetsMap["Author"] = GetTaxonomyFacetCounts(tr, config, sfc, "$author"); Facets facets = new MultiFacets(facetsMap, GetTaxonomyFacetCounts(tr, config, sfc)); // Obtain facets results and hand-test them AssertCorrectResults(facets); assertOrdinalsExist("$facets", ir); assertOrdinalsExist("$author", ir); IOUtils.Close(tr, ir, iw, tw, indexDir, taxoDir); }
public virtual void TestDefault() { Directory indexDir = NewDirectory(); Directory taxoDir = NewDirectory(); // create and open an index writer var iw = new RandomIndexWriter(Random(), indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false))); // create and open a taxonomy writer var tw = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE); var config = Config; seedIndex(tw, iw, config); IndexReader ir = iw.Reader; tw.Commit(); // prepare index reader and taxonomy. var tr = new DirectoryTaxonomyReader(taxoDir); // prepare searcher to search against IndexSearcher searcher = NewSearcher(ir); FacetsCollector sfc = PerformSearch(tr, ir, searcher); // Obtain facets results and hand-test them AssertCorrectResults(GetTaxonomyFacetCounts(tr, config, sfc)); assertOrdinalsExist("$facets", ir); IOUtils.Close(tr, ir, iw, tw, indexDir, taxoDir); }
public virtual void TestDirectory() { Store.Directory indexDir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); IndexWriter w = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); var tw = new DirectoryTaxonomyWriter(taxoDir); // first empty commit w.Commit(); tw.Commit(); var mgr = new SearcherTaxonomyManager(indexDir, taxoDir, null); FacetsConfig config = new FacetsConfig(); config.SetMultiValued("field", true); AtomicBoolean stop = new AtomicBoolean(); // How many unique facets to index before stopping: int ordLimit = TEST_NIGHTLY ? 100000 : 6000; var indexer = new IndexerThread(w, config, tw, mgr, ordLimit, stop); indexer.Start(); try { while (!stop.Get()) { SearcherAndTaxonomy pair = mgr.Acquire(); try { //System.out.println("search maxOrd=" + pair.taxonomyReader.getSize()); FacetsCollector sfc = new FacetsCollector(); pair.searcher.Search(new MatchAllDocsQuery(), sfc); Facets facets = GetTaxonomyFacetCounts(pair.taxonomyReader, config, sfc); FacetResult result = facets.GetTopChildren(10, "field"); if (pair.searcher.IndexReader.NumDocs > 0) { //System.out.println(pair.taxonomyReader.getSize()); Assert.True(result.ChildCount > 0); Assert.True(result.LabelValues.Length > 0); } //if (VERBOSE) { //System.out.println("TEST: facets=" + FacetTestUtils.toString(results.get(0))); //} } finally { mgr.Release(pair); } } } finally { indexer.Join(); } if (VERBOSE) { Console.WriteLine("TEST: now stop"); } IOUtils.Close(mgr, tw, w, taxoDir, indexDir); }
public virtual void TestReplaceTaxonomyDirectory() { Store.Directory indexDir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); IndexWriter w = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); var tw = new DirectoryTaxonomyWriter(taxoDir); w.Commit(); tw.Commit(); Store.Directory taxoDir2 = NewDirectory(); var tw2 = new DirectoryTaxonomyWriter(taxoDir2); tw2.AddCategory(new FacetLabel("a", "b")); tw2.Dispose(); var mgr = new SearcherTaxonomyManager(indexDir, taxoDir, null); SearcherAndTaxonomy pair = mgr.Acquire(); try { Assert.AreEqual(1, pair.taxonomyReader.Size); } finally { mgr.Release(pair); } w.AddDocument(new Document()); tw.ReplaceTaxonomy(taxoDir2); taxoDir2.Dispose(); w.Commit(); tw.Commit(); mgr.MaybeRefresh(); pair = mgr.Acquire(); try { Assert.AreEqual(3, pair.taxonomyReader.Size); } finally { mgr.Release(pair); } IOUtils.Close(mgr, tw, w, taxoDir, indexDir); }