public virtual void TestWrongIndexFieldName()
{
    // Indexes a single document whose "a" dimension is configured to use the
    // "$facets2" index field, then checks what the facets object built with
    // that same config reports: GetAllDims returns nothing, while
    // GetSpecificValue and GetTopChildren for "a" throw ArgumentException.
    Store.Directory dir = NewDirectory();
    Store.Directory taxoDir = NewDirectory();

    // Writes facet ords to a separate directory from the main index:
    var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE);

    FacetsConfig config = new FacetsConfig();
    config.SetIndexFieldName("a", "$facets2");
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir);

    Document doc = new Document();
    doc.Add(new IntField("num", 10, Field.Store.NO));
    doc.Add(new FacetField("a", "foo1"));
    writer.AddDocument(config.Build(taxoWriter, doc));

    // NRT open
    IndexSearcher searcher = NewSearcher(writer.Reader);
    writer.Dispose();

    // NRT open
    var taxoReader = new DirectoryTaxonomyReader(taxoWriter);
    taxoWriter.Dispose();

    FacetsCollector c = new FacetsCollector();
    searcher.Search(new MatchAllDocsQuery(), c);

    TaxonomyFacetSumValueSource facets = new TaxonomyFacetSumValueSource(taxoReader, config, c, new IntFieldSource("num"));

    // Ask for top 10 labels for any dims that have counts. AreEqual (rather
    // than True(Count == 0)) reports the actual count when the check fails.
    IList<FacetResult> results = facets.GetAllDims(10);
    Assert.AreEqual(0, results.Count);

    try
    {
        facets.GetSpecificValue("a");
        Fail("should have hit exc");
    }
    catch (System.ArgumentException)
    {
        // expected
    }

    try
    {
        facets.GetTopChildren(10, "a");
        Fail("should have hit exc");
    }
    catch (System.ArgumentException)
    {
        // expected
    }

    IOUtils.Close(searcher.IndexReader, taxoReader, dir, taxoDir);
}
public virtual void TestRootOnly2()
{
    // Commits a freshly created taxonomy and, with the writer still open,
    // verifies what a reader over the same directory sees: size 1, an empty
    // path at ordinal 0, no parent for ordinal 0, and ordinal 0 for the
    // empty FacetLabel.
    var taxoDirectory = NewDirectory();
    var taxoWriter = new DirectoryTaxonomyWriter(taxoDirectory);
    taxoWriter.Commit();

    var taxoReader = new DirectoryTaxonomyReader(taxoDirectory);
    Assert.AreEqual(1, taxoReader.Size);
    Assert.AreEqual(0, taxoReader.GetPath(0).Length);
    int[] parentOrdinals = taxoReader.ParallelTaxonomyArrays.Parents();
    Assert.AreEqual(TaxonomyReader.INVALID_ORDINAL, parentOrdinals[0]);
    Assert.AreEqual(0, taxoReader.GetOrdinal(new FacetLabel()));

    taxoWriter.Dispose();
    // NOTE(review): most tests call the parameterless Dispose(); confirm the
    // Dispose(true) overload is intended here.
    taxoReader.Dispose(true);
    taxoDirectory.Dispose();
}
public virtual void TestWriterTwice()
{
    // Runs FillTaxonomy twice through the same writer and verifies the
    // taxonomy size still equals the number of expected categories, i.e. the
    // second pass created no extraneous categories.
    var taxoDirectory = NewDirectory();
    var taxoWriter = new DirectoryTaxonomyWriter(taxoDirectory);

    FillTaxonomy(taxoWriter);
    // Second pass: tries to add the same categories again; per the original
    // note, FillTaxonomy also checks that the same ordinals come back.
    FillTaxonomy(taxoWriter);

    Assert.AreEqual(ExpectedCategories.Length, taxoWriter.Size);

    taxoWriter.Dispose();
    taxoDirectory.Dispose();
}
public virtual void TestWriterTwice3()
{
    // Regression test (per the original notes): after re-adding a few
    // existing categories, the writer stops looking each one up on disk,
    // reads them all into memory and closes its reader. The bug was that it
    // closed the reader without nulling the reference, so a later Commit()
    // tried to reopen it and hit an AlreadyClosedException.
    var taxoDirectory = NewDirectory();

    // Step 1: create and fill the taxonomy, then close the writer.
    var taxoWriter = new DirectoryTaxonomyWriter(taxoDirectory);
    FillTaxonomy(taxoWriter);
    taxoWriter.Dispose();

    // Step 2: reopen the same taxonomy and add the same categories again.
    taxoWriter = new DirectoryTaxonomyWriter(taxoDirectory);
    FillTaxonomy(taxoWriter);

    // Step 3: add one new category so that Commit() has something to do.
    taxoWriter.AddCategory(new FacetLabel("hi"));

    // Step 4: the commit that used to fail.
    taxoWriter.Commit();

    int expectedSize = ExpectedCategories.Length + 1;
    Assert.AreEqual(expectedSize, taxoWriter.Size);

    taxoWriter.Dispose();
    taxoDirectory.Dispose();
}
public virtual void TestWriterCheckPaths2()
{
    // Interleaves FillTaxonomy and CheckPaths on one writer, then repeats the
    // checks on a second writer opened over the same directory.
    var taxoDirectory = NewDirectory();

    // First writer: fill/check, then fill/check again.
    var taxoWriter = new DirectoryTaxonomyWriter(taxoDirectory);
    FillTaxonomy(taxoWriter);
    CheckPaths(taxoWriter);
    FillTaxonomy(taxoWriter);
    CheckPaths(taxoWriter);
    taxoWriter.Dispose();

    // Second writer: check before filling, then fill and check once more.
    taxoWriter = new DirectoryTaxonomyWriter(taxoDirectory);
    CheckPaths(taxoWriter);
    FillTaxonomy(taxoWriter);
    CheckPaths(taxoWriter);
    taxoWriter.Dispose();

    taxoDirectory.Dispose();
}
public virtual void TestWriterParent2()
{
    // Fills and commits a taxonomy, then hands a reader over the same
    // directory plus the still-open writer to CheckWriterParent.
    var taxoDirectory = NewDirectory();
    var taxoWriter = new DirectoryTaxonomyWriter(taxoDirectory);
    FillTaxonomy(taxoWriter);
    taxoWriter.Commit();

    var taxoReader = new DirectoryTaxonomyReader(taxoDirectory);
    CheckWriterParent(taxoReader, taxoWriter);

    taxoWriter.Dispose();
    taxoReader.Dispose();
    taxoDirectory.Dispose();
}
public virtual void TestChildrenArrays()
{
    // Verifies the Children()/Siblings() arrays of a filled taxonomy against
    // the child lists derived directly from ExpectedCategories: for every
    // category, following its youngest child and then the older-sibling
    // chain must visit exactly the expected children, youngest first.
    var indexDir = NewDirectory();
    var tw = new DirectoryTaxonomyWriter(indexDir);
    FillTaxonomy(tw);
    tw.Dispose();

    var tr = new DirectoryTaxonomyReader(indexDir);
    ParallelTaxonomyArrays ca = tr.ParallelTaxonomyArrays;
    int[] youngestChildArray = ca.Children();
    Assert.AreEqual(tr.Size, youngestChildArray.Length);
    int[] olderSiblingArray = ca.Siblings();
    Assert.AreEqual(tr.Size, olderSiblingArray.Length);

    for (int i = 0; i < ExpectedCategories.Length; i++)
    {
        // Find the expected children by scanning all expected categories,
        // highest index first so the list is already in youngest-to-oldest
        // order. Plain ints are enough: indices are never null.
        List<int> expectedChildren = new List<int>();
        for (int j = ExpectedCategories.Length - 1; j >= 0; j--)
        {
            if (ExpectedCategories[j].Length != ExpectedCategories[i].Length + 1)
            {
                continue; // not longer by 1, so can't be a child
            }

            bool isChild = true;
            for (int k = 0; k < ExpectedCategories[i].Length; k++)
            {
                if (!ExpectedCategories[j][k].Equals(ExpectedCategories[i][k]))
                {
                    isChild = false;
                    break;
                }
            }

            if (isChild)
            {
                expectedChildren.Add(j);
            }
        }

        // Check that children and expectedChildren are the same, in the
        // correct reverse (youngest to oldest) order:
        if (expectedChildren.Count == 0)
        {
            Assert.AreEqual(TaxonomyReader.INVALID_ORDINAL, youngestChildArray[i]);
        }
        else
        {
            int child = youngestChildArray[i];
            Assert.AreEqual(expectedChildren[0], child);
            for (int j = 1; j < expectedChildren.Count; j++)
            {
                child = olderSiblingArray[child];
                // If child were INVALID_ORDINAL we should stop, but the
                // assertion fails in that case anyway.
                Assert.AreEqual(expectedChildren[j], child);
            }

            // After the last expected child the sibling chain must end;
            // otherwise the taxonomy reports too many children.
            Assert.AreEqual(TaxonomyReader.INVALID_ORDINAL, olderSiblingArray[child]);
        }
    }

    tr.Dispose();
    indexDir.Dispose();
}
public virtual void TestTaxonomyReaderRefreshRaces()
{
    // Computes the youngest child of "a/b" at two points (after the first
    // two categories, and after all the "a/b/<i>" categories were added) and
    // then runs AssertConsistentYoungestChild repeatedly with both values.
    var baseDir = NewDirectory();
    var baseWriter = new DirectoryTaxonomyWriter(baseDir);
    baseWriter.AddCategory(new FacetLabel("a", "0"));
    FacetLabel abPath = new FacetLabel("a", "b");
    baseWriter.AddCategory(abPath);
    baseWriter.Commit();

    // Baseline #1: only "a/0" and "a/b" exist.
    var baseReader = new DirectoryTaxonomyReader(baseDir);
    ParallelTaxonomyArrays firstArrays = baseReader.ParallelTaxonomyArrays;
    int abOrd = baseReader.GetOrdinal(abPath);
    int abYoungChildBase1 = firstArrays.Children()[abOrd];

    int numCategories = AtLeast(800);
    for (int n = 0; n < numCategories; n++)
    {
        baseWriter.AddCategory(new FacetLabel("a", "b", Convert.ToString(n)));
    }
    baseWriter.Dispose();

    // Baseline #2: refresh the reader so it sees the children of "a/b".
    var refreshedReader = TaxonomyReader.OpenIfChanged(baseReader);
    Assert.NotNull(refreshedReader);
    baseReader.Dispose();
    baseReader = refreshedReader;
    ParallelTaxonomyArrays secondArrays = baseReader.ParallelTaxonomyArrays;
    int abYoungChildBase2 = secondArrays.Children()[abOrd];

    int numRetries = AtLeast(50);
    for (int attempt = 0; attempt < numRetries; attempt++)
    {
        AssertConsistentYoungestChild(abPath, abOrd, abYoungChildBase1, abYoungChildBase2, attempt, numCategories);
    }

    baseReader.Dispose();
    baseDir.Dispose();
}
public virtual void TestReplaceTaxonomyNrt()
{
    // Replaces the taxonomy underneath a writer-based SearcherTaxonomyManager
    // and expects the next MaybeRefresh() to throw IllegalStateException.
    Store.Directory dir = NewDirectory();
    Store.Directory taxoDir = NewDirectory();
    var analyzer = new MockAnalyzer(Random());
    var writerConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
    IndexWriter w = new IndexWriter(dir, writerConfig);
    var tw = new DirectoryTaxonomyWriter(taxoDir);

    // A second taxonomy, created and closed straight away, to replace with.
    Store.Directory replacementDir = NewDirectory();
    var replacementWriter = new DirectoryTaxonomyWriter(replacementDir);
    replacementWriter.Dispose();

    var mgr = new SearcherTaxonomyManager(w, true, null, tw);
    w.AddDocument(new Document());
    tw.ReplaceTaxonomy(replacementDir);
    replacementDir.Dispose();

    try
    {
        mgr.MaybeRefresh();
        Fail("should have hit exception");
    }
    catch (IllegalStateException)
    {
        // expected
    }

    IOUtils.Close(mgr, tw, w, taxoDir, dir);
}
private void AssertConsistentYoungestChild(FacetLabel abPath, int abOrd, int abYoungChildBase1, int abYoungChildBase2, int retry, int numCategories)
{
    // Builds a private taxonomy in a SlowRAMDirectory, adds numCategories
    // children under "a/b" that the already-open reader must not see, then
    // starts a helper thread while the reader is refreshed with a small
    // per-operation delay. Any exception the helper stored in error[0] fails
    // the test.
    // NOTE(review): the helper thread's body is defined elsewhere; presumably
    // it checks the youngest child of abOrd against the two base values -
    // confirm against ThreadAnonymousInnerClassHelper.
    var indexDir = new SlowRAMDirectory(-1, null); // no slowness for initialization
    var tw = new DirectoryTaxonomyWriter(indexDir);
    tw.AddCategory(new FacetLabel("a", "0"));
    tw.AddCategory(abPath);
    tw.Commit();

    var tr = new DirectoryTaxonomyReader(indexDir);
    for (int i = 0; i < numCategories; i++)
    {
        var cp = new FacetLabel("a", "b", Convert.ToString(i));
        tw.AddCategory(cp);
        Assert.AreEqual(TaxonomyReader.INVALID_ORDINAL, tr.GetOrdinal(cp), "Ordinal of " + cp + " must be invalid until Taxonomy Reader was refreshed");
    }
    tw.Dispose();

    // Single-element arrays are shared with the helper thread so it can
    // report back an exception and a retrieval counter.
    var stop = new AtomicBoolean(false);
    Exception[] error = new Exception[] { null };
    int[] retrieval = new int[] { 0 };

    var thread = new ThreadAnonymousInnerClassHelper(this, abPath, abOrd, abYoungChildBase1, abYoungChildBase2, retry, tr, stop, error, retrieval);
    thread.Start();

    indexDir.SleepMillis = 1; // some delay for refresh
    var newTaxoReader = TaxonomyReader.OpenIfChanged(tr);
    if (newTaxoReader != null)
    {
        newTaxoReader.Dispose();
    }

    stop.Set(true);
    thread.Join();
    Assert.Null(error[0], "Unexpected exception at retry " + retry + " retrieval " + retrieval[0] + ": \n" + stackTraceStr(error[0]));

    tr.Dispose();
    // Release the directory too; every other directory in these tests is
    // disposed once its readers and writers are closed.
    indexDir.Dispose();
}
public virtual void TestReplaceTaxonomyDirectory()
{
    // Replaces the taxonomy underneath a directory-based
    // SearcherTaxonomyManager and checks that, after committing and
    // refreshing, the manager hands out a taxonomy reader of size 3.
    Store.Directory indexDir = NewDirectory();
    Store.Directory taxoDir = NewDirectory();
    var analyzer = new MockAnalyzer(Random());
    var writerConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
    IndexWriter w = new IndexWriter(indexDir, writerConfig);
    var tw = new DirectoryTaxonomyWriter(taxoDir);
    w.Commit();
    tw.Commit();

    // The replacement taxonomy holds the single category "a/b".
    Store.Directory replacementDir = NewDirectory();
    var replacementWriter = new DirectoryTaxonomyWriter(replacementDir);
    replacementWriter.AddCategory(new FacetLabel("a", "b"));
    replacementWriter.Dispose();

    var mgr = new SearcherTaxonomyManager(indexDir, taxoDir, null);

    // Before the replacement the taxonomy reader has size 1.
    SearcherAndTaxonomy acquired = mgr.Acquire();
    try
    {
        Assert.AreEqual(1, acquired.taxonomyReader.Size);
    }
    finally
    {
        mgr.Release(acquired);
    }

    w.AddDocument(new Document());
    tw.ReplaceTaxonomy(replacementDir);
    replacementDir.Dispose();
    w.Commit();
    tw.Commit();

    mgr.MaybeRefresh();

    // After commit + refresh the reader has size 3.
    acquired = mgr.Acquire();
    try
    {
        Assert.AreEqual(3, acquired.taxonomyReader.Size);
    }
    finally
    {
        mgr.Release(acquired);
    }

    IOUtils.Close(mgr, tw, w, taxoDir, indexDir);
}
public static void BeforeClass()
{
    // One-time fixture: builds the index and taxonomy shared by the tests and
    // leaves open readers in the static fields `reader` and `taxoReader`.
    dir = NewDirectory();
    taxoDir = NewDirectory();

    var taxonomyWriter = new DirectoryTaxonomyWriter(taxoDir);

    // Ints and floats cannot share an indexed field, so each dimension gets
    // its own index field:
    config = new FacetsConfig();
    config.SetIndexFieldName("int", "$facets.int");
    config.SetMultiValued("int", true);
    config.SetIndexFieldName("float", "$facets.float");
    config.SetMultiValued("float", true);

    var indexWriter = new RandomIndexWriter(Random(), dir);

    // 110 documents: every 11th one is added empty (per the original note,
    // empty documents used to send the association aggregators into an
    // infinite loop). All others carry the "a" associations, and those with
    // an even index additionally carry the "b" associations.
    for (int docIndex = 0; docIndex < 110; docIndex++)
    {
        Document doc = new Document();
        bool leaveEmpty = docIndex % 11 == 0;
        if (!leaveEmpty)
        {
            doc.Add(new IntAssociationFacetField(2, "int", "a"));
            doc.Add(new FloatAssociationFacetField(0.5f, "float", "a"));

            bool alsoAddB = docIndex % 2 == 0;
            if (alsoAddB)
            {
                doc.Add(new IntAssociationFacetField(3, "int", "b"));
                doc.Add(new FloatAssociationFacetField(0.2f, "float", "b"));
            }
        }
        indexWriter.AddDocument(config.Build(taxonomyWriter, doc));
    }

    taxonomyWriter.Dispose();
    reader = indexWriter.Reader;
    indexWriter.Dispose();
    taxoReader = new DirectoryTaxonomyReader(taxoDir);
}
public virtual void TestBasic()
{
    // Indexes five documents, each with a "num" int field and an "Author"
    // facet, then sums "num" per author and checks the rendered top children.
    Store.Directory dir = NewDirectory();
    Store.Directory taxoDir = NewDirectory();

    // Writes facet ords to a separate directory from the main index:
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE);
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir);
    FacetsConfig config = new FacetsConfig();

    // (num, Author) pairs, added in this order:
    int[] nums = new int[] { 10, 20, 30, 40, 45 };
    string[] authors = new string[] { "Bob", "Lisa", "Lisa", "Susan", "Frank" };
    for (int n = 0; n < nums.Length; n++)
    {
        Document doc = new Document();
        doc.Add(new IntField("num", nums[n], Field.Store.NO));
        doc.Add(new FacetField("Author", authors[n]));
        writer.AddDocument(config.Build(taxoWriter, doc));
    }

    // NRT open
    IndexSearcher searcher = NewSearcher(writer.Reader);
    writer.Dispose();

    // NRT open
    var taxoReader = new DirectoryTaxonomyReader(taxoWriter);
    taxoWriter.Dispose();

    // Aggregate over all documents: MatchAllDocsQuery is for "browsing"
    // (every non-deleted doc in the index); normally you'd use a "normal"
    // query and one of the Facets.search utility methods.
    FacetsCollector c = new FacetsCollector();
    searcher.Search(new MatchAllDocsQuery(), c);

    TaxonomyFacetSumValueSource facets = new TaxonomyFacetSumValueSource(taxoReader, new FacetsConfig(), c, new IntFieldSource("num"));

    // Retrieve & verify results.
    // NOTE(review): the expected text is copied verbatim from the source as
    // received; confirm the whitespace before each child label matches what
    // FacetResult.ToString() emits.
    string expected = "dim=Author path=[] value=145.0 childCount=4\n Lisa (50.0)\n Frank (45.0)\n Susan (40.0)\n Bob (10.0)\n";
    Assert.AreEqual(expected, facets.GetTopChildren(10, "Author").ToString());

    taxoReader.Dispose();
    searcher.IndexReader.Dispose();
    dir.Dispose();
    taxoDir.Dispose();
}
public virtual void TestSeparateReaderAndWriter()
{
    // Checks that a reader opened on the directory only sees the writer's
    // additions after both a Commit() and an OpenIfChanged() refresh, and
    // that the parents array is correct after each refresh.
    var taxoDirectory = NewDirectory();
    var taxoWriter = new DirectoryTaxonomyWriter(taxoDirectory);
    taxoWriter.Commit();

    var taxoReader = new DirectoryTaxonomyReader(taxoDirectory);
    Assert.AreEqual(1, taxoReader.Size); // the empty taxonomy has size 1 (the root)

    // Uncommitted addition: the reader is unchanged and there is nothing to
    // reopen, because Commit() has not been called yet.
    taxoWriter.AddCategory(new FacetLabel("Author"));
    Assert.AreEqual(1, taxoReader.Size);
    Assert.Null(TaxonomyReader.OpenIfChanged(taxoReader));
    Assert.AreEqual(1, taxoReader.Size);

    // Committed but not refreshed: still root only.
    taxoWriter.Commit();
    Assert.AreEqual(1, taxoReader.Size);

    var refreshed = TaxonomyReader.OpenIfChanged(taxoReader);
    Assert.NotNull(refreshed);
    taxoReader.Dispose();
    taxoReader = refreshed;

    int author = 1;
    try
    {
        Assert.AreEqual(TaxonomyReader.ROOT_ORDINAL, taxoReader.ParallelTaxonomyArrays.Parents()[author]);
        // ok
    }
    catch (System.IndexOutOfRangeException)
    {
        Fail("After category addition, commit() and refresh(), getParent for " + author + " should NOT throw exception");
    }
    Assert.AreEqual(2, taxoReader.Size); // finally, there are two categories

    // Add another category and verify that, after commit and refresh, its
    // parent is correct (the reader has to update its prefetched parent
    // vector) and that the older information was not ruined:
    taxoWriter.AddCategory(new FacetLabel("Author", "Richard Dawkins"));
    int dawkins = 2;
    taxoWriter.Commit();

    refreshed = TaxonomyReader.OpenIfChanged(taxoReader);
    Assert.NotNull(refreshed);
    taxoReader.Dispose();
    taxoReader = refreshed;

    int[] parentOrdinals = taxoReader.ParallelTaxonomyArrays.Parents();
    Assert.AreEqual(author, parentOrdinals[dawkins]);
    Assert.AreEqual(TaxonomyReader.ROOT_ORDINAL, parentOrdinals[author]);
    Assert.AreEqual(TaxonomyReader.INVALID_ORDINAL, parentOrdinals[TaxonomyReader.ROOT_ORDINAL]);
    Assert.AreEqual(3, taxoReader.Size);

    taxoWriter.Dispose();
    taxoReader.Dispose();
    taxoDirectory.Dispose();
}
public virtual void TestChildrenArraysGrowth()
{
    // Verifies that the Children()/Siblings() arrays keep their old contents
    // until the reader is refreshed, and hold the expected values after two
    // more categories are added and the reader is reopened.
    var taxoDirectory = NewDirectory();
    var taxoWriter = new DirectoryTaxonomyWriter(taxoDirectory);
    taxoWriter.AddCategory(new FacetLabel("hi", "there"));
    taxoWriter.Commit();

    var taxoReader = new DirectoryTaxonomyReader(taxoDirectory);
    ParallelTaxonomyArrays arrays = taxoReader.ParallelTaxonomyArrays;
    Assert.AreEqual(3, taxoReader.Size);
    Assert.AreEqual(3, arrays.Siblings().Length);
    Assert.AreEqual(3, arrays.Children().Length);
    Assert.True(Arrays.Equals(new int[] { 1, 2, -1 }, arrays.Children()));
    Assert.True(Arrays.Equals(new int[] { -1, -1, -1 }, arrays.Siblings()));

    taxoWriter.AddCategory(new FacetLabel("hi", "ho"));
    taxoWriter.AddCategory(new FacetLabel("hello"));
    taxoWriter.Commit();

    // Before the refresh nothing changed: the reader still hands out exactly
    // the same arrays object.
    ParallelTaxonomyArrays arraysBeforeRefresh = taxoReader.ParallelTaxonomyArrays;
    Assert.AreSame(arraysBeforeRefresh, arrays);
    Assert.AreEqual(3, taxoReader.Size);
    Assert.AreEqual(3, arrays.Siblings().Length);
    Assert.AreEqual(3, arrays.Children().Length);

    // After the refresh, things change:
    var refreshed = TaxonomyReader.OpenIfChanged(taxoReader);
    Assert.NotNull(refreshed);
    taxoReader.Dispose();
    taxoReader = refreshed;
    arrays = taxoReader.ParallelTaxonomyArrays;
    Assert.AreEqual(5, taxoReader.Size);
    Assert.AreEqual(5, arrays.Siblings().Length);
    Assert.AreEqual(5, arrays.Children().Length);
    Assert.True(Arrays.Equals(new int[] { 4, 3, -1, -1, -1 }, arrays.Children()));
    Assert.True(Arrays.Equals(new int[] { -1, -1, -1, 2, 1 }, arrays.Siblings()));

    taxoWriter.Dispose();
    taxoReader.Dispose();
    taxoDirectory.Dispose();
}
public virtual void TestSeparateReaderAndWriter2()
{
    // GetOrdinal() variant of the reader/writer visibility test: a category
    // stays INVALID_ORDINAL for the reader until the writer has committed
    // and the reader has been reopened.
    var taxoDirectory = NewDirectory();
    var taxoWriter = new DirectoryTaxonomyWriter(taxoDirectory);
    taxoWriter.Commit();
    var taxoReader = new DirectoryTaxonomyReader(taxoDirectory);

    FacetLabel author = new FacetLabel("Author");
    Assert.AreEqual(1, taxoReader.Size); // the empty taxonomy has size 1 (the root)
    Assert.AreEqual(TaxonomyReader.INVALID_ORDINAL, taxoReader.GetOrdinal(author));

    // Added but neither committed nor refreshed: no change.
    taxoWriter.AddCategory(author);
    Assert.AreEqual(TaxonomyReader.INVALID_ORDINAL, taxoReader.GetOrdinal(author));
    Assert.AreEqual(1, taxoReader.Size);

    // Reopening is not enough, because Commit() has not been called yet.
    Assert.Null(TaxonomyReader.OpenIfChanged(taxoReader));
    Assert.AreEqual(TaxonomyReader.INVALID_ORDINAL, taxoReader.GetOrdinal(author));
    Assert.AreEqual(1, taxoReader.Size);

    // Committed, but the old reader was not refreshed: still not enough.
    taxoWriter.Commit();
    Assert.AreEqual(TaxonomyReader.INVALID_ORDINAL, taxoReader.GetOrdinal(author));
    Assert.AreEqual(1, taxoReader.Size);

    // Commit + refresh: the category is finally visible.
    var refreshed = TaxonomyReader.OpenIfChanged(taxoReader);
    Assert.NotNull(refreshed);
    taxoReader.Dispose();
    taxoReader = refreshed;
    Assert.AreEqual(1, taxoReader.GetOrdinal(author));
    Assert.AreEqual(2, taxoReader.Size);

    taxoWriter.Dispose();
    taxoReader.Dispose();
    taxoDirectory.Dispose();
}
public virtual void TestChildrenArraysInvariants()
{
    // Cross-checks the Children(), Siblings() and Parents() arrays of a
    // filled taxonomy against each other.
    var taxoDirectory = NewDirectory();
    var taxoWriter = new DirectoryTaxonomyWriter(taxoDirectory);
    FillTaxonomy(taxoWriter);
    taxoWriter.Dispose();

    var tr = new DirectoryTaxonomyReader(taxoDirectory);
    ParallelTaxonomyArrays arrays = tr.ParallelTaxonomyArrays;
    int[] children = arrays.Children();
    Assert.AreEqual(tr.Size, children.Length);
    int[] olderSiblingArray = arrays.Siblings();
    Assert.AreEqual(tr.Size, olderSiblingArray.Length);

    int[] parents = tr.ParallelTaxonomyArrays.Parents();
    int size = tr.Size;

    // Invariant 1: the "youngest child" of every category really is one of
    // its children.
    for (int ord = 0; ord < size; ord++)
    {
        int youngestChild = children[ord];
        if (youngestChild != TaxonomyReader.INVALID_ORDINAL)
        {
            Assert.AreEqual(ord, parents[youngestChild]);
        }
    }

    // Invariant 2: the "older sibling" of every category is older, i.e. has
    // a lower ordinal (INVALID_ORDINAL counts as lower than any ordinal).
    for (int ord = 0; ord < size; ord++)
    {
        Assert.True(olderSiblingArray[ord] < ord, "olderSiblingArray[" + ord + "] should be <" + ord);
    }

    // Invariant 3: the "older sibling" really is a sibling (same parent).
    for (int ord = 0; ord < size; ord++)
    {
        int sibling = olderSiblingArray[ord];
        if (sibling != TaxonomyReader.INVALID_ORDINAL)
        {
            Assert.AreEqual(parents[ord], parents[sibling]);
        }
    }

    // Slightly more complex (and less "invariant-like") checks follow.

    // The "youngest child" must be the highest-ordinal child, so no children
    // are missed at the start of the chain.
    for (int ord = 0; ord < size; ord++)
    {
        // Scan down from the highest ordinal until a child of `ord` is found
        // or the scan reaches `ord` itself.
        int candidate = size - 1;
        while (candidate > ord && parents[candidate] != ord)
        {
            candidate--;
        }
        int expectedYoungest = (candidate == ord) ? TaxonomyReader.INVALID_ORDINAL : candidate;
        Assert.AreEqual(expectedYoungest, children[ord]);
    }

    // The "older sibling" must be the closest older one - not a too old one
    // or -1 - so no children are missed in the middle or end of the chain.
    for (int ord = 0; ord < size; ord++)
    {
        // Scan down from ord - 1 until an ordinal with the same parent is
        // found or the scan runs off the start.
        int candidate = ord - 1;
        while (candidate >= 0 && parents[candidate] != parents[ord])
        {
            candidate--;
        }
        int expectedSibling = (candidate < 0) ? TaxonomyReader.INVALID_ORDINAL : candidate;
        Assert.AreEqual(expectedSibling, olderSiblingArray[ord]);
    }

    tr.Dispose();
    taxoDirectory.Dispose();
}
public virtual void TestWriterCheckPaths()
{
    // Runs FillTaxonomyCheckPaths on a fresh writer and verifies that the
    // writer's Size equals the number of expected categories.
    var taxoDirectory = NewDirectory();
    var taxoWriter = new DirectoryTaxonomyWriter(taxoDirectory);

    FillTaxonomyCheckPaths(taxoWriter);

    int expectedSize = ExpectedCategories.Length;
    Assert.AreEqual(expectedSize, taxoWriter.Size);

    taxoWriter.Dispose();
    taxoDirectory.Dispose();
}
public virtual void TestNrt()
{
    // Opens a reader directly on the writer, adds a category without
    // committing, and verifies that OpenIfChanged returns a new reader that
    // already sees the category.
    var dir = NewDirectory();
    var writer = new DirectoryTaxonomyWriter(dir);
    var reader = new DirectoryTaxonomyReader(writer);

    FacetLabel cp = new FacetLabel("a");
    writer.AddCategory(cp);

    var newReader = TaxonomyReader.OpenIfChanged(reader);
    Assert.NotNull(newReader, "expected a new instance");
    Assert.AreEqual(2, newReader.Size);
    // Compare by value: AreNotSame would box both ints into distinct objects
    // and therefore pass even when the ordinal is INVALID_ORDINAL.
    Assert.AreNotEqual(TaxonomyReader.INVALID_ORDINAL, newReader.GetOrdinal(cp));

    reader.Dispose();
    reader = newReader;

    writer.Dispose();
    reader.Dispose();
    dir.Dispose();
}
public virtual void TestWriterLock()
{
    // Verifies that a second writer cannot be opened on a directory while
    // the first writer still holds it, and that it can once the lock is
    // removed with Unlock().
    // A RAMDirectory is used explicitly because, per the original note, the
    // native fslock implementation does not tolerate this scenario.
    var lockedDir = new RAMDirectory();
    var firstWriter = new DirectoryTaxonomyWriter(lockedDir);
    firstWriter.AddCategory(new FacetLabel("hi", "there"));
    firstWriter.Commit();
    // firstWriter is deliberately left open here so the directory stays
    // locked.

    // Verify that the writer worked:
    var taxoReader = new DirectoryTaxonomyReader(lockedDir);
    Assert.AreEqual(2, taxoReader.GetOrdinal(new FacetLabel("hi", "there")));

    // Opening a second writer on the locked directory must fail with a
    // LockObtainFailedException.
    try
    {
        Assert.Null(new DirectoryTaxonomyWriter(lockedDir));
        Fail("should have failed to write in locked directory");
    }
    catch (LockObtainFailedException)
    {
        // this is what we expect to happen.
    }

    // Remove the lock; now the open succeeds and the new writer can write.
    DirectoryTaxonomyWriter.Unlock(lockedDir);
    var secondWriter = new DirectoryTaxonomyWriter(lockedDir);
    secondWriter.AddCategory(new FacetLabel("hey"));
    secondWriter.Dispose();

    // See that the second writer indeed wrote:
    var refreshed = TaxonomyReader.OpenIfChanged(taxoReader);
    Assert.NotNull(refreshed);
    taxoReader.Dispose();
    taxoReader = refreshed;
    Assert.AreEqual(3, taxoReader.GetOrdinal(new FacetLabel("hey")));

    taxoReader.Dispose();
    firstWriter.Dispose();
    lockedDir.Dispose();
}
public virtual void TestReaderBasic()
{
    // Exercises the basic reader API on a filled taxonomy: Size, GetPath and
    // GetOrdinal, including invalid ordinals and unknown categories.
    var taxoDirectory = NewDirectory();
    var taxoWriter = new DirectoryTaxonomyWriter(taxoDirectory);
    FillTaxonomy(taxoWriter);
    taxoWriter.Dispose();
    var taxoReader = new DirectoryTaxonomyReader(taxoDirectory);

    // Size:
    Assert.AreEqual(ExpectedCategories.Length, taxoReader.Size);

    // Round trip ordinal => category => ordinal:
    for (int ord = 0; ord < taxoReader.Size; ord++)
    {
        FacetLabel path = taxoReader.GetPath(ord);
        Assert.AreEqual(ord, taxoReader.GetOrdinal(path));
    }

    // GetPath for every ordinal from 1 up:
    for (int ord = 1; ord < taxoReader.Size; ord++)
    {
        FacetLabel expectedCategory = new FacetLabel(ExpectedCategories[ord]);
        FacetLabel actualCategory = taxoReader.GetPath(ord);
        if (expectedCategory.Equals(actualCategory))
        {
            continue;
        }
        Fail("For ordinal " + ord + " expected category " + Showcat(expectedCategory) + ", but got " + Showcat(actualCategory));
    }

    // GetPath with invalid ordinals returns null:
    Assert.Null(taxoReader.GetPath(-1));
    Assert.Null(taxoReader.GetPath(taxoReader.Size));
    Assert.Null(taxoReader.GetPath(TaxonomyReader.INVALID_ORDINAL));

    // GetOrdinal for every expected category from index 1 up:
    for (int ord = 1; ord < ExpectedCategories.Length; ord++)
    {
        int actualOrdinal = taxoReader.GetOrdinal(new FacetLabel(ExpectedCategories[ord]));
        if (actualOrdinal != ord)
        {
            Fail("For category " + Showcat(ExpectedCategories[ord]) + " expected ordinal " + ord + ", but got " + actualOrdinal);
        }
    }

    // GetOrdinal with unknown categories returns INVALID_ORDINAL:
    Assert.AreEqual(TaxonomyReader.INVALID_ORDINAL, taxoReader.GetOrdinal(new FacetLabel("non-existant")));
    Assert.AreEqual(TaxonomyReader.INVALID_ORDINAL, taxoReader.GetOrdinal(new FacetLabel("Author", "Jules Verne")));

    taxoReader.Dispose();
    taxoDirectory.Dispose();
}
public virtual void TestWriterSimpler()
{
    // Walks through a fixed sequence of AddCategory calls and checks, after
    // each one, the returned ordinal and the resulting writer Size.
    var taxoDirectory = NewDirectory();
    var taxoWriter = new DirectoryTaxonomyWriter(taxoDirectory);
    Assert.AreEqual(1, taxoWriter.Size); // the root only

    // A new top-level category gets ordinal 1.
    Assert.AreEqual(1, taxoWriter.AddCategory(new FacetLabel("a")));
    Assert.AreEqual(2, taxoWriter.Size);

    // Re-adding the same category returns the same ordinal, not a new one.
    Assert.AreEqual(1, taxoWriter.AddCategory(new FacetLabel("a")));
    Assert.AreEqual(2, taxoWriter.Size);

    // Another top-level category gets a new ordinal.
    Assert.AreEqual(2, taxoWriter.AddCategory(new FacetLabel("b")));
    Assert.AreEqual(3, taxoWriter.Size);

    // A category inside an existing one adds just one new ordinal.
    Assert.AreEqual(3, taxoWriter.AddCategory(new FacetLabel("a", "c")));
    Assert.AreEqual(4, taxoWriter.Size);

    // Re-adding the same second-level category changes nothing.
    Assert.AreEqual(3, taxoWriter.AddCategory(new FacetLabel("a", "c")));
    Assert.AreEqual(4, taxoWriter.Size);

    // A second-level category with two new components adds two categories.
    Assert.AreEqual(5, taxoWriter.AddCategory(new FacetLabel("d", "e")));
    Assert.AreEqual(6, taxoWriter.Size);

    // The parent "d" was added above, before its child, so it has ordinal 4.
    Assert.AreEqual(4, taxoWriter.AddCategory(new FacetLabel("d")));

    // Similar, but inside a category that already exists:
    Assert.AreEqual(7, taxoWriter.AddCategory(new FacetLabel("b", "d", "e")));
    Assert.AreEqual(8, taxoWriter.Size);

    // And now inside two levels of categories that already exist:
    Assert.AreEqual(8, taxoWriter.AddCategory(new FacetLabel("b", "d", "f")));
    Assert.AreEqual(9, taxoWriter.Size);

    taxoWriter.Dispose();
    taxoDirectory.Dispose();
}
public virtual void TestReaderParent()
{
    // Verifies the reader's Parents() array on a filled taxonomy: ordinal 0
    // has no parent, and every other ordinal's parent path equals its own
    // path minus the last component.
    var taxoDirectory = NewDirectory();
    var taxoWriter = new DirectoryTaxonomyWriter(taxoDirectory);
    FillTaxonomy(taxoWriter);
    taxoWriter.Dispose();
    var taxoReader = new DirectoryTaxonomyReader(taxoDirectory);

    // The parent of the root ordinal is the invalid ordinal:
    int[] parentOrdinals = taxoReader.ParallelTaxonomyArrays.Parents();
    Assert.AreEqual(TaxonomyReader.INVALID_ORDINAL, parentOrdinals[0]);

    // Parents of the non-root ordinals:
    for (int ord = 1; ord < taxoReader.Size; ord++)
    {
        FacetLabel self = taxoReader.GetPath(ord);
        int parentOrd = parentOrdinals[ord];
        FacetLabel parentPath = taxoReader.GetPath(parentOrd);
        if (parentPath == null)
        {
            Fail("Parent of " + ord + " is " + parentOrd + ", but this is not a valid category.");
        }

        // The parent must indeed be my parent, according to the strings.
        FacetLabel expectedParentPath = self.Subpath(self.Length - 1);
        if (!expectedParentPath.Equals(parentPath))
        {
            Fail("Got parent " + parentOrd + " for ordinal " + ord + " but categories are " + Showcat(parentPath) + " and " + Showcat(self) + " respectively.");
        }
    }

    taxoReader.Dispose();
    taxoDirectory.Dispose();
}
public virtual void TestWriterTwice2()
{
    // Fills the taxonomy, closes the writer, then fills it again through a
    // second writer on the same directory and verifies that the number of
    // categories has not grown.
    var taxoDirectory = NewDirectory();

    var taxoWriter = new DirectoryTaxonomyWriter(taxoDirectory);
    FillTaxonomy(taxoWriter);
    taxoWriter.Dispose();

    taxoWriter = new DirectoryTaxonomyWriter(taxoDirectory);
    // Second pass: tries to add the same categories again; per the original
    // note, FillTaxonomy also checks that the same ordinals come back.
    FillTaxonomy(taxoWriter);
    Assert.AreEqual(ExpectedCategories.Length, taxoWriter.Size);
    taxoWriter.Dispose();

    taxoDirectory.Dispose();
}
public virtual void TestRootOnly()
{
    // Verifies that a brand-new taxonomy contains just the root, as seen both
    // by the writer and by a reader opened after the writer was closed.
    var taxoDirectory = NewDirectory();

    // Right after opening, the writer should already report the root, so
    // Size is 1:
    var taxoWriter = new DirectoryTaxonomyWriter(taxoDirectory);
    Assert.AreEqual(1, taxoWriter.Size);
    taxoWriter.Dispose();

    var taxoReader = new DirectoryTaxonomyReader(taxoDirectory);
    Assert.AreEqual(1, taxoReader.Size);
    Assert.AreEqual(0, taxoReader.GetPath(0).Length);
    int[] parentOrdinals = taxoReader.ParallelTaxonomyArrays.Parents();
    Assert.AreEqual(TaxonomyReader.INVALID_ORDINAL, parentOrdinals[0]);
    Assert.AreEqual(0, taxoReader.GetOrdinal(new FacetLabel()));

    // NOTE(review): most tests call the parameterless Dispose(); confirm the
    // Dispose(true) overload is intended here.
    taxoReader.Dispose(true);
    taxoDirectory.Dispose();
}
public static void BeforeClassDrillDownQueryTest()
{
    // One-time fixture: builds a 100-document index with "content" text and
    // "a"/"b" facets under a randomized per-dimension config, leaving open
    // readers in the static fields `reader` and `taxo`.
    dir = NewDirectory();
    Random r = Random();
    var analyzer = new MockAnalyzer(r, MockTokenizer.KEYWORD, false);
    var writerConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
    RandomIndexWriter writer = new RandomIndexWriter(r, dir, writerConfig);

    taxoDir = NewDirectory();
    TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
    config = new FacetsConfig();

    // Randomize the per-dim config, first for "a" and then for "b". The
    // random draws happen in the same order for each dimension: hierarchical,
    // multi-valued, then whether to use a custom index field ("$a" / "$b").
    foreach (string dim in new string[] { "a", "b" })
    {
        config.SetHierarchical(dim, Random().NextBoolean());
        config.SetMultiValued(dim, Random().NextBoolean());
        if (Random().NextBoolean())
        {
            config.SetIndexFieldName(dim, "$" + dim);
        }
        config.SetRequireDimCount(dim, true);
    }

    for (int docIndex = 0; docIndex < 100; docIndex++)
    {
        Document doc = new Document();

        if (docIndex % 2 == 0) // 50 docs
        {
            doc.Add(new TextField("content", "foo", Field.Store.NO));
        }

        if (docIndex % 3 == 0) // 33 (original count note)
        {
            doc.Add(new TextField("content", "bar", Field.Store.NO));
        }

        if (docIndex % 4 == 0) // 25 docs
        {
            // A coin flip picks which "a" value the document gets.
            string aValue = r.NextBoolean() ? "1" : "2";
            doc.Add(new FacetField("a", aValue));
        }

        if (docIndex % 5 == 0) // 20 docs
        {
            doc.Add(new FacetField("b", "1"));
        }

        writer.AddDocument(config.Build(taxoWriter, doc));
    }

    taxoWriter.Dispose();
    reader = writer.Reader;
    writer.Dispose();
    taxo = new DirectoryTaxonomyReader(taxoDir);
}
public virtual void TestSparseFacets()
{
    // Indexes three documents with one, two and three facet dimensions
    // respectively (with random commits in between), then checks the summed
    // "num" values reported per dimension by GetAllDims.
    Store.Directory dir = NewDirectory();
    Store.Directory taxoDir = NewDirectory();

    // Writes facet ords to a separate directory from the main index:
    var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE);
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir);
    FacetsConfig config = new FacetsConfig();

    // Document 1: num=10, dimension "a" only.
    Document first = new Document();
    first.Add(new IntField("num", 10, Field.Store.NO));
    first.Add(new FacetField("a", "foo1"));
    writer.AddDocument(config.Build(taxoWriter, first));

    if (Random().NextBoolean())
    {
        writer.Commit();
    }

    // Document 2: num=20, dimensions "a" and "b".
    Document second = new Document();
    second.Add(new IntField("num", 20, Field.Store.NO));
    second.Add(new FacetField("a", "foo2"));
    second.Add(new FacetField("b", "bar1"));
    writer.AddDocument(config.Build(taxoWriter, second));

    if (Random().NextBoolean())
    {
        writer.Commit();
    }

    // Document 3: num=30, dimensions "a", "b" and "c".
    Document third = new Document();
    third.Add(new IntField("num", 30, Field.Store.NO));
    third.Add(new FacetField("a", "foo3"));
    third.Add(new FacetField("b", "bar2"));
    third.Add(new FacetField("c", "baz1"));
    writer.AddDocument(config.Build(taxoWriter, third));

    // NRT open
    IndexSearcher searcher = NewSearcher(writer.Reader);
    writer.Dispose();

    // NRT open
    var taxoReader = new DirectoryTaxonomyReader(taxoWriter);
    taxoWriter.Dispose();

    FacetsCollector c = new FacetsCollector();
    searcher.Search(new MatchAllDocsQuery(), c);

    TaxonomyFacetSumValueSource facets = new TaxonomyFacetSumValueSource(taxoReader, new FacetsConfig(), c, new IntFieldSource("num"));

    // Ask for top 10 labels for any dims that have counts.
    // NOTE(review): the expected strings are copied verbatim from the source
    // as received; confirm the whitespace before each child label matches
    // what FacetResult.ToString() emits.
    IList<FacetResult> results = facets.GetAllDims(10);
    Assert.AreEqual(3, results.Count);

    string expectedA = "dim=a path=[] value=60.0 childCount=3\n foo3 (30.0)\n foo2 (20.0)\n foo1 (10.0)\n";
    string expectedB = "dim=b path=[] value=50.0 childCount=2\n bar2 (30.0)\n bar1 (20.0)\n";
    string expectedC = "dim=c path=[] value=30.0 childCount=1\n baz1 (30.0)\n";
    Assert.AreEqual(expectedA, results[0].ToString());
    Assert.AreEqual(expectedB, results[1].ToString());
    Assert.AreEqual(expectedC, results[2].ToString());

    IOUtils.Close(searcher.IndexReader, taxoReader, dir, taxoDir);
}