public virtual void TestCloseAfterIncRef()
{
    // A reader whose reference count was incremented before Dispose() is
    // expected to stay usable until the extra reference is released.
    Directory directory = NewDirectory();

    // Build a minimal taxonomy holding a single category.
    var taxoWriter = new DirectoryTaxonomyWriter(directory);
    taxoWriter.AddCategory(new FacetLabel("a"));
    taxoWriter.Dispose();

    var taxoReader = new DirectoryTaxonomyReader(directory);
    taxoReader.IncRef();
    taxoReader.Dispose();

    // Should not fail, because IncRef() was called before Dispose().
    var size = taxoReader.Size;

    taxoReader.DecRef();
    directory.Dispose();
}
public virtual void TestAlreadyClosed()
{
    // Reading from a disposed taxonomy reader is expected to raise AlreadyClosedException.
    Directory directory = NewDirectory();

    var taxoWriter = new DirectoryTaxonomyWriter(directory);
    taxoWriter.AddCategory(new FacetLabel("a"));
    taxoWriter.Dispose();

    var taxoReader = new DirectoryTaxonomyReader(directory);
    taxoReader.Dispose();

    try
    {
        var size = taxoReader.Size;
        Fail("An AlreadyClosedException should have been thrown here");
    }
    catch (AlreadyClosedException)
    {
        // Expected: the reader was disposed before the property was read.
    }

    directory.Dispose();
}
public virtual void TestCloseAfterIncRef()
{
    // The extra reference taken via IncRef() must keep the reader open
    // across the Dispose() call below.
    Directory taxoDir = NewDirectory();

    var writer = new DirectoryTaxonomyWriter(taxoDir);
    writer.AddCategory(new FacetLabel("a"));
    writer.Dispose();

    var reader = new DirectoryTaxonomyReader(taxoDir);
    reader.IncRef();
    reader.Dispose();

    // Must not throw: one reference is still outstanding.
    var currentSize = reader.Size;

    // Release the outstanding reference, then the directory.
    reader.DecRef();
    taxoDir.Dispose();
}
public virtual void TestOpenIfChangedMergedSegment()
{
    // Non-random counterpart of testOpenIfChangedManySegments: exercises
    // openIfChanged() after all index segments were merged, which used to
    // expose a bug in ParentArray.
    Directory directory = NewDirectory();

    // Keep a handle on the IndexWriter so ForceMerge can be invoked on it.
    // It is intentionally not disposed here, since DTW will close it.
    var writerConfig = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
        .SetMergePolicy(new LogByteSizeMergePolicy());
    IndexWriter indexWriter = new IndexWriter(directory, writerConfig);

    // LUCENENET: The index writer has to be available before the base class constructor runs,
    // because the DirectoryTaxonomyWriter constructor is the consumer of the OpenIndexWriter method.
    // The only option seems to be to assign it statically before creating the instance.
    DirectoryTaxonomyWriterAnonymousInnerClassHelper2.iw = indexWriter;
    var taxoWriter = new DirectoryTaxonomyWriterAnonymousInnerClassHelper2(directory);

    var initialReader = new DirectoryTaxonomyReader(taxoWriter);
    Assert.AreEqual(1, initialReader.Count);
    Assert.AreEqual(1, initialReader.ParallelTaxonomyArrays.Parents.Length);

    // Add a category and call ForceMerge -- this should flush IW and merge segments down to 1.
    // In ParentArray.initFromReader, this used to fail assuming there are no parents.
    taxoWriter.AddCategory(new FacetLabel("1"));
    indexWriter.ForceMerge(1);

    // Reopening at this point used to trip on the bug.
    var reopenedReader = TaxonomyReader.OpenIfChanged(initialReader);
    Assert.IsNotNull(reopenedReader);
    initialReader.Dispose();

    Assert.AreEqual(2, reopenedReader.Count);
    Assert.AreEqual(2, reopenedReader.ParallelTaxonomyArrays.Parents.Length);

    reopenedReader.Dispose();
    taxoWriter.Dispose();
    directory.Dispose();
}
public virtual void TestBackwardsCompatibility()
{
    // Tests that if the taxonomy index doesn't have the INDEX_EPOCH
    // property (supports pre-3.6 indexes), all still works.
    Directory dir = NewDirectory();

    // Create an empty index first, so that DirTaxoWriter initializes indexEpoch to 1.
    new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, null)).Dispose();

    var taxoWriter = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE_OR_APPEND, NO_OP_CACHE);
    taxoWriter.Dispose();

    var taxoReader = new DirectoryTaxonomyReader(dir);

    // The epoch is machine-written commit data, not user-facing text, so it is
    // parsed with the invariant culture instead of the thread's current culture.
    Assert.AreEqual(1, Convert.ToInt32(
        taxoReader.CommitUserData[DirectoryTaxonomyWriter.INDEX_EPOCH],
        System.Globalization.CultureInfo.InvariantCulture));

    // Nothing changed since the reader was opened, so no new reader is expected.
    Assert.Null(TaxonomyReader.OpenIfChanged(taxoReader));

    taxoReader.Dispose();
    dir.Dispose();
}
public virtual void TestOpenIfChangedManySegments()
{
    // Exercises openIfChanged() when the taxonomy contains many segments.
    Directory directory = NewDirectory();
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriterAnonymousInnerClassHelper(this, directory);
    var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

    int roundCount = Random.Next(10) + 10;
    int expectedCategories = 1; // one for root

    for (int round = 0; round < roundCount; round++)
    {
        int catsInRound = Random.Next(4) + 1;
        string roundName = Convert.ToString(round, CultureInfo.InvariantCulture);

        for (int cat = 0; cat < catsInRound; cat++)
        {
            string catName = Convert.ToString(cat, CultureInfo.InvariantCulture);
            taxoWriter.AddCategory(new FacetLabel(roundName, catName));
        }

        // The added categories plus one for the round's own parent category.
        expectedCategories += catsInRound + 1;

        var reopened = TaxonomyReader.OpenIfChanged(taxoReader);
        Assert.IsNotNull(reopened);
        taxoReader.Dispose();
        taxoReader = reopened;

        // assert categories
        Assert.AreEqual(expectedCategories, taxoReader.Count);
        int roundOrdinal = taxoReader.GetOrdinal(new FacetLabel(roundName));
        int[] parentOrdinals = taxoReader.ParallelTaxonomyArrays.Parents;

        // The round category hangs directly under the root (ordinal 0).
        Assert.AreEqual(0, parentOrdinals[roundOrdinal]);

        for (int cat = 0; cat < catsInRound; cat++)
        {
            string catName = Convert.ToString(cat, CultureInfo.InvariantCulture);
            int catOrdinal = taxoReader.GetOrdinal(new FacetLabel(roundName, catName));

            // Each category added in this round has the round category as its parent.
            Assert.AreEqual(roundOrdinal, parentOrdinals[catOrdinal]);
        }
    }

    taxoReader.Dispose();
    taxoWriter.Dispose();
    directory.Dispose();
}
public virtual void TestAlreadyClosed()
{
    // Reading from a disposed taxonomy reader is expected to raise ObjectDisposedException.
    Directory directory = NewDirectory();

    var taxoWriter = new DirectoryTaxonomyWriter(directory);
    taxoWriter.AddCategory(new FacetLabel("a"));
    taxoWriter.Dispose();

    var taxoReader = new DirectoryTaxonomyReader(directory);
    taxoReader.Dispose();

    try
    {
        var count = taxoReader.Count;
        fail("An ObjectDisposedException should have been thrown here");
    }
    catch (ObjectDisposedException)
    {
        // Expected: the reader was disposed before the property was read.
    }

    directory.Dispose();
}
public virtual void TestRecreateAndRefresh()
{
    // DirTaxoWriter lost the INDEX_EPOCH property if it was opened in
    // CREATE_OR_APPEND (or commit(userData) called twice), which could lead to
    // DirTaxoReader succeeding to refresh().
    Directory dir = NewDirectory();

    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE_OR_APPEND, NO_OP_CACHE);
    TouchTaxo(taxoWriter, new FacetLabel("a"));

    var taxoReader = new DirectoryTaxonomyReader(dir);

    TouchTaxo(taxoWriter, new FacetLabel("b"));

    var newtr = TaxonomyReader.OpenIfChanged(taxoReader);
    taxoReader.Dispose();
    taxoReader = newtr;

    // The epoch is machine-written commit data, so it is parsed with the
    // invariant culture rather than the thread's current culture.
    Assert.AreEqual(1, Convert.ToInt32(
        taxoReader.CommitUserData[DirectoryTaxonomyWriter.INDEX_EPOCH],
        System.Globalization.CultureInfo.InvariantCulture));

    // Now recreate the taxonomy, and check that the epoch is preserved after opening DirTW again.
    taxoWriter.Dispose();
    taxoWriter = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE, NO_OP_CACHE);
    TouchTaxo(taxoWriter, new FacetLabel("c"));
    taxoWriter.Dispose();

    taxoWriter = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE_OR_APPEND, NO_OP_CACHE);
    TouchTaxo(taxoWriter, new FacetLabel("d"));
    taxoWriter.Dispose();

    newtr = TaxonomyReader.OpenIfChanged(taxoReader);
    taxoReader.Dispose();
    taxoReader = newtr;

    // After the recreate, the epoch stored in the commit data is expected to be 2.
    Assert.AreEqual(2, Convert.ToInt32(
        taxoReader.CommitUserData[DirectoryTaxonomyWriter.INDEX_EPOCH],
        System.Globalization.CultureInfo.InvariantCulture));

    taxoReader.Dispose();
    dir.Dispose();
}
public virtual void TestOpenIfChangedNoChangesButSegmentMerges()
{
    // Exercises openIfChanged() when the taxonomy content is unchanged but segments
    // were merged. The NRT reader will be reopened, and ParentArray used to assert
    // that the new reader contains more ordinals than were given from the old
    // TaxReader version.
    Directory directory = NewDirectory();

    // Keep a handle on the IndexWriter so ForceMerge can be invoked on it.
    // It is intentionally not disposed here, since DTW will close it.
    var writerConfig = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
        .SetMergePolicy(new LogByteSizeMergePolicy());
    var indexWriter = new IndexWriter(directory, writerConfig);
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriterAnonymousInnerClassHelper3(this, directory, indexWriter);

    // Add a category so that the following DTR open will cause a flush and
    // a new segment will be created.
    taxoWriter.AddCategory(new FacetLabel("a"));

    var initialReader = new DirectoryTaxonomyReader(taxoWriter);
    Assert.AreEqual(2, initialReader.Size);
    Assert.AreEqual(2, initialReader.ParallelTaxonomyArrays.Parents().Length);

    // Merge all the segments so that the NRT reader thinks there's a change.
    indexWriter.ForceMerge(1);

    // Reopening at this point used to trip on the wrong assert in ParentArray's ctor.
    var reopenedReader = TaxonomyReader.OpenIfChanged(initialReader);
    Assert.NotNull(reopenedReader);
    initialReader.Dispose();

    Assert.AreEqual(2, reopenedReader.Size);
    Assert.AreEqual(2, reopenedReader.ParallelTaxonomyArrays.Parents().Length);

    reopenedReader.Dispose();
    taxoWriter.Dispose();
    directory.Dispose();
}
private void validate(Directory dest, Directory src, IOrdinalMap ordMap)
{
    // Checks that every category of the source taxonomy (ordinals 1 and up) exists in
    // the destination taxonomy under the ordinal recorded for it in ordMap.
    using (var destReader = new DirectoryTaxonomyReader(dest))
    {
        int destCount = destReader.Count;

        using (var srcReader = new DirectoryTaxonomyReader(src))
        {
            var ordinals = ordMap.GetMap();

            // validate taxo sizes
            int srcCount = srcReader.Count;
            Assert.IsTrue(destCount >= srcCount, "destination taxonomy expected to be larger than source; dest=" + destCount + " src=" + srcCount);

            // Validate that all source categories exist in destination, and their
            // ordinals are as expected.
            for (int srcOrdinal = 1; srcOrdinal < srcCount; srcOrdinal++)
            {
                FacetLabel label = srcReader.GetPath(srcOrdinal);
                int destOrdinal = destReader.GetOrdinal(label);
                Assert.IsTrue(destOrdinal > 0, label + " not found in destination");
                Assert.AreEqual(destOrdinal, ordinals[srcOrdinal]);
            }
        }
    }
}
/// <summary>
/// Implements the opening of a new <see cref="DirectoryTaxonomyReader"/> instance if
/// the taxonomy has changed.
///
/// <para>
/// <b>NOTE:</b> the returned <see cref="DirectoryTaxonomyReader"/> shares the
/// ordinal and category caches with this reader. This is not expected to cause
/// any issues, unless the two instances continue to live. The reader
/// guarantees that the two instances cannot affect each other in terms of
/// correctness of the caches, however if the size of the cache is changed
/// (<c>setCacheSize(int)</c> in the original Java API), it will affect both reader instances.
/// </para>
/// </summary>
/// <returns>
/// A new reader over the changed index, or <c>null</c> when the underlying
/// index reader reports no changes.
/// </returns>
protected override TaxonomyReader DoOpenIfChanged()
{
    EnsureOpen();

    // This works for both NRT and non-NRT readers (i.e. an NRT reader remains NRT).
    var r2 = DirectoryReader.OpenIfChanged(indexReader);
    if (r2 == null)
    {
        return null; // no changes, nothing to do
    }

    // check if the taxonomy was recreated
    bool success = false;
    try
    {
        bool recreated = false;
        if (taxoWriter == null)
        {
            // Not NRT, check epoch from commit data.
            // TryGetValue leaves the value null when the key is absent, which is what the
            // null checks below rely on; a standard dictionary indexer would throw
            // KeyNotFoundException for commit data that carries no epoch entry.
            string t1;
            string t2;
            indexReader.IndexCommit.UserData.TryGetValue(DirectoryTaxonomyWriter.INDEX_EPOCH, out t1);
            r2.IndexCommit.UserData.TryGetValue(DirectoryTaxonomyWriter.INDEX_EPOCH, out t2);

            if (t1 == null)
            {
                if (t2 != null)
                {
                    recreated = true;
                }
            }
            else if (!t1.Equals(t2))
            {
                // t1 != null and t2 cannot be null b/c DirTaxoWriter always puts the commit data.
                // It's ok to use string equality because we require the two epoch values to be the same.
                recreated = true;
            }
        }
        else
        {
            // NRT, compare current taxoWriter epoch vs the one that was given at construction
            if (taxoEpoch != taxoWriter.TaxonomyEpoch)
            {
                recreated = true;
            }
        }

        DirectoryTaxonomyReader newtr;
        if (recreated)
        {
            // If recreated, do not reuse anything from this instance. The information
            // will be lazily computed by the new instance when needed.
            newtr = new DirectoryTaxonomyReader(r2, taxoWriter, null, null, null);
        }
        else
        {
            newtr = new DirectoryTaxonomyReader(r2, taxoWriter, ordinalCache, categoryCache, taxoArrays);
        }

        success = true;
        return newtr;
    }
    finally
    {
        // On any failure the freshly opened index reader must not leak.
        if (!success)
        {
            IOUtils.CloseWhileHandlingException(r2);
        }
    }
}
public virtual void TestConcurrency()
{
    // Runs several indexing threads against a shared IndexWriter and taxonomy writer,
    // then checks that every recorded category exists with the expected parent chain.
    AtomicInt32 numDocs = new AtomicInt32(AtLeast(10000));
    Directory indexDir = NewDirectory();
    Directory taxoDir = NewDirectory();
    var values = new ConcurrentDictionary<string, string>();
    var indexWriter = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, null));
    var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE, NewTaxoWriterCache(numDocs));
    var workers = new ThreadJob[AtLeast(4)];

    var config = new FacetsConfig();
    for (int dimIndex = 0; dimIndex < 10; dimIndex++)
    {
        config.SetHierarchical("l1." + dimIndex, true);
        config.SetMultiValued("l1." + dimIndex, true);
    }

    for (int w = 0; w < workers.Length; w++)
    {
        workers[w] = new ThreadAnonymousInnerClassHelper(this, numDocs, values, indexWriter, taxoWriter, config);
    }

    for (int w = 0; w < workers.Length; w++)
    {
        workers[w].Start();
    }

    for (int w = 0; w < workers.Length; w++)
    {
        workers[w].Join();
    }

    var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

    // +1 for root category
    if (values.Count + 1 != taxoReader.Count)
    {
        // Report every category the reader does not know before failing.
        foreach (string value in values.Keys)
        {
            var label = new FacetLabel(FacetsConfig.StringToPath(value));
            if (taxoReader.GetOrdinal(label) == -1)
            {
                Console.WriteLine("FAIL: path=" + label + " not recognized");
            }
        }
        fail("mismatch number of categories");
    }

    int[] parentOrdinals = taxoReader.ParallelTaxonomyArrays.Parents;
    foreach (string category in values.Keys)
    {
        var fullPath = new FacetLabel(FacetsConfig.StringToPath(category));
        Assert.IsTrue(taxoReader.GetOrdinal(fullPath) > 0, "category not found " + fullPath);

        int depth = fullPath.Length;
        int expectedParent = 0; // for root, parent is always virtual ROOT (ord=0)
        for (int level = 0; level < depth; level++)
        {
            FacetLabel prefix = fullPath.Subpath(level + 1);
            int prefixOrdinal = taxoReader.GetOrdinal(prefix);
            Assert.AreEqual(expectedParent, parentOrdinals[prefixOrdinal], "invalid parent for cp=" + prefix);
            expectedParent = prefixOrdinal; // next level should have this parent
        }
    }

    IOUtils.Dispose(taxoWriter, indexWriter, taxoReader, taxoDir, indexDir);
}
public virtual void TestConcurrency()
{
    // Adds many categories from several threads through one taxonomy writer, then
    // checks that every recorded category exists with the expected parent chain.
    int categoryBudget = AtLeast(100000); // add many categories
    int range = categoryBudget * 3; // affects the categories selection
    AtomicInteger numCats = new AtomicInteger(categoryBudget);
    Directory directory = NewDirectory();
    var values = new ConcurrentDictionary<string, string>();

    double cacheDice = Random().NextDouble();
    ITaxonomyWriterCache cache;
    if (cacheDice < 0.7)
    {
        // this is the fastest, yet most memory consuming
        cache = new Cl2oTaxonomyWriterCache(1024, 0.15f, 3);
    }
    else if (TEST_NIGHTLY && cacheDice > 0.98)
    {
        // this is the slowest, but tests the writer concurrency when no caching is done.
        // only pick it during NIGHTLY tests, and even then, with very low chances.
        cache = NO_OP_CACHE;
    }
    else
    {
        // this is slower than CL2O, but less memory consuming, and exercises finding categories on disk too.
        cache = new LruTaxonomyWriterCache(categoryBudget / 10);
    }

    if (VERBOSE)
    {
        Console.WriteLine("TEST: use cache=" + cache);
    }

    var taxoWriter = new DirectoryTaxonomyWriter(directory, OpenMode.CREATE, cache);
    var workers = new ThreadClass[AtLeast(4)];
    for (int w = 0; w < workers.Length; w++)
    {
        workers[w] = new ThreadAnonymousInnerClassHelper(this, range, numCats, values, taxoWriter);
    }

    for (int w = 0; w < workers.Length; w++)
    {
        workers[w].Start();
    }

    for (int w = 0; w < workers.Length; w++)
    {
        workers[w].Join();
    }

    taxoWriter.Dispose();

    var taxoReader = new DirectoryTaxonomyReader(directory);

    // +1 for root category
    if (values.Count + 1 != taxoReader.Count)
    {
        // Report every category the reader does not know before failing.
        foreach (string value in values.Keys)
        {
            var label = new FacetLabel(FacetsConfig.StringToPath(value));
            if (taxoReader.GetOrdinal(label) == -1)
            {
                Console.WriteLine("FAIL: path=" + label + " not recognized");
            }
        }
        Fail("mismatch number of categories");
    }

    int[] parentOrdinals = taxoReader.ParallelTaxonomyArrays.Parents;
    foreach (string category in values.Keys)
    {
        var fullPath = new FacetLabel(FacetsConfig.StringToPath(category));
        Assert.True(taxoReader.GetOrdinal(fullPath) > 0, "category not found " + fullPath);

        int depth = fullPath.Length;
        int expectedParent = 0; // for root, parent is always virtual ROOT (ord=0)
        FacetLabel prefix = new FacetLabel();
        for (int level = 0; level < depth; level++)
        {
            prefix = fullPath.Subpath(level + 1);
            int prefixOrdinal = taxoReader.GetOrdinal(prefix);
            Assert.AreEqual(expectedParent, parentOrdinals[prefixOrdinal], "invalid parent for cp=" + prefix);
            expectedParent = prefixOrdinal; // next level should have this parent
        }
    }

    IOUtils.Close(taxoReader, directory);
}
public virtual void TestHugeLabel()
{
    // Adds one very long label followed by a few tiny ones, then checks that the long
    // label still resolves to its original ordinal and can be found by drill-down.
    Directory indexDir = NewDirectory();
    Directory taxoDir = NewDirectory();
    var indexWriter = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
    var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE, new Cl2oTaxonomyWriterCache(2, 1f, 1));
    var config = new FacetsConfig();

    // Add one huge label:
    int hugeLength = FacetLabel.MAX_CATEGORY_PATH_LENGTH - 4; // for the dimension and separator
    string hugeValue = TestUtil.RandomSimpleString(Random(), hugeLength, hugeLength);
    var hugeField = new FacetField("dim", hugeValue);
    var hugeLabel = new FacetLabel("dim", hugeValue);
    int hugeOrdinal = taxoWriter.AddCategory(hugeLabel);

    var hugeDoc = new Document();
    hugeDoc.Add(hugeField);
    indexWriter.AddDocument(config.Build(taxoWriter, hugeDoc));

    // Add tiny ones to cause a re-hash
    for (int i = 0; i < 3; i++)
    {
        string tinyValue = TestUtil.RandomSimpleString(Random(), 1, 10);
        taxoWriter.AddCategory(new FacetLabel("dim", tinyValue));

        var tinyDoc = new Document();
        tinyDoc.Add(new FacetField("dim", tinyValue));
        indexWriter.AddDocument(config.Build(taxoWriter, tinyDoc));
    }

    // when too large components were allowed to be added, this resulted in a new added category
    Assert.AreEqual(hugeOrdinal, taxoWriter.AddCategory(hugeLabel));

    IOUtils.Close(indexWriter, taxoWriter);

    DirectoryReader indexReader = DirectoryReader.Open(indexDir);
    var taxoReader = new DirectoryTaxonomyReader(taxoDir);
    var searcher = new IndexSearcher(indexReader);

    // Exactly one document carries the huge label.
    var drillDown = new DrillDownQuery(new FacetsConfig());
    drillDown.Add("dim", hugeValue);
    Assert.AreEqual(1, searcher.Search(drillDown, 10).TotalHits);

    IOUtils.Close(indexReader, taxoReader, indexDir, taxoDir);
}
public virtual void TestOpenIfChangedMergedSegment()
{
    // Test openIfChanged() when all index segments were merged - used to be
    // a bug in ParentArray, caught by testOpenIfChangedManySegments - only
    // this test is not random.
    Directory dir = NewDirectory();

    // Hold onto IW to forceMerge.
    // Note how we don't close it, since DTW will close it.
    IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(new LogByteSizeMergePolicy()));

    // Declared with the base type directly; an `as` cast on a freshly constructed
    // instance adds nothing and would only hide a null if the conversion ever failed.
    DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriterAnonymousInnerClassHelper2(this, dir, iw);

    var reader = new DirectoryTaxonomyReader(writer);
    Assert.AreEqual(1, reader.Size);
    Assert.AreEqual(1, reader.ParallelTaxonomyArrays.Parents().Length);

    // Add category and call forceMerge -- this should flush IW and merge segments down to 1.
    // In ParentArray.initFromReader, this used to fail assuming there are no parents.
    writer.AddCategory(new FacetLabel("1"));
    iw.ForceMerge(1);

    // Now calling openIfChanged should trip on the bug.
    var newtr = TaxonomyReader.OpenIfChanged(reader);
    Assert.NotNull(newtr);
    reader.Dispose();
    reader = newtr;
    Assert.AreEqual(2, reader.Size);
    Assert.AreEqual(2, reader.ParallelTaxonomyArrays.Parents().Length);

    reader.Dispose();
    writer.Dispose();
    dir.Dispose();
}
public virtual void TestBackwardsCompatibility()
{
    // Tests that if the taxonomy index doesn't have the INDEX_EPOCH
    // property (supports pre-3.6 indexes), all still works.
    Directory dir = NewDirectory();

    // Create an empty index first, so that DirTaxoWriter initializes indexEpoch to 1.
    new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, null)).Dispose();

    var taxoWriter = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE_OR_APPEND, NO_OP_CACHE);
    taxoWriter.Dispose();

    var taxoReader = new DirectoryTaxonomyReader(dir);

    // The epoch is machine-written commit data, not user-facing text, so it is
    // parsed with the invariant culture instead of the thread's current culture.
    Assert.AreEqual(1, Convert.ToInt32(
        taxoReader.CommitUserData[DirectoryTaxonomyWriter.INDEX_EPOCH],
        System.Globalization.CultureInfo.InvariantCulture));

    // Nothing changed since the reader was opened, so no new reader is expected.
    Assert.Null(TaxonomyReader.OpenIfChanged(taxoReader));

    taxoReader.Dispose();
    dir.Dispose();
}
/// <summary>
/// Implements the opening of a new <see cref="DirectoryTaxonomyReader"/> instance if
/// the taxonomy has changed.
///
/// <para>
/// <b>NOTE:</b> the returned <see cref="DirectoryTaxonomyReader"/> shares the
/// ordinal and category caches with this reader. This is not expected to cause
/// any issues, unless the two instances continue to live. The reader
/// guarantees that the two instances cannot affect each other in terms of
/// correctness of the caches, however if the size of the cache is changed
/// (<c>setCacheSize(int)</c> in the original Java API), it will affect both reader instances.
/// </para>
/// </summary>
/// <returns>
/// A new reader over the changed index, or <c>null</c> when the underlying
/// index reader reports no changes.
/// </returns>
protected override TaxonomyReader DoOpenIfChanged()
{
    EnsureOpen();

    // This works for both NRT and non-NRT readers (i.e. an NRT reader remains NRT).
    var r2 = DirectoryReader.OpenIfChanged(indexReader);
    if (r2 == null)
    {
        return null; // no changes, nothing to do
    }

    // check if the taxonomy was recreated
    bool success = false;
    try
    {
        bool recreated = false;
        if (taxoWriter == null)
        {
            // Not NRT, check epoch from commit data.
            // TryGetValue leaves the value null when the key is absent, which is what the
            // null checks below rely on; a standard dictionary indexer would throw
            // KeyNotFoundException for commit data that carries no epoch entry.
            string t1;
            string t2;
            indexReader.IndexCommit.UserData.TryGetValue(DirectoryTaxonomyWriter.INDEX_EPOCH, out t1);
            r2.IndexCommit.UserData.TryGetValue(DirectoryTaxonomyWriter.INDEX_EPOCH, out t2);

            if (t1 == null)
            {
                if (t2 != null)
                {
                    recreated = true;
                }
            }
            else if (!t1.Equals(t2))
            {
                // t1 != null and t2 cannot be null b/c DirTaxoWriter always puts the commit data.
                // It's ok to use string equality because we require the two epoch values to be the same.
                recreated = true;
            }
        }
        else
        {
            // NRT, compare current taxoWriter epoch vs the one that was given at construction
            if (taxoEpoch != taxoWriter.TaxonomyEpoch)
            {
                recreated = true;
            }
        }

        DirectoryTaxonomyReader newtr;
        if (recreated)
        {
            // If recreated, do not reuse anything from this instance. The information
            // will be lazily computed by the new instance when needed.
            newtr = new DirectoryTaxonomyReader(r2, taxoWriter, null, null, null);
        }
        else
        {
            newtr = new DirectoryTaxonomyReader(r2, taxoWriter, ordinalCache, categoryCache, taxoArrays);
        }

        success = true;
        return newtr;
    }
    finally
    {
        // On any failure the freshly opened index reader must not leak.
        if (!success)
        {
            IOUtils.CloseWhileHandlingException(r2);
        }
    }
}
public virtual void TestConcurrency()
{
    // Adds many categories from several threads through one taxonomy writer, then
    // checks that every recorded category exists with the expected parent chain.
    int categoryBudget = AtLeast(100000); // add many categories
    int range = categoryBudget * 3; // affects the categories selection
    AtomicInteger numCats = new AtomicInteger(categoryBudget);
    Directory directory = NewDirectory();
    var values = new ConcurrentDictionary<string, string>();

    double cacheDice = Random().NextDouble();
    TaxonomyWriterCache cache;
    if (cacheDice < 0.7)
    {
        // this is the fastest, yet most memory consuming
        cache = new Cl2oTaxonomyWriterCache(1024, 0.15f, 3);
    }
    else if (TEST_NIGHTLY && cacheDice > 0.98)
    {
        // this is the slowest, but tests the writer concurrency when no caching is done.
        // only pick it during NIGHTLY tests, and even then, with very low chances.
        cache = NO_OP_CACHE;
    }
    else
    {
        // this is slower than CL2O, but less memory consuming, and exercises finding categories on disk too.
        cache = new LruTaxonomyWriterCache(categoryBudget / 10);
    }

    if (VERBOSE)
    {
        Console.WriteLine("TEST: use cache=" + cache);
    }

    var taxoWriter = new DirectoryTaxonomyWriter(directory, OpenMode.CREATE, cache);
    var workers = new ThreadClass[AtLeast(4)];
    for (int w = 0; w < workers.Length; w++)
    {
        workers[w] = new ThreadAnonymousInnerClassHelper(this, range, numCats, values, taxoWriter);
    }

    for (int w = 0; w < workers.Length; w++)
    {
        workers[w].Start();
    }

    for (int w = 0; w < workers.Length; w++)
    {
        workers[w].Join();
    }

    taxoWriter.Dispose();

    var taxoReader = new DirectoryTaxonomyReader(directory);

    // +1 for root category
    if (values.Count + 1 != taxoReader.Size)
    {
        // Report every category the reader does not know before failing.
        foreach (string value in values.Keys)
        {
            var label = new FacetLabel(FacetsConfig.StringToPath(value));
            if (taxoReader.GetOrdinal(label) == -1)
            {
                Console.WriteLine("FAIL: path=" + label + " not recognized");
            }
        }
        Fail("mismatch number of categories");
    }

    int[] parentOrdinals = taxoReader.ParallelTaxonomyArrays.Parents();
    foreach (string category in values.Keys)
    {
        var fullPath = new FacetLabel(FacetsConfig.StringToPath(category));
        Assert.True(taxoReader.GetOrdinal(fullPath) > 0, "category not found " + fullPath);

        int depth = fullPath.Length;
        int expectedParent = 0; // for root, parent is always virtual ROOT (ord=0)
        FacetLabel prefix = new FacetLabel();
        for (int level = 0; level < depth; level++)
        {
            prefix = fullPath.Subpath(level + 1);
            int prefixOrdinal = taxoReader.GetOrdinal(prefix);
            Assert.AreEqual(expectedParent, parentOrdinals[prefixOrdinal], "invalid parent for cp=" + prefix);
            expectedParent = prefixOrdinal; // next level should have this parent
        }
    }

    IOUtils.Close(taxoReader, directory);
}
public virtual void TestConcurrency()
{
    // Runs several indexing threads against a shared IndexWriter and taxonomy writer,
    // then checks that every recorded category exists with the expected parent chain.
    AtomicInteger numDocs = new AtomicInteger(AtLeast(10000));
    Directory indexDir = NewDirectory();
    Directory taxoDir = NewDirectory();
    var values = new ConcurrentDictionary<string, string>();
    var indexWriter = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, null));
    var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE, NewTaxoWriterCache(numDocs.Get()));
    var workers = new ThreadClass[AtLeast(4)];

    var config = new FacetsConfig();
    for (int dimIndex = 0; dimIndex < 10; dimIndex++)
    {
        config.SetHierarchical("l1." + dimIndex, true);
        config.SetMultiValued("l1." + dimIndex, true);
    }

    for (int w = 0; w < workers.Length; w++)
    {
        workers[w] = new ThreadAnonymousInnerClassHelper(this, numDocs, values, indexWriter, taxoWriter, config);
    }

    for (int w = 0; w < workers.Length; w++)
    {
        workers[w].Start();
    }

    for (int w = 0; w < workers.Length; w++)
    {
        workers[w].Join();
    }

    var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

    // +1 for root category
    if (values.Count + 1 != taxoReader.Count)
    {
        // Report every category the reader does not know before failing.
        foreach (string value in values.Keys)
        {
            var label = new FacetLabel(FacetsConfig.StringToPath(value));
            if (taxoReader.GetOrdinal(label) == -1)
            {
                Console.WriteLine("FAIL: path=" + label + " not recognized");
            }
        }
        Fail("mismatch number of categories");
    }

    int[] parentOrdinals = taxoReader.ParallelTaxonomyArrays.Parents;
    foreach (string category in values.Keys)
    {
        var fullPath = new FacetLabel(FacetsConfig.StringToPath(category));
        Assert.True(taxoReader.GetOrdinal(fullPath) > 0, "category not found " + fullPath);

        int depth = fullPath.Length;
        int expectedParent = 0; // for root, parent is always virtual ROOT (ord=0)
        for (int level = 0; level < depth; level++)
        {
            FacetLabel prefix = fullPath.Subpath(level + 1);
            int prefixOrdinal = taxoReader.GetOrdinal(prefix);
            Assert.AreEqual(expectedParent, parentOrdinals[prefixOrdinal], "invalid parent for cp=" + prefix);
            expectedParent = prefixOrdinal; // next level should have this parent
        }
    }

    IOUtils.Close(taxoWriter, indexWriter, taxoReader, taxoDir, indexDir);
}
public virtual void TestGetChildren()
{
    // Builds a taxonomy with roots "a", "b" (each with random children) and "c"
    // (no children), then checks what the children iterator returns for each case.
    Directory dir = NewDirectory();
    var taxoWriter = new DirectoryTaxonomyWriter(dir);
    int numCategories = AtLeast(10);
    int numA = 0, numB = 0;
    Random random = Random();

    // Add the two categories for which we'll also add children (so asserts are simpler).
    taxoWriter.AddCategory(new FacetLabel("a"));
    taxoWriter.AddCategory(new FacetLabel("b"));
    for (int i = 0; i < numCategories; i++)
    {
        // Category names are identifiers, so they are formatted culture-invariantly.
        string childName = Convert.ToString(i, System.Globalization.CultureInfo.InvariantCulture);
        if (random.NextBoolean())
        {
            taxoWriter.AddCategory(new FacetLabel("a", childName));
            ++numA;
        }
        else
        {
            taxoWriter.AddCategory(new FacetLabel("b", childName));
            ++numB;
        }
    }

    // add category with no children
    taxoWriter.AddCategory(new FacetLabel("c"));
    taxoWriter.Dispose();

    var taxoReader = new DirectoryTaxonomyReader(dir);

    // non existing category
    TaxonomyReader.ChildrenIterator it = taxoReader.GetChildren(taxoReader.GetOrdinal(new FacetLabel("invalid")));
    Assert.AreEqual(TaxonomyReader.INVALID_ORDINAL, it.Next());

    // a category with no children
    it = taxoReader.GetChildren(taxoReader.GetOrdinal(new FacetLabel("c")));
    Assert.AreEqual(TaxonomyReader.INVALID_ORDINAL, it.Next());

    // arbitrary negative ordinal
    it = taxoReader.GetChildren(-2);
    Assert.AreEqual(TaxonomyReader.INVALID_ORDINAL, it.Next());

    // root's children: each of "a", "b", "c" must be returned exactly once
    var roots = new HashSet<string> { "a", "b", "c" };
    it = taxoReader.GetChildren(TaxonomyReader.ROOT_ORDINAL);
    while (roots.Count > 0)
    {
        FacetLabel root = taxoReader.GetPath(it.Next());
        Assert.AreEqual(1, root.Length);
        Assert.True(roots.Remove(root.Components[0]));
    }
    Assert.AreEqual(TaxonomyReader.INVALID_ORDINAL, it.Next());

    for (int i = 0; i < 2; i++)
    {
        string parentName = i == 0 ? "a" : "b";
        FacetLabel cp = new FacetLabel(parentName);
        int ordinal = taxoReader.GetOrdinal(cp);
        it = taxoReader.GetChildren(ordinal);

        int numChildren = 0;
        int child;
        while ((child = it.Next()) != TaxonomyReader.INVALID_ORDINAL)
        {
            FacetLabel path = taxoReader.GetPath(child);
            Assert.AreEqual(2, path.Length);

            // Expected value goes first so a failure message reports the values the right way round.
            Assert.AreEqual(parentName, path.Components[0]);
            ++numChildren;
        }

        int expected = i == 0 ? numA : numB;
        Assert.AreEqual(expected, numChildren, "invalid num children");
    }

    taxoReader.Dispose();
    dir.Dispose();
}
public virtual void TestOpenIfChangedAndRefCount()
{
    // Checks that a reader returned by OpenIfChanged starts with a reference count
    // of 1 even though the reader it was opened from held two references.
    Directory directory = new RAMDirectory(); // no need for random directories here

    var taxoWriter = new DirectoryTaxonomyWriter(directory);
    taxoWriter.AddCategory(new FacetLabel("a"));
    taxoWriter.Commit();

    var originalReader = new DirectoryTaxonomyReader(directory);
    Assert.AreEqual(1, originalReader.RefCount, "wrong refCount");
    originalReader.IncRef();
    Assert.AreEqual(2, originalReader.RefCount, "wrong refCount");

    // Commit a change so that a reopen yields a new reader.
    taxoWriter.AddCategory(new FacetLabel("a", "b"));
    taxoWriter.Commit();

    var reopenedReader = TaxonomyReader.OpenIfChanged(originalReader);
    Assert.NotNull(reopenedReader);
    originalReader.Dispose();

    Assert.AreEqual(1, reopenedReader.RefCount, "wrong refCount");

    taxoWriter.Dispose();
    reopenedReader.Dispose();
    directory.Dispose();
}
public virtual void TestOpenIfChangedResult()
{
    // OpenIfChanged is expected to return null while nothing was committed,
    // and a new reader once changes were committed.
    Directory directory = null;
    DirectoryTaxonomyWriter taxoWriter = null;
    DirectoryTaxonomyReader taxoReader = null;

    try
    {
        directory = NewDirectory();

        taxoWriter = new DirectoryTaxonomyWriter(directory);
        taxoWriter.AddCategory(new FacetLabel("a"));
        taxoWriter.Commit();

        taxoReader = new DirectoryTaxonomyReader(directory);
        Assert.Null(TaxonomyReader.OpenIfChanged(taxoReader), "Nothing has changed");

        taxoWriter.AddCategory(new FacetLabel("b"));
        taxoWriter.Commit();

        DirectoryTaxonomyReader reopened = TaxonomyReader.OpenIfChanged(taxoReader);
        Assert.NotNull(reopened, "changes were committed");

        // The reopened reader is already up to date, so a second reopen yields nothing.
        Assert.Null(TaxonomyReader.OpenIfChanged(reopened), "Nothing has changed");
        reopened.Dispose();
    }
    finally
    {
        IOUtils.Close(taxoWriter, taxoReader, directory);
    }
}
public virtual void TestOpenIfChangedNoChangesButSegmentMerges()
{
    // Exercises openIfChanged() when the taxonomy content is unchanged but segments
    // were merged. The NRT reader will be reopened, and ParentArray used to assert
    // that the new reader contains more ordinals than were given from the old
    // TaxReader version.
    Directory taxoDir = NewDirectory();

    // Keep a handle on the IndexWriter so ForceMerge can be invoked on it.
    // It is intentionally not disposed here, since DTW will close it.
    var mergeConfig = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
        .SetMergePolicy(new LogByteSizeMergePolicy());
    var underlyingWriter = new IndexWriter(taxoDir, mergeConfig);
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriterAnonymousInnerClassHelper3(this, taxoDir, underlyingWriter);

    // Add a category so that the following DTR open will cause a flush and
    // a new segment will be created.
    taxoWriter.AddCategory(new FacetLabel("a"));

    var beforeMerge = new DirectoryTaxonomyReader(taxoWriter);
    Assert.AreEqual(2, beforeMerge.Size);
    Assert.AreEqual(2, beforeMerge.ParallelTaxonomyArrays.Parents().Length);

    // Merge all the segments so that the NRT reader thinks there's a change.
    underlyingWriter.ForceMerge(1);

    // Reopening at this point used to trip on the wrong assert in ParentArray's ctor.
    var afterMerge = TaxonomyReader.OpenIfChanged(beforeMerge);
    Assert.NotNull(afterMerge);
    beforeMerge.Dispose();

    // The merge must not have changed the number of categories.
    Assert.AreEqual(2, afterMerge.Size);
    Assert.AreEqual(2, afterMerge.ParallelTaxonomyArrays.Parents().Length);

    afterMerge.Dispose();
    taxoWriter.Dispose();
    taxoDir.Dispose();
}
private void validate(Directory dest, Directory src, OrdinalMap ordMap)
{
    // Checks that every category of the source taxonomy (ordinals 1 and up) exists in
    // the destination taxonomy under the ordinal recorded for it in ordMap.
    var destReader = new DirectoryTaxonomyReader(dest);
    try
    {
        int destCount = destReader.Size;

        var srcReader = new DirectoryTaxonomyReader(src);
        try
        {
            var ordinals = ordMap.Map;

            // validate taxo sizes
            int srcCount = srcReader.Size;
            Assert.True(destCount >= srcCount, "destination taxonomy expected to be larger than source; dest=" + destCount + " src=" + srcCount);

            // Validate that all source categories exist in destination, and their
            // ordinals are as expected.
            for (int srcOrdinal = 1; srcOrdinal < srcCount; srcOrdinal++)
            {
                FacetLabel label = srcReader.GetPath(srcOrdinal);
                int destOrdinal = destReader.GetOrdinal(label);
                Assert.True(destOrdinal > 0, label + " not found in destination");
                Assert.AreEqual(destOrdinal, ordinals[srcOrdinal]);
            }
        }
        finally
        {
            // Source reader is released first, mirroring the nesting order.
            ((TaxonomyReader)srcReader).Dispose(true);
        }
    }
    finally
    {
        ((TaxonomyReader)destReader).Dispose(true);
    }
}
public virtual void TestCloseTwice()
{
    // Disposing the same taxonomy reader twice is expected to be harmless.
    Directory directory = NewDirectory();

    var taxoWriter = new DirectoryTaxonomyWriter(directory);
    taxoWriter.AddCategory(new FacetLabel("a"));
    taxoWriter.Dispose();

    var taxoReader = new DirectoryTaxonomyReader(directory);
    taxoReader.Dispose();
    taxoReader.Dispose(); // no exception should be thrown

    directory.Dispose();
}
public virtual void TestConcurrency()
{
    // tests that addTaxonomy and addCategory work in parallel
    int categoryCount = AtLeast(10000);

    // build an input taxonomy index
    Directory sourceDir = NewDirectory();
    var sourceWriter = new DirectoryTaxonomyWriter(sourceDir);
    for (int i = 0; i < categoryCount; i++)
    {
        sourceWriter.AddCategory(new FacetLabel("a", Convert.ToString(i)));
    }
    sourceWriter.Dispose();

    // Now add the taxonomy to an empty taxonomy, while adding the categories
    // again, in parallel -- in the end, no duplicate categories should exist.
    Directory destDir = NewDirectory();
    var destWriter = new DirectoryTaxonomyWriter(destDir);
    ThreadClass adder = new ThreadAnonymousInnerClassHelper2(this, categoryCount, destWriter);
    adder.Start();

    OrdinalMap ordinalMap = new MemoryOrdinalMap();
    destWriter.AddTaxonomy(sourceDir, ordinalMap);
    adder.Join();
    destWriter.Dispose();

    // now validate
    var destReader = new DirectoryTaxonomyReader(destDir);

    // +2 to account for the root category + "a"
    Assert.AreEqual(categoryCount + 2, destReader.Size);

    // Every path from ordinal 1 upwards must be distinct.
    var seen = new HashSet<FacetLabel>();
    for (int ordinal = 1; ordinal < destReader.Size; ordinal++)
    {
        FacetLabel label = destReader.GetPath(ordinal);
        Assert.True(seen.Add(label), "category " + label + " already existed");
    }

    destReader.Dispose();
    IOUtils.Close(sourceDir, destDir);
}
/// <summary>
/// Regression test: DirTaxoWriter lost the INDEX_EPOCH property if it was opened in
/// CREATE_OR_APPEND (or commit(userData) was called twice), which could lead to
/// DirTaxoReader succeeding to refresh(). The epoch read back from the reader's
/// commit user data must be 1 before the taxonomy is recreated and 2 afterwards.
/// </summary>
public virtual void TestRecreateAndRefresh()
{
    Directory dir = NewDirectory();

    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE_OR_APPEND, NO_OP_CACHE);
    TouchTaxo(taxoWriter, new FacetLabel("a"));

    var taxoReader = new DirectoryTaxonomyReader(dir);

    // Change the taxonomy again and swap the reader for the reopened one.
    TouchTaxo(taxoWriter, new FacetLabel("b"));
    var reopened = TaxonomyReader.OpenIfChanged(taxoReader);
    taxoReader.Dispose();
    taxoReader = reopened;

    var epochBeforeRecreate = taxoReader.CommitUserData[DirectoryTaxonomyWriter.INDEX_EPOCH];
    Assert.AreEqual(1, Convert.ToInt32(epochBeforeRecreate));

    // Now recreate the taxonomy ...
    taxoWriter.Dispose();
    taxoWriter = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE, NO_OP_CACHE);
    TouchTaxo(taxoWriter, new FacetLabel("c"));
    taxoWriter.Dispose();

    // ... and check that the epoch is preserved after opening DirTW again.
    taxoWriter = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE_OR_APPEND, NO_OP_CACHE);
    TouchTaxo(taxoWriter, new FacetLabel("d"));
    taxoWriter.Dispose();

    reopened = TaxonomyReader.OpenIfChanged(taxoReader);
    taxoReader.Dispose();
    taxoReader = reopened;

    var epochAfterRecreate = taxoReader.CommitUserData[DirectoryTaxonomyWriter.INDEX_EPOCH];
    Assert.AreEqual(2, Convert.ToInt32(epochAfterRecreate));

    taxoReader.Dispose();
    dir.Dispose();
}
/// <summary>
/// Tests that if the taxonomy is recreated, no data is reused from the previous
/// taxonomy: the reader opened before the recreate keeps seeing only "a", and the
/// reader returned by <c>OpenIfChanged</c> sees only "b".
/// </summary>
public virtual void TestOpenIfChangedReuseAfterRecreate()
{
    Directory dir = NewDirectory();

    // First generation of the taxonomy: a single category "a".
    DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir);
    FacetLabel labelA = new FacetLabel("a");
    writer.AddCategory(labelA);
    writer.Dispose();

    DirectoryTaxonomyReader oldReader = new DirectoryTaxonomyReader(dir);
    // Fill the old reader's caches.
    Assert.AreEqual(1, oldReader.GetOrdinal(labelA));
    Assert.AreEqual(labelA, oldReader.GetPath(1));

    // Recreate the taxonomy with a different category.
    writer = new DirectoryTaxonomyWriter(dir, IndexWriterConfig.OpenMode_e.CREATE);
    FacetLabel labelB = new FacetLabel("b");
    writer.AddCategory(labelB);
    writer.Dispose();

    DirectoryTaxonomyReader newReader = TaxonomyReader.OpenIfChanged(oldReader);
    Assert.NotNull(newReader);

    // Fill the new reader's caches.
    Assert.AreEqual(1, newReader.GetOrdinal(labelB));
    Assert.AreEqual(labelB, newReader.GetPath(1));

    // The old reader must not see "b" ...
    Assert.AreEqual(TaxonomyReader.INVALID_ORDINAL, oldReader.GetOrdinal(labelB));
    Assert.AreEqual(labelA, oldReader.GetPath(1));

    // ... and the new reader must not see "a".
    Assert.AreEqual(TaxonomyReader.INVALID_ORDINAL, newReader.GetOrdinal(labelA));
    Assert.AreEqual(labelB, newReader.GetPath(1));

    newReader.Dispose();
    oldReader.Dispose();
    dir.Dispose();
}
/// <summary>
/// Tests <c>OpenIfChanged</c> when the taxonomy contains many segments. Each round
/// adds a random number of "round/child" categories, reopens the reader, and checks
/// the taxonomy size and the parent ordinals of the categories added in that round.
/// </summary>
public virtual void TestOpenIfChangedManySegments()
{
    Directory dir = NewDirectory();
    // The writer subclass is defined elsewhere; presumably it configures the
    // underlying index so that many segments are produced.
    DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriterAnonymousInnerClassHelper(this, dir);
    var reader = new DirectoryTaxonomyReader(writer);

    int numRounds = Random().Next(10) + 10;
    int expectedSize = 1; // one for root
    for (int round = 0; round < numRounds; round++)
    {
        string roundName = Convert.ToString(round);

        int numCats = Random().Next(4) + 1;
        for (int c = 0; c < numCats; c++)
        {
            writer.AddCategory(new FacetLabel(roundName, Convert.ToString(c)));
        }
        expectedSize += numCats + 1; // the children plus one for the round's own parent category

        // Swap in the reopened reader; a change must have been detected.
        var reopened = TaxonomyReader.OpenIfChanged(reader);
        Assert.NotNull(reopened);
        reader.Dispose();
        reader = reopened;

        // Assert categories.
        Assert.AreEqual(expectedSize, reader.Size);

        int roundOrdinal = reader.GetOrdinal(new FacetLabel(roundName));
        int[] parents = reader.ParallelTaxonomyArrays.Parents();
        Assert.AreEqual(0, parents[roundOrdinal]); // the round category's parent is the root

        for (int c = 0; c < numCats; c++)
        {
            int childOrdinal = reader.GetOrdinal(new FacetLabel(roundName, Convert.ToString(c)));
            Assert.AreEqual(roundOrdinal, parents[childOrdinal]); // each child's parent is the round category
        }
    }

    reader.Dispose();
    writer.Dispose();
    dir.Dispose();
}
/// <summary>
/// Exercises <c>GetChildren</c> on a taxonomy with three top-level categories:
/// "a" and "b", which randomly receive children, and "c", which has none.
/// Covers a non-existing category, a childless category, an arbitrary negative
/// ordinal, the root's children, and the children of "a" and "b".
/// </summary>
public virtual void TestGetChildren()
{
    Directory dir = NewDirectory();
    var taxoWriter = new DirectoryTaxonomyWriter(dir);
    int numCategories = AtLeast(10);
    int numA = 0, numB = 0;
    Random random = Random;

    // Add the two categories for which we'll also add children (so asserts are simpler).
    taxoWriter.AddCategory(new FacetLabel("a"));
    taxoWriter.AddCategory(new FacetLabel("b"));
    for (int i = 0; i < numCategories; i++)
    {
        if (random.NextBoolean())
        {
            taxoWriter.AddCategory(new FacetLabel("a", Convert.ToString(i, CultureInfo.InvariantCulture)));
            ++numA;
        }
        else
        {
            taxoWriter.AddCategory(new FacetLabel("b", Convert.ToString(i, CultureInfo.InvariantCulture)));
            ++numB;
        }
    }

    // Add a category with no children.
    taxoWriter.AddCategory(new FacetLabel("c"));
    taxoWriter.Dispose();

    var taxoReader = new DirectoryTaxonomyReader(dir);

    // Non-existing category.
    TaxonomyReader.ChildrenEnumerator it = taxoReader.GetChildren(taxoReader.GetOrdinal(new FacetLabel("invalid")));
    Assert.IsFalse(it.MoveNext());

    // A category with no children.
    it = taxoReader.GetChildren(taxoReader.GetOrdinal(new FacetLabel("c")));
    Assert.IsFalse(it.MoveNext());

    // Arbitrary negative ordinal.
    it = taxoReader.GetChildren(-2);
    Assert.IsFalse(it.MoveNext());

    // Root's children: each of "a", "b", "c" must be returned exactly once.
    var roots = new JCG.HashSet<string> { "a", "b", "c" };
    it = taxoReader.GetChildren(TaxonomyReader.ROOT_ORDINAL);
    while (roots.Count > 0)
    {
        // Fail right here if the enumerator runs dry, instead of reading Current
        // from an exhausted enumerator and failing on an unrelated assertion.
        Assert.IsTrue(it.MoveNext(), "root has fewer children than expected");
        FacetLabel root = taxoReader.GetPath(it.Current);
        Assert.AreEqual(1, root.Length);
        Assert.IsTrue(roots.Remove(root.Components[0]));
    }
    Assert.IsFalse(it.MoveNext());

    // Children of "a" (i == 0) and "b" (i == 1).
    for (int i = 0; i < 2; i++)
    {
        string parentName = i == 0 ? "a" : "b";
        FacetLabel cp = new FacetLabel(parentName);
        int ordinal = taxoReader.GetOrdinal(cp);
        it = taxoReader.GetChildren(ordinal);

        int numChildren = 0;
        while (it.MoveNext())
        {
            int child = it.Current;
            FacetLabel path = taxoReader.GetPath(child);
            Assert.AreEqual(2, path.Length);
            // Expected value first, so a failure message reports the values the right way round.
            Assert.AreEqual(parentName, path.Components[0]);
            ++numChildren;
        }

        int expected = i == 0 ? numA : numB;
        Assert.AreEqual(expected, numChildren, "invalid num children");
    }

    taxoReader.Dispose();
    dir.Dispose();
}
/// <summary>
/// Repeatedly recreates the taxonomy with a random number of categories and checks
/// that a reader obtained afterwards reports the expected number of categories.
/// </summary>
/// <param name="random">Source of randomness for the number of categories per round.</param>
/// <param name="closeReader">
/// When <c>true</c>, the reader is disposed and a fresh one is opened after every
/// recreate; when <c>false</c>, the reader is replaced via <c>OpenIfChanged</c>.
/// </param>
private void doTestReadRecreatedTaxonomy(Random random, bool closeReader)
{
    Directory dir = null;
    TaxonomyWriter tw = null;
    TaxonomyReader tr = null;

    // Prepare a few categories: "a/0" .. "a/9".
    int n = 10;
    FacetLabel[] labels = new FacetLabel[n];
    for (int idx = 0; idx < labels.Length; idx++)
    {
        labels[idx] = new FacetLabel("a", Convert.ToString(idx));
    }

    try
    {
        dir = NewDirectory();

        // Initial taxonomy containing only "a"; its size is the baseline.
        tw = new DirectoryTaxonomyWriter(dir);
        tw.AddCategory(new FacetLabel("a"));
        tw.Dispose();

        tr = new DirectoryTaxonomyReader(dir);
        int baseNumCategories = tr.Size;

        for (int round = 0; round < n; round++)
        {
            // Recreate the taxonomy from scratch with labels[0..last].
            int last = random.Next(n);
            tw = new DirectoryTaxonomyWriter(dir, IndexWriterConfig.OpenMode_e.CREATE);
            for (int j = 0; j <= last; j++)
            {
                tw.AddCategory(labels[j]);
            }
            tw.Dispose();

            TaxonomyReader current;
            if (closeReader)
            {
                tr.Dispose(true);
                current = new DirectoryTaxonomyReader(dir);
            }
            else
            {
                current = TaxonomyReader.OpenIfChanged(tr);
                Assert.NotNull(current);
                tr.Dispose(true);
            }
            tr = current;

            Assert.AreEqual(baseNumCategories + 1 + last, tr.Size, "Wrong #categories in taxonomy (i=" + round + ", k=" + last + ")");
        }
    }
    finally
    {
        IOUtils.Close(tr as DirectoryTaxonomyReader, tw, dir);
    }
}