Exemplo n.º 1
0
        /// <summary>
        /// Add a new category into the index (and the cache), and return its new
        /// ordinal.
        /// <para>
        /// Actually, we might also need to add some of the category's ancestors
        /// before we can add the category itself (while keeping the invariant that a
        /// parent is always added to the taxonomy before its child). We do this by
        /// recursion.
        /// </para>
        /// </summary>
        private int InternalAddCategory(FacetLabel cp)
        {
            // Find our parent's ordinal (recursively adding the parent category
            // to the taxonomy if it's not already there). Then add the parent
            // ordinal as payloads (rather than a stored field; payloads can be
            // more efficiently read into memory in bulk by LuceneTaxonomyReader)
            int parent;

            if (cp.Length > 1)
            {
                FacetLabel parentPath = cp.Subpath(cp.Length - 1);
                parent = FindCategory(parentPath);
                if (parent < 0)
                {
                    parent = InternalAddCategory(parentPath);
                }
            }
            else if (cp.Length == 1)
            {
                parent = TaxonomyReader.ROOT_ORDINAL;
            }
            else
            {
                parent = TaxonomyReader.INVALID_ORDINAL;
            }
            int id = AddCategoryDocument(cp, parent);

            return(id);
        }
Exemplo n.º 2
0
 internal virtual object Key(FacetLabel name, int prefixLen)
 {
     return(name.Subpath(prefixLen));
 }
Exemplo n.º 3
0
 internal virtual object Key(FacetLabel name, int prefixLen)
 {
     return name.Subpath(prefixLen);
 }
Exemplo n.º 4
0
        public virtual void TestConcurrency()
        {
            AtomicInt32 numDocs  = new AtomicInt32(AtLeast(10000));
            Directory   indexDir = NewDirectory();
            Directory   taxoDir  = NewDirectory();
            ConcurrentDictionary <string, string> values = new ConcurrentDictionary <string, string>();
            IndexWriter iw = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, null));
            var         tw = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE, NewTaxoWriterCache(numDocs));

            ThreadJob[]  indexThreads = new ThreadJob[AtLeast(4)];
            FacetsConfig config       = new FacetsConfig();

            for (int i = 0; i < 10; i++)
            {
                config.SetHierarchical("l1." + i, true);
                config.SetMultiValued("l1." + i, true);
            }

            for (int i = 0; i < indexThreads.Length; i++)
            {
                indexThreads[i] = new ThreadAnonymousInnerClassHelper(this, numDocs, values, iw, tw, config);
            }

            foreach (ThreadJob t in indexThreads)
            {
                t.Start();
            }
            foreach (ThreadJob t in indexThreads)
            {
                t.Join();
            }

            var tr = new DirectoryTaxonomyReader(tw);

            // +1 for root category
            if (values.Count + 1 != tr.Count)
            {
                foreach (string value in values.Keys)
                {
                    FacetLabel label = new FacetLabel(FacetsConfig.StringToPath(value));
                    if (tr.GetOrdinal(label) == -1)
                    {
                        Console.WriteLine("FAIL: path=" + label + " not recognized");
                    }
                }
                fail("mismatch number of categories");
            }
            int[] parents = tr.ParallelTaxonomyArrays.Parents;
            foreach (string cat in values.Keys)
            {
                FacetLabel cp = new FacetLabel(FacetsConfig.StringToPath(cat));
                Assert.IsTrue(tr.GetOrdinal(cp) > 0, "category not found " + cp);
                int        level     = cp.Length;
                int        parentOrd = 0; // for root, parent is always virtual ROOT (ord=0)
                FacetLabel path      = null;
                for (int i = 0; i < level; i++)
                {
                    path = cp.Subpath(i + 1);
                    int ord = tr.GetOrdinal(path);
                    Assert.AreEqual(parentOrd, parents[ord], "invalid parent for cp=" + path);
                    parentOrd = ord; // next level should have this parent
                }
            }

            IOUtils.Dispose(tw, iw, tr, taxoDir, indexDir);
        }
        /// <summary>
        /// Add a new category into the index (and the cache), and return its new
        /// ordinal.
        /// <para>
        /// Actually, we might also need to add some of the category's ancestors
        /// before we can add the category itself (while keeping the invariant that a
        /// parent is always added to the taxonomy before its child). We do this by
        /// recursion.
        /// </para>
        /// </summary>
        private int InternalAddCategory(FacetLabel cp)
        {
            // Find our parent's ordinal (recursively adding the parent category
            // to the taxonomy if it's not already there). Then add the parent
            // ordinal as payloads (rather than a stored field; payloads can be
            // more efficiently read into memory in bulk by LuceneTaxonomyReader)
            int parent;
            if (cp.Length > 1)
            {
                FacetLabel parentPath = cp.Subpath(cp.Length - 1);
                parent = FindCategory(parentPath);
                if (parent < 0)
                {
                    parent = InternalAddCategory(parentPath);
                }
            }
            else if (cp.Length == 1)
            {
                parent = TaxonomyReader.ROOT_ORDINAL;
            }
            else
            {
                parent = TaxonomyReader.INVALID_ORDINAL;
            }
            int id = AddCategoryDocument(cp, parent);

            return id;
        }
        public virtual void TestConcurrency()
        {
            int ncats = AtLeast(100000); // add many categories
            int range = ncats * 3; // affects the categories selection
            AtomicInteger numCats = new AtomicInteger(ncats);
            Directory dir = NewDirectory();
            var values = new ConcurrentDictionary<string, string>();
            double d = Random().NextDouble();
            ITaxonomyWriterCache cache;
            if (d < 0.7)
            {
                // this is the fastest, yet most memory consuming
                cache = new Cl2oTaxonomyWriterCache(1024, 0.15f, 3);
            }
            else if (TEST_NIGHTLY && d > 0.98)
            {
                // this is the slowest, but tests the writer concurrency when no caching is done.
                // only pick it during NIGHTLY tests, and even then, with very low chances.
                cache = NO_OP_CACHE;
            }
            else
            {
                // this is slower than CL2O, but less memory consuming, and exercises finding categories on disk too.
                cache = new LruTaxonomyWriterCache(ncats / 10);
            }
            if (VERBOSE)
            {
                Console.WriteLine("TEST: use cache=" + cache);
            }
            var tw = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE, cache);
            ThreadClass[] addThreads = new ThreadClass[AtLeast(4)];
            for (int z = 0; z < addThreads.Length; z++)
            {
                addThreads[z] = new ThreadAnonymousInnerClassHelper(this, range, numCats, values, tw);
            }

            foreach (var t in addThreads)
            {
                t.Start();
            }
            foreach (var t in addThreads)
            {
                t.Join();
            }
            tw.Dispose();

            DirectoryTaxonomyReader dtr = new DirectoryTaxonomyReader(dir);
            // +1 for root category
            if (values.Count + 1 != dtr.Count)
            {
                foreach (string value in values.Keys)
                {
                    FacetLabel label = new FacetLabel(FacetsConfig.StringToPath(value));
                    if (dtr.GetOrdinal(label) == -1)
                    {
                        Console.WriteLine("FAIL: path=" + label + " not recognized");
                    }
                }
                Fail("mismatch number of categories");
            }

            int[] parents = dtr.ParallelTaxonomyArrays.Parents;
            foreach (string cat in values.Keys)
            {
                FacetLabel cp = new FacetLabel(FacetsConfig.StringToPath(cat));
                Assert.True(dtr.GetOrdinal(cp) > 0, "category not found " + cp);
                int level = cp.Length;
                int parentOrd = 0; // for root, parent is always virtual ROOT (ord=0)
                FacetLabel path = new FacetLabel();
                for (int i = 0; i < level; i++)
                {
                    path = cp.Subpath(i + 1);
                    int ord = dtr.GetOrdinal(path);
                    Assert.AreEqual(parentOrd, parents[ord], "invalid parent for cp=" + path);
                    parentOrd = ord; // next level should have this parent
                }
            }

            IOUtils.Close(dtr, dir);
        }
        public virtual void TestConcurrency()
        {
            int                 ncats   = AtLeast(100000); // add many categories
            int                 range   = ncats * 3;       // affects the categories selection
            AtomicInteger       numCats = new AtomicInteger(ncats);
            Directory           dir     = NewDirectory();
            var                 values  = new ConcurrentDictionary <string, string>();
            double              d       = Random().NextDouble();
            TaxonomyWriterCache cache;

            if (d < 0.7)
            {
                // this is the fastest, yet most memory consuming
                cache = new Cl2oTaxonomyWriterCache(1024, 0.15f, 3);
            }
            else if (TEST_NIGHTLY && d > 0.98)
            {
                // this is the slowest, but tests the writer concurrency when no caching is done.
                // only pick it during NIGHTLY tests, and even then, with very low chances.
                cache = NO_OP_CACHE;
            }
            else
            {
                // this is slower than CL2O, but less memory consuming, and exercises finding categories on disk too.
                cache = new LruTaxonomyWriterCache(ncats / 10);
            }
            if (VERBOSE)
            {
                Console.WriteLine("TEST: use cache=" + cache);
            }
            var tw = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE, cache);

            ThreadClass[] addThreads = new ThreadClass[AtLeast(4)];
            for (int z = 0; z < addThreads.Length; z++)
            {
                addThreads[z] = new ThreadAnonymousInnerClassHelper(this, range, numCats, values, tw);
            }

            foreach (var t in addThreads)
            {
                t.Start();
            }
            foreach (var t in addThreads)
            {
                t.Join();
            }
            tw.Dispose();

            DirectoryTaxonomyReader dtr = new DirectoryTaxonomyReader(dir);

            // +1 for root category
            if (values.Count + 1 != dtr.Size)
            {
                foreach (string value in values.Keys)
                {
                    FacetLabel label = new FacetLabel(FacetsConfig.StringToPath(value));
                    if (dtr.GetOrdinal(label) == -1)
                    {
                        Console.WriteLine("FAIL: path=" + label + " not recognized");
                    }
                }
                Fail("mismatch number of categories");
            }

            int[] parents = dtr.ParallelTaxonomyArrays.Parents();
            foreach (string cat in values.Keys)
            {
                FacetLabel cp = new FacetLabel(FacetsConfig.StringToPath(cat));
                Assert.True(dtr.GetOrdinal(cp) > 0, "category not found " + cp);
                int        level     = cp.Length;
                int        parentOrd = 0; // for root, parent is always virtual ROOT (ord=0)
                FacetLabel path      = new FacetLabel();
                for (int i = 0; i < level; i++)
                {
                    path = cp.Subpath(i + 1);
                    int ord = dtr.GetOrdinal(path);
                    Assert.AreEqual(parentOrd, parents[ord], "invalid parent for cp=" + path);
                    parentOrd = ord; // next level should have this parent
                }
            }

            IOUtils.Close(dtr, dir);
        }
        public virtual void TestConcurrency()
        {
            AtomicInteger numDocs = new AtomicInteger(AtLeast(10000));
            Directory indexDir = NewDirectory();
            Directory taxoDir = NewDirectory();
            ConcurrentDictionary<string, string> values = new ConcurrentDictionary<string, string>();
            IndexWriter iw = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, null));
            var tw = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE, NewTaxoWriterCache(numDocs.Get()));
            ThreadClass[] indexThreads = new ThreadClass[AtLeast(4)];
            FacetsConfig config = new FacetsConfig();
            for (int i = 0; i < 10; i++)
            {
                config.SetHierarchical("l1." + i, true);
                config.SetMultiValued("l1." + i, true);
            }

            for (int i = 0; i < indexThreads.Length; i++)
            {
                indexThreads[i] = new ThreadAnonymousInnerClassHelper(this, numDocs, values, iw, tw, config);
            }

            foreach (ThreadClass t in indexThreads)
            {
                t.Start();
            }
            foreach (ThreadClass t in indexThreads)
            {
                t.Join();
            }

            var tr = new DirectoryTaxonomyReader(tw);
            // +1 for root category
            if (values.Count + 1 != tr.Count)
            {
                foreach (string value in values.Keys)
                {
                    FacetLabel label = new FacetLabel(FacetsConfig.StringToPath(value));
                    if (tr.GetOrdinal(label) == -1)
                    {
                        Console.WriteLine("FAIL: path=" + label + " not recognized");
                    }
                }
                Fail("mismatch number of categories");
            }
            int[] parents = tr.ParallelTaxonomyArrays.Parents;
            foreach (string cat in values.Keys)
            {
                FacetLabel cp = new FacetLabel(FacetsConfig.StringToPath(cat));
                Assert.True(tr.GetOrdinal(cp) > 0, "category not found " + cp);
                int level = cp.Length;
                int parentOrd = 0; // for root, parent is always virtual ROOT (ord=0)
                FacetLabel path = null;
                for (int i = 0; i < level; i++)
                {
                    path = cp.Subpath(i + 1);
                    int ord = tr.GetOrdinal(path);
                    Assert.AreEqual(parentOrd, parents[ord], "invalid parent for cp=" + path);
                    parentOrd = ord; // next level should have this parent
                }
            }

            IOUtils.Close(tw, iw, tr, taxoDir, indexDir);
        }
Exemplo n.º 9
0
	  internal override object Key(FacetLabel name, int prefixLen)
	  {
		return new long?(name.Subpath(prefixLen).LongHashCode());
	  }
Exemplo n.º 10
0
 internal override object Key(FacetLabel name, int prefixLen)
 {
     return(new long?(name.Subpath(prefixLen).LongHashCode()));
 }
Exemplo n.º 11
0
        public virtual void TestSubPath()
        {
            FacetLabel p = new FacetLabel("hi", "there", "man");
            Assert.AreEqual(p.Length, 3);

            FacetLabel p1 = p.Subpath(2);
            Assert.AreEqual(2, p1.Length);
            Assert.AreEqual("FacetLabel: [hi, there]", p1.ToString());

            p1 = p.Subpath(1);
            Assert.AreEqual(1, p1.Length);
            Assert.AreEqual("FacetLabel: [hi]", p1.ToString());

            p1 = p.Subpath(0);
            Assert.AreEqual(0, p1.Length);
            Assert.AreEqual("FacetLabel: []", p1.ToString());

            // with all the following lengths, the prefix should be the whole path
            int[] lengths = new int[] { 3, -1, 4 };
            for (int i = 0; i < lengths.Length; i++)
            {
                p1 = p.Subpath(lengths[i]);
                Assert.AreEqual(3, p1.Length);
                Assert.AreEqual("FacetLabel: [hi, there, man]", p1.ToString());
                Assert.AreEqual(p, p1);
            }
        }