Based on SimpleFacetedSearch. Uses DocID lists instead of bitmaps. Efficient memory usage for high cardinality, sparsely populated facets.

Suitable for high cardinality, sparsely populated facets, i.e. there are a large number of facet values and each facet value is hit in a small percentage of documents, especially if there are also a large number of documents. SimpleFacetedSearch holds a bitmap for each value representing whether that value is a hit in each document (approx 122KB per 1M documents per facet value). So this is an O(N*M) problem. The memory requirement can grow very quickly.

SparseFacetedSearcher records the DocID (Int32) for each value hit (memory cost = values * hits * 4 bytes). SimpleFacetedSearch records a bit for every document per value (memory cost = values * documents / 8 bytes). So if the average number of hits for each value is less than 1/32 (3.125%) of the documents, then Sparse is more memory efficient.

There are also some enumerable methods that mean there is much less pressure on the GC. Plus some bug fixes.

Esempio n. 1
0
        /// <summary>
        /// Runs a match-all query faceted on "category" and expects a total hit count of 7.
        /// </summary>
        public void Test5()
        {
            Query matchAll = new MatchAllDocsQuery();

            SparseFacetedSearcher searcher = new SparseFacetedSearcher(_Reader, "category");
            var result = searcher.Search(matchAll);

            // Every document matched by the query is expected to be counted once.
            Assert.AreEqual(7, result.TotalHitCount);
        }
Esempio n. 2
0
        /// <summary>
        /// Runs a match-all query faceted on the field "nosuchfield" and expects
        /// zero total hits and zero facets.
        /// </summary>
        public void Test6()
        {
            Query matchAll = new MatchAllDocsQuery();

            // NOTE(review): "nosuchfield" is presumably absent from the test index - confirm against the fixture setup.
            SparseFacetedSearcher searcher = new SparseFacetedSearcher(_Reader, "nosuchfield");
            var result = searcher.Search(matchAll);

            Assert.AreEqual(0, result.TotalHitCount);
            Assert.AreEqual(0, result.Facets.Count());
        }
Esempio n. 3
0
        /// <summary>
        /// Searches "text" for "block*" faceted on "lang", "source" and "category".
        /// Expects 6 facets: four specific (lang, source, category) combinations with one hit each,
        /// two facets with no hits, and 4 hits in total. Every returned document must contain "block".
        /// </summary>
        public void Test3()
        {
            var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
            Query query = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, "text", analyzer).Parse("block*");

            SparseFacetedSearcher searcher = new SparseFacetedSearcher(_Reader, "lang", "source", "category");
            var hits = searcher.Search(query);

            Assert.AreEqual(6, hits.Facets.Count());

            int emptyFacets = 0;
            foreach (var facet in hits.Facets)
            {
                // Exercise both indexer forms; guards against
                // [System.Collections.Generic.KeyNotFoundException : The given key was not present in the dictionary.]
                var byName = hits[facet.Name];
                var byString = hits[facet.Name.ToString()];

                // The four facet-name combinations that are expected to hold exactly one hit.
                bool expectsSingleHit =
                    (facet.Name[0] == "us" && facet.Name[1] == "CCN" && facet.Name[2] == "politics") ||
                    (facet.Name[0] == "en" && facet.Name[1] == "BCC" && facet.Name[2] == "tech") ||
                    (facet.Name[0] == "us" && facet.Name[1] == "CCN" && facet.Name[2] == "sport") ||
                    (facet.Name[0] == "en" && facet.Name[1] == "CCN" && facet.Name[2] == "tech");

                if (expectsSingleHit)
                {
                    Assert.AreEqual(1, facet.Count);
                    continue;
                }

                // Any other facet must be empty; count them so the total can be verified below.
                emptyFacets++;
                Assert.AreEqual(0, facet.Count);
            }

            Assert.AreEqual(2, emptyFacets);
            Assert.AreEqual(4, hits.TotalHitCount);

            foreach (var facet in hits.Facets)
            {
                foreach (Document match in facet.Documents)
                {
                    string body = match.GetField("text").StringValue();
                    Assert.IsTrue(body.Contains("block"));
                }
            }
        }
Esempio n. 4
0
        /// <summary>
        /// Searches "text" for "a" faceted on "category" and expects no hits at all:
        /// a total hit count of 0 and no facet with a positive count.
        /// </summary>
        public void Test7()
        {
            // NOTE(review): "a" is presumably dropped as a stop word by StandardAnalyzer - confirm.
            var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
            Query query = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, "text", analyzer).Parse("a");

            SparseFacetedSearcher searcher = new SparseFacetedSearcher(_Reader, "category");
            var hits = searcher.Search(query);

            Assert.AreEqual(0, hits.TotalHitCount, "Unexpected TotalHitCount");

            // Fail as soon as any facet reports a hit.
            if (hits.Facets.Any(facet => facet.Count > 0))
            {
                Assert.Fail("There must be no hit");
            }
        }
Esempio n. 5
0
        /// <summary>
        /// Searches "text" for "xxxxxxxxxxxxx" faceted on "category" and expects
        /// four facets to be reported, each with a hit count of zero.
        /// </summary>
        public void Test4()
        {
            Query query = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, "text", new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29)).Parse("xxxxxxxxxxxxx");

            var sfs  = new SparseFacetedSearcher(_Reader, "category");
            var hits = sfs.Search(query);

            var facets = hits.Facets.ToArray();

            Assert.AreEqual(4, facets.Length);

            // Check every facet: the previous version only inspected the first three
            // of the four facets, leaving the last one unverified.
            foreach (var facet in facets)
            {
                Assert.AreEqual(0, facet.Count);
            }
        }
Esempio n. 6
0
        /// <summary>
        /// Searches "text" for "block*" faceted on "category". Expects 4 facets with
        /// "politics" = 1, "tech" = 2, "sport" = 1 and any other category = 0 hits, 4 hits in total,
        /// and every returned document containing "block".
        /// </summary>
        public void Test2()
        {
            var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
            Query query = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, "text", analyzer).Parse("block*");

            SparseFacetedSearcher searcher = new SparseFacetedSearcher(_Reader, "category");
            var hits = searcher.Search(query);

            Assert.AreEqual(4, hits.Facets.Count());

            foreach (var facet in hits.Facets)
            {
                // Single-field facet: the first name part is the category value.
                string category = facet.Name[0];
                switch (category)
                {
                    case "politics":
                    case "sport":
                        Assert.AreEqual(1, facet.Count);
                        break;

                    case "tech":
                        Assert.AreEqual(2, facet.Count);
                        break;

                    default:
                        Assert.AreEqual(0, facet.Count);
                        break;
                }
            }

            Assert.AreEqual(4, hits.TotalHitCount);

            foreach (var facet in hits.Facets)
            {
                foreach (Document match in facet.Documents)
                {
                    string body = match.GetField("text").StringValue();
                    Assert.IsTrue(body.Contains("block"));
                }
            }
        }
        /// <summary>
        /// Searches "text" for "block*" faceted on "category". Expects 4 facets with
        /// "politics" = 1, "tech" = 2, "sport" = 1 and any other category = 0 hits, 4 hits in total,
        /// and every returned document containing "block".
        /// </summary>
        public void Test2()
        {
            var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
            Query query = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, "text", analyzer).Parse("block*");

            SparseFacetedSearcher searcher = new SparseFacetedSearcher(_Reader, "category");
            var hits = searcher.Search(query);

            Assert.AreEqual(4, hits.Facets.Count());

            foreach (var facet in hits.Facets)
            {
                // Single-field facet: the first name part is the category value.
                string category = facet.Name[0];
                switch (category)
                {
                    case "politics":
                    case "sport":
                        Assert.AreEqual(1, facet.Count);
                        break;

                    case "tech":
                        Assert.AreEqual(2, facet.Count);
                        break;

                    default:
                        Assert.AreEqual(0, facet.Count);
                        break;
                }
            }

            Assert.AreEqual(4, hits.TotalHitCount);

            foreach (var facet in hits.Facets)
            {
                foreach (Document match in facet.Documents)
                {
                    string body = match.GetField("text").StringValue();
                    Assert.IsTrue(body.Contains("block"));
                }
            }
        }
Esempio n. 8
0
        /// <summary>
        /// Starts 20 threads running <c>MultiThreadedAccessThread</c> against one shared
        /// <c>SparseFacetedSearcher</c>, waits for all of them, and expects <c>_errorCount</c> to remain 0.
        /// </summary>
        public void TestMultiThreadedAccess()
        {
            // NOTE(review): this parsed query is not referenced again in this method -
            // presumably the worker builds its own; confirm whether it can be removed.
            var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
            Query query = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, "text", analyzer).Parse("block*");

            SparseFacetedSearcher sharedSearcher = new SparseFacetedSearcher(_Reader, "lang", "source", "category");

            _errorCount = 0;

            var workers = new Thread[20];
            for (int n = 0; n < workers.Length; n++)
            {
                // Each worker receives the same searcher instance as its start argument.
                var started = new Thread(MultiThreadedAccessThread);
                workers[n] = started;
                started.Start(sharedSearcher);
            }

            // Wait for every worker before inspecting the error counter.
            foreach (Thread worker in workers)
            {
                worker.Join();
            }

            Assert.AreEqual(0, _errorCount);
        }
Esempio n. 9
0
        /// <summary>
        /// Sample usage: parses <paramref name="searchString"/> against the "text" field, runs a
        /// faceted search on "lang", "source" and "category", then walks the total hit count,
        /// each facet's count and name parts, and writes each facet document's text to the debug output.
        /// </summary>
        /// <param name="searchString">Query text handed to the query parser.</param>
        void HowToUse(string searchString)
        {
            var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
            var parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, "text", analyzer);
            Query query = parser.Parse(searchString);

            SparseFacetedSearcher searcher = new SparseFacetedSearcher(_Reader, "lang", "source", "category");
            var hits = searcher.Search(query);

            // Total number of hits across the whole result.
            long totalHits = hits.TotalHitCount;

            foreach (var facet in hits.Facets)
            {
                // Hit count and composite name of this facet.
                long hitCountPerGroup = facet.Count;
                var facetName = facet.Name;

                // Individual name parts are available by index.
                int partIndex = 0;
                while (partIndex < facetName.Length)
                {
                    string part = facetName[partIndex];
                    partIndex++;
                }

                // Documents belonging to this facet.
                foreach (Document match in facet.Documents)
                {
                    string text = match.GetField("text").StringValue();
                    System.Diagnostics.Debug.WriteLine(">>" + facetName + ": " + text);
                }
            }
        }
        /// <summary>
        /// Starts 20 threads running <c>MultiThreadedAccessThread</c> against one shared
        /// <c>SparseFacetedSearcher</c>, waits for all of them, and expects <c>_errorCount</c> to remain 0.
        /// </summary>
        public void TestMultiThreadedAccess()
        {
            // NOTE(review): this parsed query is not referenced again in this method -
            // presumably the worker builds its own; confirm whether it can be removed.
            var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
            Query query = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, "text", analyzer).Parse("block*");

            SparseFacetedSearcher sharedSearcher = new SparseFacetedSearcher(_Reader, "lang", "source", "category");
            _errorCount = 0;

            var workers = new Thread[20];
            for (int n = 0; n < workers.Length; n++)
            {
                // Each worker receives the same searcher instance as its start argument.
                var started = new Thread(MultiThreadedAccessThread);
                workers[n] = started;
                started.Start(sharedSearcher);
            }

            // Wait for every worker before inspecting the error counter.
            foreach (Thread worker in workers)
            {
                worker.Join();
            }

            Assert.AreEqual(0, _errorCount);
        }
        /// <summary>
        /// Sample usage: parses <paramref name="searchString"/> against the "text" field, runs a
        /// faceted search on "lang", "source" and "category", then walks the total hit count,
        /// each facet's count and name parts, and writes each facet document's text to the debug output.
        /// </summary>
        /// <param name="searchString">Query text handed to the query parser.</param>
        void HowToUse(string searchString)
        {
            var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
            var parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, "text", analyzer);
            Query query = parser.Parse(searchString);

            SparseFacetedSearcher searcher = new SparseFacetedSearcher(_Reader, "lang", "source", "category");
            var hits = searcher.Search(query);

            // Total number of hits across the whole result.
            long totalHits = hits.TotalHitCount;

            foreach (var facet in hits.Facets)
            {
                // Hit count and composite name of this facet.
                long hitCountPerGroup = facet.Count;
                var facetName = facet.Name;

                // Individual name parts are available by index.
                int partIndex = 0;
                while (partIndex < facetName.Length)
                {
                    string part = facetName[partIndex];
                    partIndex++;
                }

                // Documents belonging to this facet.
                foreach (Document match in facet.Documents)
                {
                    string text = match.GetField("text").StringValue();
                    System.Diagnostics.Debug.WriteLine(">>" + facetName + ": " + text);
                }
            }
        }
        /// <summary>
        /// Searches "text" for "a" faceted on "category" and expects no hits at all:
        /// a total hit count of 0 and no facet with a positive count.
        /// </summary>
        public void Test7()
        {
            // NOTE(review): "a" is presumably dropped as a stop word by StandardAnalyzer - confirm.
            var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
            Query query = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, "text", analyzer).Parse("a");

            SparseFacetedSearcher searcher = new SparseFacetedSearcher(_Reader, "category");
            var hits = searcher.Search(query);

            Assert.AreEqual(0, hits.TotalHitCount, "Unexpected TotalHitCount");

            // Fail as soon as any facet reports a hit.
            if (hits.Facets.Any(facet => facet.Count > 0))
            {
                Assert.Fail("There must be no hit");
            }
        }
        /// <summary>
        /// Runs a match-all query faceted on the field "nosuchfield" and expects
        /// zero total hits and zero facets.
        /// </summary>
        public void Test6()
        {
            Query matchAll = new MatchAllDocsQuery();

            // NOTE(review): "nosuchfield" is presumably absent from the test index - confirm against the fixture setup.
            SparseFacetedSearcher searcher = new SparseFacetedSearcher(_Reader, "nosuchfield");
            var result = searcher.Search(matchAll);

            Assert.AreEqual(0, result.TotalHitCount);
            Assert.AreEqual(0, result.Facets.Count());
        }
        /// <summary>
        /// Runs a match-all query faceted on "category" and expects a total hit count of 7.
        /// </summary>
        public void Test5()
        {
            Query matchAll = new MatchAllDocsQuery();

            SparseFacetedSearcher searcher = new SparseFacetedSearcher(_Reader, "category");
            var result = searcher.Search(matchAll);

            // Every document matched by the query is expected to be counted once.
            Assert.AreEqual(7, result.TotalHitCount);
        }
        /// <summary>
        /// Searches "text" for "xxxxxxxxxxxxx" faceted on "category" and expects
        /// four facets to be reported, each with a hit count of zero.
        /// </summary>
        public void Test4()
        {
            Query query = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, "text", new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29)).Parse("xxxxxxxxxxxxx");

            var sfs = new SparseFacetedSearcher(_Reader, "category");
            var hits = sfs.Search(query);

            var facets = hits.Facets.ToArray();

            Assert.AreEqual(4, facets.Length);

            // Check every facet: the previous version only inspected the first three
            // of the four facets, leaving the last one unverified.
            foreach (var facet in facets)
            {
                Assert.AreEqual(0, facet.Count);
            }
        }
        /// <summary>
        /// Searches "text" for "block*" faceted on "lang", "source" and "category".
        /// Expects 6 facets: four specific (lang, source, category) combinations with one hit each,
        /// two facets with no hits, and 4 hits in total. Every returned document must contain "block".
        /// </summary>
        public void Test3()
        {
            var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
            Query query = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, "text", analyzer).Parse("block*");

            SparseFacetedSearcher searcher = new SparseFacetedSearcher(_Reader, "lang", "source", "category");
            var hits = searcher.Search(query);

            Assert.AreEqual(6, hits.Facets.Count());

            int emptyFacets = 0;
            foreach (var facet in hits.Facets)
            {
                // Exercise both indexer forms; guards against
                // [System.Collections.Generic.KeyNotFoundException : The given key was not present in the dictionary.]
                var byName = hits[facet.Name];
                var byString = hits[facet.Name.ToString()];

                // The four facet-name combinations that are expected to hold exactly one hit.
                bool expectsSingleHit =
                    (facet.Name[0] == "us" && facet.Name[1] == "CCN" && facet.Name[2] == "politics") ||
                    (facet.Name[0] == "en" && facet.Name[1] == "BCC" && facet.Name[2] == "tech") ||
                    (facet.Name[0] == "us" && facet.Name[1] == "CCN" && facet.Name[2] == "sport") ||
                    (facet.Name[0] == "en" && facet.Name[1] == "CCN" && facet.Name[2] == "tech");

                if (expectsSingleHit)
                {
                    Assert.AreEqual(1, facet.Count);
                    continue;
                }

                // Any other facet must be empty; count them so the total can be verified below.
                emptyFacets++;
                Assert.AreEqual(0, facet.Count);
            }

            Assert.AreEqual(2, emptyFacets);
            Assert.AreEqual(4, hits.TotalHitCount);

            foreach (var facet in hits.Facets)
            {
                foreach (Document match in facet.Documents)
                {
                    string body = match.GetField("text").StringValue();
                    Assert.IsTrue(body.Contains("block"));
                }
            }
        }