public void TestCardinality()
        {
            // Checks that a bitmap with a single bit set, at index int.MaxValue,
            // reports a cardinality of exactly one.
            Console.WriteLine("testing EWAH GetCardinality");
            var bitmap = new EwahCompressedBitArray();

            bitmap.Set(int.MaxValue);

            // .NET composite formatting uses {0}-style placeholders; the printf-style
            // "%d" was printed literally and the argument was silently ignored.
            Console.WriteLine("Total Items {0}\n", bitmap.GetCardinality());

            // Assert.AreEqual takes (expected, actual).
            Assert.AreEqual(1UL, bitmap.GetCardinality());
            Console.WriteLine("testing EWAH GetCardinality:ok");
        }
        /**
         * Sets bits 0 through 184 of a fresh bitmap, negates the bitmap in place
         * and checks that the cardinality drops from 185 to 0.
         */
        public void TestNot()
        {
            Console.WriteLine("testing not");

            const int bitCount = 185;
            var bits = new EwahCompressedBitArray();
            int next = 0;

            while (next < bitCount)
            {
                bits.Set(next);
                next++;
            }

            // Every position that was set is counted before the negation ...
            Assert.AreEqual(bitCount, bits.GetCardinality());

            bits.Not();

            // ... and none is counted afterwards.
            Assert.AreEqual(0, bits.GetCardinality());
            Console.WriteLine("testing not:ok");
        }
 /**
  * For every size k from 0 to 4095, sizes a fresh bitmap in four different ways
  * (assigning SizeInBits, SetSizeInBits with false, setting bits 0..k-1 one by
  * one, SetSizeInBits with true) and checks the reported SizeInBits and
  * cardinality after each.
  */
 public void testsetSizeInBits()
 {
     Console.WriteLine("testing setSizeInBits");
     for (int k = 0; k < 4096; ++k)
     {
         // All assertions below use the (expected, actual) argument order so that
         // a failure message reports the two values the right way round.

         // Size assigned directly: k bits, none of them set.
         EwahCompressedBitArray ewah = new EwahCompressedBitArray();
         ewah.SizeInBits = k;
         Assert.AreEqual(k, ewah.SizeInBits);
         Assert.AreEqual(0, ewah.GetCardinality());

         // Size set with a default value of false: k bits, none of them set.
         EwahCompressedBitArray ewah2 = new EwahCompressedBitArray();
         ewah2.SetSizeInBits(k, false);
         Assert.AreEqual(k, ewah2.SizeInBits);
         Assert.AreEqual(0, ewah2.GetCardinality());

         // Bits 0..k-1 set one at a time: k bits, all of them set.
         EwahCompressedBitArray ewah3 = new EwahCompressedBitArray();
         for (int i = 0; i < k; ++i)
         {
             ewah3.Set(i);
         }
         Assert.AreEqual(k, ewah3.SizeInBits);
         Assert.AreEqual(k, ewah3.GetCardinality());

         // Size set with a default value of true: k bits, all of them set.
         EwahCompressedBitArray ewah4 = new EwahCompressedBitArray();
         ewah4.SetSizeInBits(k, true);
         Assert.AreEqual(k, ewah4.SizeInBits);
         Assert.AreEqual(k, ewah4.GetCardinality());
     }
 }
        /**
         * Sizes a fresh bitmap to one bit with a default value of false, negates
         * it, and checks that exactly one bit is then reported as set.
         */
        public void TestSizeInBits1()
        {
            Console.WriteLine("testing TestSizeInBits1");

            var singleBit = new EwahCompressedBitArray();
            singleBit.SetSizeInBits(1, false);
            Assert.AreEqual(1, singleBit.SizeInBits);

            // After the in-place negation the cardinality must be one.
            singleBit.Not();
            Assert.AreEqual(1, singleBit.GetCardinality());
        }
        /**
         * Asserts that a compressed bitmap and an uncompressed BitArray agree.
         * The cardinalities are compared first; then the positions reported by
         * the compressed bitmap are handed, together with the indices of the true
         * entries of the BitArray, to the list-based AreEqual overload.
         *
         * @param x the compressed BitArray/bitmap
         * @param y the uncompressed BitArray/bitmap
         */
        private static void AreEqual(EwahCompressedBitArray x, BitArray y)
        {
            Assert.AreEqual(x.GetCardinality(), y.Cardinality());

            // Gather, in ascending order, every index at which y holds a true bit.
            var setIndices = new List<int>();
            for (int index = 0; index != y.Count; ++index)
            {
                if (!y[index])
                {
                    continue;
                }
                setIndices.Add(index);
            }

            AreEqual(x.GetPositions(), setIndices);
        }
        /**
         * Pseudo-non-deterministic test inspired by S.J.vanSchaik.
         * (Yes, non-deterministic tests are bad, but the test is actually deterministic.)
         */

        /**
         * Pseudo-non-deterministic test inspired by Federico Fissore.
         * Sets every position returned by CreateSortedIntArrayOfBitsToSet in a
         * fresh bitmap, then verifies the bitmap against that array and checks
         * the resulting cardinality.
         *
         * @param length the number of set bits in a bitmap
         */
        private static void ShouldSetBits(int length)
        {
            Console.WriteLine("testing shouldSetBits " + length);

            // NOTE(review): the meaning of 434222 is defined by
            // CreateSortedIntArrayOfBitsToSet, which is not visible here - confirm.
            int[] positions = CreateSortedIntArrayOfBitsToSet(length, 434222);
            var bitmap = new EwahCompressedBitArray();
            int total = positions.Length;

            Console.WriteLine(" ... setting " + total + " values");
            for (int idx = 0; idx < total; idx++)
            {
                bitmap.Set(positions[idx]);
            }

            Console.WriteLine(" ... verifying " + total + " values");
            AreEqual(bitmap, positions);

            Console.WriteLine(" ... checking GetCardinality");
            Assert.AreEqual(total, bitmap.GetCardinality());
        }
        /**
         * Checks that a.And(b) matches an intersection built by hand: bitmap a
         * holds 39935..40100, bitmap b holds 39935..39999 plus 270000. The two
         * results must report the same positions, be Equal, share a hash code and
         * have the expected cardinality.
         */
        public void SsiYanKaiTest()
        {
            Console.WriteLine("testing SsiYanKaiTest");
            EwahCompressedBitArray a = EwahCompressedBitArray.BitmapOf(
                39935, 39936, 39937, 39938, 39939, 39940, 39941, 39942, 39943, 39944,
                39945, 39946, 39947, 39948, 39949, 39950, 39951, 39952, 39953, 39954,
                39955, 39956, 39957, 39958, 39959, 39960, 39961, 39962, 39963, 39964,
                39965, 39966, 39967, 39968, 39969, 39970, 39971, 39972, 39973, 39974,
                39975, 39976, 39977, 39978, 39979, 39980, 39981, 39982, 39983, 39984,
                39985, 39986, 39987, 39988, 39989, 39990, 39991, 39992, 39993, 39994,
                39995, 39996, 39997, 39998, 39999, 40000, 40001, 40002, 40003, 40004,
                40005, 40006, 40007, 40008, 40009, 40010, 40011, 40012, 40013, 40014,
                40015, 40016, 40017, 40018, 40019, 40020, 40021, 40022, 40023, 40024,
                40025, 40026, 40027, 40028, 40029, 40030, 40031, 40032, 40033, 40034,
                40035, 40036, 40037, 40038, 40039, 40040, 40041, 40042, 40043, 40044,
                40045, 40046, 40047, 40048, 40049, 40050, 40051, 40052, 40053, 40054,
                40055, 40056, 40057, 40058, 40059, 40060, 40061, 40062, 40063, 40064,
                40065, 40066, 40067, 40068, 40069, 40070, 40071, 40072, 40073, 40074,
                40075, 40076, 40077, 40078, 40079, 40080, 40081, 40082, 40083, 40084,
                40085, 40086, 40087, 40088, 40089, 40090, 40091, 40092, 40093, 40094,
                40095, 40096, 40097, 40098, 40099, 40100);
            EwahCompressedBitArray b = EwahCompressedBitArray.BitmapOf(
                39935, 39936, 39937, 39938, 39939, 39940, 39941, 39942, 39943, 39944,
                39945, 39946, 39947, 39948, 39949, 39950, 39951, 39952, 39953, 39954,
                39955, 39956, 39957, 39958, 39959, 39960, 39961, 39962, 39963, 39964,
                39965, 39966, 39967, 39968, 39969, 39970, 39971, 39972, 39973, 39974,
                39975, 39976, 39977, 39978, 39979, 39980, 39981, 39982, 39983, 39984,
                39985, 39986, 39987, 39988, 39989, 39990, 39991, 39992, 39993, 39994,
                39995, 39996, 39997, 39998, 39999, 270000);

            // Build the expected intersection by hand.
            HashSet<int> aPositions = new HashSet<int>(a.GetPositions());
            int intersection = 0;
            EwahCompressedBitArray inter = new EwahCompressedBitArray();

            // Walk b's positions in the order GetPositions() returns them instead of
            // going through a HashSet, whose enumeration order is unspecified.
            // NOTE(review): Set() presumably expects increasing positions - confirm.
            foreach (int position in b.GetPositions())
            {
                if (aPositions.Contains(position))
                {
                    inter.Set(position);
                    ++intersection;
                }
            }

            EwahCompressedBitArray and2 = a.And(b);
            List<int> l1 = inter.GetPositions();
            List<int> l2 = and2.GetPositions();
            var ok = true;

            if (l1.Count != l2.Count)
            {
                Console.WriteLine("cardinality differs = " + l1.Count + " " + l2.Count);
                ok = false;
            }

            // Compare only the common prefix: indexing l2 up to l1.Count would throw
            // ArgumentOutOfRangeException when l2 is shorter, hiding the real failure.
            int common = Math.Min(l1.Count, l2.Count);
            for (int k = 0; k < common; ++k)
            {
                if (l1[k] != l2[k])
                {
                    Console.WriteLine("differ at " + k + " = " + l1[k] + " " + l2[k]);
                    ok = false;
                }
            }

            Assert.IsTrue(ok);
            Assert.IsTrue(and2.Equals(inter));
            Assert.AreEqual(inter.GetHashCode(), and2.GetHashCode());
            Assert.AreEqual(intersection, and2.GetCardinality());
        }
        /**
         * Builds 100 bitmaps over indices 0..32767, setting each index when the
         * next value drawn from a Random with a fixed seed is below 0.9, and checks
         * that GetCardinality matches the number of bits that were set.
         */
        public void VanSchaikTest()
        {
            Console.WriteLine("testing vanSchaikTest (this takes some time)");
            const int    totalNumBits = 32768;
            const double odds         = 0.9;
            var          rand         = new Random(323232323);

            for (int t = 0; t < 100; t++)
            {
                int numBitsSet = 0;
                var cBitMap    = new EwahCompressedBitArray();
                for (int i = 0; i < totalNumBits; i++)
                {
                    if (rand.NextDouble() < odds)
                    {
                        cBitMap.Set(i);
                        numBitsSet++;
                    }
                }

                // Assert.AreEqual takes (expected, actual): the locally counted bits
                // are the expectation, the bitmap's cardinality is the value under test.
                Assert.AreEqual(numBitsSet, cBitMap.GetCardinality());
            }
            Console.WriteLine("testing vanSchaikTest:ok");
        }
        /**
         * Asserts that a compressed bitmap and an uncompressed one report the
         * same cardinality (only the counts are compared, not the positions);
         * part of a test contributed by Marc Polizzi.
         *
         * @param clrBitArray the uncompressed bitmap
         * @param ewahBitmap the compressed bitmap
         */
        private static void assertCardinality(BitArray clrBitArray,
                                              EwahCompressedBitArray ewahBitmap)
        {
            var compressedCount   = ewahBitmap.GetCardinality();
            var uncompressedCount = clrBitArray.Cardinality();

            Assert.AreEqual(compressedCount, uncompressedCount);
        }
Beispiel #10
0
        /// <summary>
        /// Combines the bitmaps of <c>info.Tag</c> and <c>info.OtherTag</c> using <c>info.Operator</c>
        /// ("AND", "AND-NOT", "OR" or "OR-NOT"), optionally removes the bits present in
        /// <paramref name="exclusionBitMap"/>, and returns the page of questions selected by
        /// <c>info.Skip</c> and <c>info.PageSize</c>. The total elapsed time is recorded via Results.AddData.
        /// </summary>
        /// <param name="info">The query description: type, both tags, operator, skip and page size.</param>
        /// <param name="exclusionBitMap">Optional bitmap that is AND-NOTed out of the result; null applies no exclusions.</param>
        /// <param name="printLoggingMessages">When true, timings and cardinalities are written to the Logger.</param>
        /// <returns>A QueryResult whose Questions property holds the selected page.</returns>
        /// <exception cref="InvalidOperationException">Thrown when info.Operator is not one of the supported operators.</exception>
        internal QueryResult Query(QueryInfo info, EwahCompressedBitArray exclusionBitMap = null, bool printLoggingMessages = false)
        {
            var bitMap         = GetTagByQueryBitMapLookup(info.Type);
            var questionLookup = GetTagByQueryLookup(info.Type)[TagServer.ALL_TAGS_KEY];

            // Calculating the Cardinality can be (is?) expensive, we don't want to do it in Queries unless we really need to!?
            bool calculateCardinality = true; // false

            if (printLoggingMessages)
            {
                Logger.Log("Tag \"{0}\" is in {1:N0} Questions, Tag \"{2}\" is in {3:N0} Questions",
                           info.Tag, allTags[info.Tag], info.OtherTag, allTags[info.OtherTag]);
            }

            //PrintResults(Enumerable.Range(0, questionLookup.Length), qu => questionLookup[qu], TagServer.ALL_TAGS_KEY, info.Type);
            //PrintResults(bitMap[info.Tag], qu => questionLookup[qu], info.Tag, info.Type);
            //PrintResults(bitMap[info.OtherTag], qu => questionLookup[qu], info.OtherTag, info.Type);

            var timer      = Stopwatch.StartNew();
            var tag1BitMap = bitMap[info.Tag];
            var tag2BitMap = bitMap[info.OtherTag];

            // No initial allocation needed: every switch branch either assigns the result or throws.
            EwahCompressedBitArray bitMapResult;

            switch (info.Operator)
            {
            case "AND":
                bitMapResult = tag1BitMap.And(tag2BitMap);
                break;

            case "AND-NOT":
                bitMapResult = tag1BitMap.AndNot(tag2BitMap);
                break;

            case "OR":
                bitMapResult = tag1BitMap.Or(tag2BitMap);
                break;

            case "OR-NOT":     //"i.e. .net+or+jquery-"
                bitMapResult = tag1BitMap.OrNot(tag2BitMap);
                break;

            // TODO Work out what a "NOT" query really means, the LINQ version was "result = tag1Query.Except(tag2Query)" (which is the same as AND-NOT?!)
            //case "NOT":
            //    var bitMapResult = (EwahCompressedBitArray)tag2BitMap.Clone();
            //    bitMapResult.Not();
            //    break;

            default:
                throw new InvalidOperationException(string.Format("Invalid operator specified: {0}", info.Operator ?? "<NULL>"));
            }

            if (exclusionBitMap != null)
            {
                // Only pay for the "before" cardinality when it is actually going to be logged.
                ulong cardinalityBeforeExclusions = 0;
                if (printLoggingMessages && calculateCardinality)
                {
                    cardinalityBeforeExclusions = bitMapResult.GetCardinality();
                }

                // The exclusion BitMap built by CreateBitMapIndexForExcludedTags(..) is Set (i.e. 1) in places
                // where you CAN'T use the question, so the exclusions are applied by AND-NOTing it with the results
                var exclusionTimer = Stopwatch.StartNew();
                bitMapResult = bitMapResult.AndNot(exclusionBitMap);
                exclusionTimer.Stop();

                if (printLoggingMessages)
                {
                    if (calculateCardinality)
                    {
                        Logger.Log("Took {0,5:N2} ms to apply exclusion BitMap (Cardinality={1:N0}), Results Cardinality: Before={2:N0}, After={3:N0}",
                                   exclusionTimer.Elapsed.TotalMilliseconds, exclusionBitMap.GetCardinality(), cardinalityBeforeExclusions, bitMapResult.GetCardinality());
                    }
                    else
                    {
                        Logger.Log("Took {0,5:N2} ms to apply exclusion BitMap", exclusionTimer.Elapsed.TotalMilliseconds);
                    }
                }
            }

            // Each set bit is an index into questionLookup, which in turn yields the index into questions.
            var resultCollectionTimer = Stopwatch.StartNew();
            var result = bitMapResult.Skip(info.Skip)
                         .Take(info.PageSize)
                         .Select(i => questions[questionLookup[i]])
                         .ToList();

            resultCollectionTimer.Stop();
            if (printLoggingMessages)
            {
                Logger.Log("Took {0,5:N2} ms to collect the results", resultCollectionTimer.Elapsed.TotalMilliseconds);
            }

            timer.Stop();

            Results.AddData(timer.Elapsed.TotalMilliseconds.ToString("#.##"));

            if (printLoggingMessages)
            {
                using (Utils.SetConsoleColour(ConsoleColor.DarkYellow))
                {
                    if (calculateCardinality)
                    {
                        Logger.Log("Took {0,5:N2} ms in TOTAL to calculate \"{1} {2} {3}\", Got {4} results, (Result Cardinality={5:N0})",
                                   timer.Elapsed.TotalMilliseconds, info.Tag, info.Operator, info.OtherTag, result.Count, bitMapResult.GetCardinality());
                    }
                    else
                    {
                        Logger.Log("Took {0,5:N2} ms in TOTAL to calculate \"{1} {2} {3}\", Got {4} results",
                                   timer.Elapsed.TotalMilliseconds, info.Tag, info.Operator, info.OtherTag, result.Count);
                    }
                }
                //PrintResults(bitMapResult, qu => questionLookup[qu], string.Format("{0} {1} {2}", info.Tag, info.Operator, info.OtherTag), info.Type);
                Logger.Log();
            }

            return(new QueryResult
            {
                Questions = result,
                // TODO see if we can get meaningful numbers here, WITHOUT calling GetCardinality() (because it's expensive)
                //Counters = new Dictionary<string, int>
                //{
                //    { "TagCounter", tagCounter },
                //    { "OtherTagCounter", otherTagCounter },
                //    { "ExclusionCounter", exclusionCounter.Counter }
                //}
            });
        }
        /// <summary>
        /// Builds a bitmap that has a bit set at every index of the ALL_TAGS_KEY question list (for the
        /// given query type) whose question id is listed under at least one of <paramref name="tagsToExclude"/>.
        /// The bitmap is then sized to questions.Count and shrunk before being returned.
        /// </summary>
        /// <param name="tagsToExclude">The tags whose questions are to be marked as excluded.</param>
        /// <param name="queryType">Selects which tag lookup (and therefore which question ordering) is used.</param>
        /// <param name="printLoggingMessages">When true, timings and sizes are written via Logger.LogStartupMessage.</param>
        /// <returns>The exclusion bitmap; a set bit means the question at that index is excluded.</returns>
        internal EwahCompressedBitArray CreateBitMapIndexForExcludedTags(CLR.HashSet <string> tagsToExclude, QueryType queryType, bool printLoggingMessages = false)
        {
            var bitMapTimer = Stopwatch.StartNew();

            var tagLookupForQueryType = GetTagByQueryLookup(queryType);
            var collectIdsTimer       = Stopwatch.StartNew();
            // NOTE(review): presumably the cached set is handed out empty - confirm against GetCachedHashSet().
            var excludedQuestionIds   = cache.Value.GetCachedHashSet();

            foreach (var tag in tagsToExclude)
            {
                foreach (var id in tagLookupForQueryType[tag])
                {
                    excludedQuestionIds.Add(id);
                }
            }
            collectIdsTimer.Stop();

            // At the end the BitMap is Set (i.e. 1) in places where you CAN'T use the question, i.e. it IS excluded
            // That way Query(..) can efficiently apply the exclusions by AND-NOTing this BitMap with the previous results

            var allQuestions = tagLookupForQueryType[TagServer.ALL_TAGS_KEY];
            var setBitsTimer = Stopwatch.StartNew();
            var bitMap       = new EwahCompressedBitArray();

            for (int index = 0; index < allQuestions.Length; index++)
            {
                if (excludedQuestionIds.Contains(allQuestions[index]))
                {
                    var wasSet = bitMap.SetOptimised(index); // Set a bit where you CAN'T use a question
                    if (wasSet == false)
                    {
                        Logger.LogStartupMessage("Error, unable to set bit {0:N0} (SizeInBits = {1:N0})", index, bitMap.SizeInBits);
                    }
                }
            }
            setBitsTimer.Stop();

            var tidyUpTimer = Stopwatch.StartNew();

            bitMap.SetSizeInBits(questions.Count, defaultvalue: false);
            bitMap.Shrink();
            tidyUpTimer.Stop();

            bitMapTimer.Stop();

            if (printLoggingMessages)
            {
                // Compute the cardinality once and reuse it for all the messages below,
                // instead of calling GetCardinality() three times.
                var cardinality = bitMap.GetCardinality();

                Logger.LogStartupMessage("Took {0} ({1,6:N0} ms) to collect {2:N0} Question Ids from {3:N0} Tags",
                                         collectIdsTimer.Elapsed, collectIdsTimer.ElapsedMilliseconds, excludedQuestionIds.Count, tagsToExclude.Count);
                Logger.LogStartupMessage("Took {0} ({1,6:N0} ms) to set {2:N0} bits",
                                         setBitsTimer.Elapsed, setBitsTimer.ElapsedMilliseconds, cardinality);
                Logger.LogStartupMessage("Took {0} ({1,6:N0} ms) to tidy-up the Bit Map (SetSizeInBits(..) and Shrink()), Size={2:N0} bytes ({3:N2} MB)",
                                         tidyUpTimer.Elapsed, tidyUpTimer.ElapsedMilliseconds, bitMap.SizeInBytes, bitMap.SizeInBytes / 1024.0 / 1024.0);

                using (Utils.SetConsoleColour(ConsoleColor.DarkYellow))
                {
                    // The last value is the number of questions that are NOT excluded.
                    Logger.LogStartupMessage("Took {0} ({1,6:N0} ms) in TOTAL, made BitMap from {2:N0} Tags ({3:N0} Qu Ids), Cardinality={4:N0} ({5:N0})\n",
                                             bitMapTimer.Elapsed, bitMapTimer.ElapsedMilliseconds,
                                             tagsToExclude.Count,
                                             excludedQuestionIds.Count,
                                             cardinality,
                                             (ulong)questions.Count - cardinality);
                }
            }

            return(bitMap);
        }