Example #1
        // Ad-hoc timing harness for ShortSet.Count(): times one million loop iterations and traces
        // the last observed counts followed by the measured throughput.
        // NOTE(review): block braces are not visible in this excerpt; the indentation suggests the
        // three Count() calls form the loop body - confirm against the full source.
        private static void SetCountPerformance()
            ShortSet s0 = new ShortSet(ushort.MaxValue);
            ShortSet s1 = new ShortSet(ushort.MaxValue);

            // Set produced by the test helper with arguments (ushort.MaxValue, 10000, new Random()).
            ShortSet s2 = Arriba.Test.ShortSetTests.BuildRandom(ushort.MaxValue, 10000, new Random());

            // The counts are kept in locals so they can be traced after the timed loop.
            ushort value  = 0;
            ushort value2 = 0;
            ushort value3 = 0;

            int       iterations = 1000000;
            Stopwatch w          = Stopwatch.StartNew();

            for (int i = 0; i < iterations; ++i)
                value  = s0.Count();
                value2 = s1.Count();
                value3 = s2.Count();

                //value = (ushort)ShortSet.CallOverheadTest();
                //value2 = (ushort)ShortSet.CallOverheadTest();
                //value3 = (ushort)ShortSet.CallOverheadTest();

            // Held as a double so the division below is floating-point rather than integer division.
            double milliseconds             = w.ElapsedMilliseconds;
            // NOTE(review): the rate is per loop iteration, not per individual Count() call.
            double operationsPerMillisecond = iterations / milliseconds;

            Trace.Write(String.Format("{0:n0}, {1:n0}, {2:n0}\r\n", value, value2, value3));
            Trace.Write(String.Format("{0:n0} operations in {1:n0} milliseconds; {2:n0} per millisecond.\r\n", iterations, milliseconds, operationsPerMillisecond));
        // Collects up to 500 values from 'column' for rows that are contained in 'whereSet'.
        // 'matchCount' caps the sample size and is the starting value of countLeft.
        // NOTE(review): braces are not visible in this excerpt, and no statement that advances
        // sampleCount, decrements countLeft, or returns 'samples' is shown - confirm against the
        // full source before relying on this description.
        private static object[] GetColumnSamples(Partition p, IUntypedColumn column, ShortSet whereSet, int matchCount)
            // Get up to 500 samples
            int countToGet = Math.Min(500, matchCount);

            object[] samples = new object[countToGet];

            Random r           = new Random();
            int    sampleCount = 0;
            int    countLeft   = matchCount;

            // Walk row ids in order; stop once enough samples were taken or the partition is exhausted.
            for (int i = 0; i < p.Count && sampleCount < countToGet; ++i)
                ushort lid = (ushort)i;

                if (whereSet.Contains(lid))
                    // Random.NextDouble() yields a value in [0.0, 1.0).
                    double excludeChance = r.NextDouble();
                    int    countNeeded   = countToGet - sampleCount;

                    // Take this row when (countNeeded / countLeft) exceeds the random draw;
                    // expressed as a multiplication so no division is needed.
                    // == if((countNeeded / countLeft) > excludeChance)
                    if (countNeeded > (excludeChance * countLeft))
                        samples[sampleCount] = column[lid];


Example #3
        // Ad-hoc timing harness for ShortSet.FromAnd: calls s0.FromAnd(s1, s2) three million times,
        // then traces s0.Count() and the measured throughput.
        // NOTE(review): block braces are not visible in this excerpt; the indentation suggests the
        // FromAnd call is the loop body.
        private static void FromAndPerformance()
            ShortSet s0 = new ShortSet(ushort.MaxValue);
            ShortSet s1 = new ShortSet(ushort.MaxValue);

            // Set produced by the test helper with arguments (ushort.MaxValue, 10000, new Random()).
            ShortSet s2 = Arriba.Test.ShortSetTests.BuildRandom(ushort.MaxValue, 10000, new Random());


            int       iterations = 3000000;
            Stopwatch w          = Stopwatch.StartNew();

            for (int i = 0; i < iterations; ++i)
                s0.FromAnd(s1, s2);

            // Held as a double so the division below is floating-point rather than integer division.
            double milliseconds             = w.ElapsedMilliseconds;
            double operationsPerMillisecond = iterations / milliseconds;

            Trace.Write(String.Format("{0:n0}\r\n", s0.Count()));
            Trace.Write(String.Format("{0:n0} operations in {1:n0} milliseconds; {2:n0} per millisecond.\r\n", iterations, milliseconds, operationsPerMillisecond));
Example #4
        // Test helper: runs index.WhereMatches for 'word' into a fresh ShortSet and returns the
        // set's values joined with ", ".
        private static string GetMatches(WordIndex index, string word)
            ShortSet results = new ShortSet(ushort.MaxValue);

            // WhereMatches receives the set as an argument; its values are read back below.
            index.WhereMatches(word, results);
            return(String.Join(", ", results.Values));
Example #5
        // Performance test: reads ShortSet.Values for a 1,000-item and a 10,000-item random set,
        // checks the returned collection sizes, and asserts a minimum throughput.
        // NOTE(review): block braces are not visible in this excerpt; the indentation suggests the
        // two Values reads and their asserts form the loop body.
        public void ShortSet_Performance_Enumerate()
            // Goal: Enumerate is <200k instructions, so at 2M instructions per millisecond, 10 per millisecond (Release build)
            //  Enumerate is used to walk set items when computing results in ORDER BY order.
            ShortSet s1 = BuildRandom(ushort.MaxValue, 1000, new Random());
            ShortSet s2 = BuildRandom(ushort.MaxValue, 10000, new Random());

            int       iterations = 500;
            Stopwatch w          = Stopwatch.StartNew();

            for (int i = 0; i < iterations; ++i)
                ICollection <ushort> values1 = s1.Values;
                Assert.AreEqual(1000, values1.Count);

                ICollection <ushort> values2 = s2.Values;
                Assert.AreEqual(10000, values2.Count);

            // Two Values reads per iteration.
            int    operations               = (2 * iterations);
            // Held as a double so the division below is floating-point rather than integer division.
            double milliseconds             = w.ElapsedMilliseconds;
            double operationsPerMillisecond = operations / milliseconds;

            Trace.Write(String.Format("{0:n0} operations in {1:n0} milliseconds; {2:n0} per millisecond.", operations, milliseconds, operationsPerMillisecond));

            // The threshold is half of the 10-per-millisecond goal stated above.
            Assert.IsTrue(operationsPerMillisecond > 5, "Not within 200% of goal.");
Example #6
        // Performance test for ShortSet.Count(): asserts the expected count of three sets on every
        // iteration and then asserts a minimum throughput.
        // NOTE(review): block braces are not visible in this excerpt; the indentation suggests the
        // three asserts form the loop body. s1 is expected to count ushort.MaxValue, so a statement
        // that fills it is presumably missing from this excerpt - confirm against the full source.
        public void ShortSet_Performance_Count()
            // Goal: Count is <10k instructions. 2GHz is 2B/sec, so 2M/ms, so 10k each means 200 iterations per ms (Release build)
            //  Count is used for COUNT(*) aggregate and to compute IntelliSense rank for words in the word index.
            ShortSet s0 = new ShortSet(ushort.MaxValue);

            ShortSet s1 = new ShortSet(ushort.MaxValue);


            ShortSet s2 = BuildRandom(ushort.MaxValue, 10000, new Random());

            int       iterations = 10000;
            Stopwatch w          = Stopwatch.StartNew();

            for (int i = 0; i < iterations; ++i)
                Assert.AreEqual(0, s0.Count());
                Assert.AreEqual(ushort.MaxValue, s1.Count());
                Assert.AreEqual(10000, s2.Count());

            // Held as a double so the division below is floating-point rather than integer division.
            double milliseconds             = w.ElapsedMilliseconds;
            // Three Count() calls per iteration.
            double operationsPerMillisecond = (3 * iterations) / milliseconds;

            // NOTE(review): the first format argument is 'iterations', while the rate above is
            // computed from 3 * iterations, so the traced operation count and rate do not line up.
            Trace.Write(String.Format("{0:n0} operations in {1:n0} milliseconds; {2:n0} per millisecond.", iterations, milliseconds, operationsPerMillisecond));

            // NOTE(review): 50 is a quarter of the 200-per-millisecond goal stated above - confirm the intended threshold.
            Assert.IsTrue(operationsPerMillisecond > 50, "Not within 200% of goal.");
Example #7
        // Builds a column of the given numeric type, runs MinAggregator over a ShortSet of rows, and
        // asserts the aggregate equals the value stored at row 0.
        // NOTE(review): block braces are not visible in this excerpt, and the second for loop has no
        // visible body (the statement adding even ids to 'matches' is presumably missing) - confirm
        // against the full source.
        private void CheckTypeDetermination(string numericColumnTypeName)
            // Create a numeric column holding the values 0 through 9 (the loop below stops before 10)
            IUntypedColumn column = ColumnFactory.Build(new ColumnDetails("Unused", numericColumnTypeName, 10), 0);

            for (int i = 0; i < 10; ++i)
                column[(ushort)i] = i;

            // Include 0, 2, 4, 6, 8 in the results
            ShortSet matches = new ShortSet(10);

            for (int i = 0; i < 10; i += 2)

            // Ask for the Min and verify both the value and type are correct
            // This verifies the type checks in BaseAggregator.Aggregate determine type correctly
            MinAggregator aggregator = new MinAggregator();
            object        context    = aggregator.CreateContext();
            object        result     = aggregator.Aggregate(context, matches, new IUntypedColumn[] { column });

            // The expectation is read back from the column itself rather than written as a literal.
            Assert.AreEqual(column[0], result);
Example #8
        // Evaluates this.Where against the partition, then, when anything matched, asks every column
        // for matches of this.Term and records a (column name, match count) pair for each column
        // with at least one match. The pairs are sorted by count descending and copied into a
        // two-column DataBlock ("ColumnName", "Count").
        // Throws ArgumentNullException when 'p' is null.
        // NOTE(review): block braces are not visible in this excerpt, and no statement advancing
        // 'index', resetting termMatchesForColumn between columns, or returning 'result' is shown -
        // confirm against the full source.
        public DataBlockResult Compute(Partition p)
            if (p == null)
                throw new ArgumentNullException("p");
            DataBlockResult result = new DataBlockResult(this);

            // Find matches for the remaining query
            ShortSet baseQueryMatches = new ShortSet(p.Count);

            this.Where.TryEvaluate(p, baseQueryMatches, result.Details);

            // Find and count matches per column for the term in the outer query
            List <Tuple <string, int> > matchCountPerColumn = new List <Tuple <string, int> >();

            // Per-column work is skipped entirely when the base query matched nothing.
            if (baseQueryMatches.Count() > 0)
                // NOTE(review): bareTerm is not used by any statement visible in this excerpt.
                TermExpression bareTerm             = new TermExpression(this.Term);
                ShortSet       termMatchesForColumn = new ShortSet(p.Count);

                bool             succeeded        = false;
                // A separate details object, so per-column failures are not written to result.Details here.
                ExecutionDetails perColumnDetails = new ExecutionDetails();

                foreach (IColumn <object> column in p.Columns.Values)

                    // Reset the flag before each column so 'succeeded' reflects each column on its own.
                    perColumnDetails.Succeeded = true;
                    column.TryWhere(Operator.Matches, this.Term, termMatchesForColumn, perColumnDetails);
                    succeeded |= perColumnDetails.Succeeded;


                    ushort matchCount = termMatchesForColumn.Count();
                    // Only columns with at least one match are reported.
                    if (matchCount > 0)
                        matchCountPerColumn.Add(new Tuple <string, int>(column.Name, (int)matchCount));

                // Sort results by count of matches descending
                matchCountPerColumn.Sort((left, right) => right.Item2.CompareTo(left.Item2));

            // Copy to a DataBlock and return it
            int       index = 0;
            DataBlock block = new DataBlock(new string[] { "ColumnName", "Count" }, matchCountPerColumn.Count);

            foreach (var column in matchCountPerColumn)
                block[index, 0] = column.Item1;
                block[index, 1] = column.Item2;

            result.Values = block;
            // Total is the count of base query matches, not the sum of the per-column counts.
            result.Total  = baseQueryMatches.Count();
Example #9
        // Test of how ShortSet treats values relative to its capacity.
        // NOTE(review): only the set constructions and one assert are visible in this excerpt. The
        // assert expects s1 to contain 0-9, so the statements that populate and combine the sets are
        // presumably missing here - confirm against the full source.
        public void ShortSet_CapacityHandling()
            // Verify values above capacity are not reported, even if there are bits for them
            ShortSet s1 = new ShortSet(10);

            Assert.AreEqual("0, 1, 2, 3, 4, 5, 6, 7, 8, 9", String.Join(", ", s1.Values));

            // Verify the last value is not truncated if the this set in an operation is larger,
            // and that values above the other capacity aren't involved in operations.
            ShortSet s2 = new ShortSet(120);


            ShortSet s3 = new ShortSet(64);




Example #10
        // Evaluates this expression against 'partition': for each entry in this.Values, asks the
        // column named this.ColumnName for matches using this.Operator, passing the same 'result'
        // set and 'details' to every call.
        // Throws ArgumentNullException when details, result or partition is null.
        // NOTE(review): block braces and the 'else' keyword are not visible in this excerpt; the
        // indentation suggests the column lookup and loop run only when the column exists - confirm
        // against the full source.
        public void TryEvaluate(Partition partition, ShortSet result, ExecutionDetails details)
            if (details == null)
                throw new ArgumentNullException("details");
            if (result == null)
                throw new ArgumentNullException("result");
            if (partition == null)
                throw new ArgumentNullException("partition");

            // A missing column is reported through 'details' rather than by throwing.
            if (!partition.ContainsColumn(this.ColumnName))
                details.AddError(ExecutionDetails.ColumnDoesNotExist, this.ColumnName);
                IColumn <object> column = partition.Columns[this.ColumnName];

                // One TryWhere call per value, all writing into the same 'result' set.
                for (int i = 0; i < this.Values.Length; ++i)
                    column.TryWhere(this.Operator, this.Values.GetValue(i), result, details);
Example #11
 // Base implementation of TryWhere: never adds matches to 'result'; it only records a
 // ColumnDoesNotSupportOperator error (with the operator and this column's name) when
 // 'details' is non-null.
 public void TryWhere(Operator op, T value, ShortSet result, ExecutionDetails details)
     // Base Column can't identify matches for any operator in bulk efficiently.
     if (details != null)
         details.AddError(ExecutionDetails.ColumnDoesNotSupportOperator, op, this.Name);
Example #12
 // Aggregates over the rows in 'matches' for the given columns.
 // Throws ArgumentNullException when 'matches' is null.
 // NOTE(review): only the null check is visible in this excerpt; the aggregation logic and the
 // return statement are not shown - confirm against the full source.
 public object Aggregate(object context, ShortSet matches, IUntypedColumn[] columns)
     if (matches == null)
         throw new ArgumentNullException("matches");
Example #13
 // Verifies ShortSet.LeadingZeros on 64-bit inputs, from all bits set (0 leading zeros) down to
 // zero (64 leading zeros).
 public void ShortSet_LeadingZeros()
     // All 64 bits set: no leading zeros.
     Assert.AreEqual(0, ShortSet.LeadingZeros(~(0UL)));
     // Top bit cleared by the shift: one leading zero.
     Assert.AreEqual(1, ShortSet.LeadingZeros(~(0UL) >> 1));
     // 0x00 contributes eight zero bits and 0x38 (0011 1000) two more.
     Assert.AreEqual(10, ShortSet.LeadingZeros(0x0038888888888888UL));
     // Only the two lowest bits are set.
     Assert.AreEqual(62, ShortSet.LeadingZeros(0x3UL));
     // Zero input: all 64 bits count as leading zeros.
     Assert.AreEqual(64, ShortSet.LeadingZeros(0UL));
Example #14
        // Evaluates this.Where against the partition, samples values of this.Column from the
        // matching rows, and fills a two-column DataBlock ("Percentiles", "Values") with one row
        // per entry in this.Percentiles. result.Total is set to the number of matching rows.
        // Throws ArgumentNullException when 'p' is null.
        // NOTE(review): block braces are not visible in this excerpt. No statement follows the
        // "Sort them" comment, and no return is shown after the missing-column error or at the end -
        // those lines are presumably missing here; confirm against the full source.
        public DataBlockResult Compute(Partition p)
            if (p == null)
                throw new ArgumentNullException("p");

            DataBlockResult result = new DataBlockResult(this);

            // Verify the column exists
            if (!p.ContainsColumn(this.Column))
                result.Details.AddError(ExecutionDetails.ColumnDoesNotExist, this.Column);

            // Find the set of items matching the where clause
            ShortSet whereSet = new ShortSet(p.Count);

            this.Where.TryEvaluate(p, whereSet, result.Details);

            int matchCount = whereSet.Count();

            result.Total = matchCount;

            // Values are only computed when evaluation succeeded and at least one row matched.
            if (result.Details.Succeeded && matchCount > 0)
                // Get sample values
                object[] samples = GetColumnSamples(p, p.Columns[this.Column], whereSet, matchCount);

                // Sort them

                // Record the values corresponding to those percentiles
                result.Values = new DataBlock(new string[] { "Percentiles", "Values" }, this.Percentiles.Length);

                for (int i = 0; i < this.Percentiles.Length; ++i)
                    double percentile = this.Percentiles[i];
                    // Map the percentile to a zero-based sample index (presumably the percentile is
                    // a 0-1 fraction - confirm), then clamp it into the valid index range.
                    int    sampleRow  = (int)(percentile * samples.Length) - 1;
                    if (sampleRow < 0)
                        sampleRow = 0;
                    if (sampleRow >= samples.Length)
                        sampleRow = samples.Length - 1;

                    result.Values[i, 0] = percentile;
                    result.Values[i, 1] = samples[sampleRow];

Example #15
        // Constructs a BooleanColumn: stores the default value and allocates the _trueItems set
        // with the size returned by ArrayExtensions.RecommendedSize.
        // NOTE(review): block braces are not visible in this excerpt and the final 'if' has no
        // visible body; what happens when defaultValue is true is not shown here.
        public BooleanColumn(bool defaultValue)
            this.DefaultValue = defaultValue;

            // RecommendedSize is called with (MinimumSize, MinimumSize, ushort.MaxValue) and the result cast to ushort.
            ushort recommendedSize = (ushort)ArrayExtensions.RecommendedSize(ArrayExtensions.MinimumSize, ArrayExtensions.MinimumSize, ushort.MaxValue);

            _trueItems = new ShortSet(recommendedSize);
            if (defaultValue == true)
Example #16
        // Finds matches for (op, value) by locating the first and last matching positions with
        // FindFirstWhere / FindLastWhere, building a RangeToScan from them, and adding that range's
        // matches from this.SortedIDs to 'result'. When no range can be built, a
        // ColumnDoesNotSupportOperator error is recorded (if 'details' is non-null) and 'result' is
        // left untouched by this method.
        // NOTE(review): block braces and 'else' keywords are not visible in this excerpt; the branch
        // structure described in the comments below is inferred from indentation - confirm against
        // the full source.
        public override void TryWhere(Operator op, T value, ShortSet result, ExecutionDetails details)
            RangeToScan range   = new RangeToScan();
            bool        rangeOk = true;

            // For StartsWith, for ByteBlocks only, implement using IsPrefixOf
            if (op == Operator.StartsWith)
                if (value is ByteBlock)
                    IComparable <T> prefixComparer = (IComparable <T>)((ByteBlock)(object)value).GetExtendedIComparable(ByteBlock.Comparison.IsPrefixOf);     // trust me C#... I'm a professional...

                    int first = FindFirstWhere(prefixComparer);
                    int last  = FindLastWhere(prefixComparer);
                    // The range is built with Operator.Equals rather than 'op': first/last were
                    // already located with the prefix comparer.
                    if (!RangeToScan.TryBuild(Operator.Equals, first, last, this.Column.Count, ref range))
                        rangeOk = false;
                        if (details != null)
                            details.AddError(ExecutionDetails.ColumnDoesNotSupportOperator, op, this.Name);
                    // Presumably the branch for StartsWith with a non-ByteBlock value: not supported.
                    rangeOk = false;
                    if (details != null)
                        details.AddError(ExecutionDetails.ColumnDoesNotSupportOperator, op, this.Name);
                // Presumably the branch for every operator other than StartsWith.
                int first = FindFirstWhere(value);
                int last  = FindLastWhere(value);
                // Determine the range to scan to compute the result
                if (!RangeToScan.TryBuild(op, first, last, this.Column.Count, ref range))
                    rangeOk = false;
                    if (details != null)
                        details.AddError(ExecutionDetails.ColumnDoesNotSupportOperator, op, this.Name);

            // Build the result set and return it
            if (rangeOk == true)
                range.AddMatches(this.SortedIDs, result);
Example #17
        // Evaluates this.Where against the partition, counts how often each value of this.Column
        // occurs among the matching rows, and converts the counts into a DataBlock via ToDataBlock
        // (passing this.Count as the row limit).
        // Throws ArgumentNullException when 'p' is null.
        // NOTE(review): block braces are not visible in this excerpt. No statement that increments
        // rowCount, and no return after the missing-column error or at the end, is shown - those
        // lines are presumably missing here; confirm against the full source.
        public override DistinctResult Compute(Partition p)
            if (p == null)
                throw new ArgumentNullException("p");
            DistinctResult result = new DistinctResult(this);

            // Verify the column exists
            if (!p.ContainsColumn(this.Column))
                result.Details.AddError(ExecutionDetails.ColumnDoesNotExist, this.Column);

            // Find the set of items matching the where clause
            ShortSet whereSet = new ShortSet(p.Count);

            this.Where.TryEvaluate(p, whereSet, result.Details);

            if (result.Details.Succeeded)
                // Count the occurences of each value
                Dictionary <object, int> countByValue = new Dictionary <object, int>();
                IUntypedColumn           column       = p.Columns[this.Column];
                int rowCount = 0;

                for (int i = 0; i < column.Count; ++i)
                    ushort lid = (ushort)i;
                    if (whereSet.Contains(lid))
                        object value = column[lid];

                        // TryGetValue leaves 'count' at 0 for a value not seen before, so the
                        // assignment below both inserts new values and increments existing ones.
                        int count;
                        countByValue.TryGetValue(value, out count);
                        countByValue[value] = count + 1;


                // Convert the top this.Count rows by count into a DataBlock
                result.Values = ToDataBlock(countByValue, this.Column, (int)this.Count);

                // All values were returned when the block has one row per distinct value.
                result.AllValuesReturned = result.Values.RowCount == countByValue.Count;
                result.Total             = rowCount;

        // Test helper: runs col.TryWhere(op, value) into a fresh ShortSet sized to col.Count and
        // checks details.Succeeded afterwards.
        // NOTE(review): block braces are not visible in this excerpt; the body of the failure check
        // and the return statement are not shown - confirm against the full source.
        private static string GetMatches(IpRangeColumn col, Operator op, ByteBlock value)
            ExecutionDetails details = new ExecutionDetails();
            ShortSet         result  = new ShortSet(col.Count);

            col.TryWhere(op, value, result, details);

            if (!details.Succeeded)

Example #19
        /// <summary>
        ///  Delete items from this Partition which meet the provided criteria.
        /// </summary>
        /// <param name="where">Expression matching items to delete</param>
        /// <returns>Result including number deleted; execution details are reported on its Details property</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="where"/> is null</exception>
        // NOTE(review): block braces are not visible in this excerpt, and no statement that
        // decrements lastItemIndex or returns 'result' is shown - those lines are presumably missing
        // here; confirm against the full source.
        public DeleteResult Delete(IExpression where)
            if (where == null)
                throw new ArgumentNullException("where");

            DeleteResult result = new DeleteResult();

            // Find the set of items to delete
            ShortSet whereSet = new ShortSet(this.Count);

            where.TryEvaluate(this, whereSet, result.Details);

            // Nothing is modified unless evaluation succeeded.
            if (result.Details.Succeeded)
                // Swap each item to delete with the last item in the set
                ushort   lastItemIndex = (ushort)(this.Count - 1);
                ushort[] itemsToDelete = whereSet.Values;
                // Iterate from the end of the array towards the start.
                for (int i = itemsToDelete.Length - 1; i >= 0; --i)
                    // If this isn't the last item, swap the last item with it
                    ushort itemToDelete = itemsToDelete[i];
                    if (itemToDelete != lastItemIndex)
                        // Overwrite the deleted row with the last row's value in every column.
                        foreach (IColumn <object> c in this.Columns.Values)
                            c[itemToDelete] = c[lastItemIndex];


                // Resize the set to exclude all of the deleted items (now at the end)
                foreach (IColumn <object> c in this.Columns.Values)
                    c.SetSize((ushort)(lastItemIndex + 1));

                // Record the new count in the Partition itself
                _itemCount = (ushort)(lastItemIndex + 1);

                // Return the count deleted
                result.Count = whereSet.Count();

Example #20
        // Evaluates this term against 'partition'. When ColumnName is "*", every column is asked
        // for matches and the outcome is folded into details.Succeeded; otherwise the named column
        // is looked up and asked directly, with a ColumnDoesNotExist error recorded when it is missing.
        // Throws ArgumentNullException when details, result or partition is null.
        // NOTE(review): block braces and 'else' keywords are not visible in this excerpt, and the
        // 'if (!succeeded)' check has no visible body; the branch structure described here is
        // inferred from indentation - confirm against the full source.
        public virtual void TryEvaluate(Partition partition, ShortSet result, ExecutionDetails details)
            if (details == null)
                throw new ArgumentNullException("details");
            if (result == null)
                throw new ArgumentNullException("result");
            if (partition == null)
                throw new ArgumentNullException("partition");

            if (this.ColumnName.Equals("*"))
                // '*' queries succeed if any column succeeds
                bool             succeeded        = false;
                // A separate details object, so per-column errors are not written to 'details' here.
                ExecutionDetails perColumnDetails = new ExecutionDetails();

                foreach (IColumn <object> column in partition.Columns.Values)
                    // Reset the flag before each column so 'succeeded' reflects each column on its own.
                    perColumnDetails.Succeeded = true;
                    column.TryWhere(this.Operator, this.Value, result, perColumnDetails);
                    succeeded |= perColumnDetails.Succeeded;

                // An earlier failure in 'details' is preserved; a '*' failure can only clear the flag.
                details.Succeeded &= succeeded;

                // If no column succeeded, report the full errors
                if (!succeeded)
                // Presumably the non-'*' branch: a single named column.
                if (!partition.ContainsColumn(this.ColumnName))
                    details.AddError(ExecutionDetails.ColumnDoesNotExist, this.ColumnName);
                    partition.Columns[this.ColumnName].TryWhere(this.Operator, this.Value, result, details);
Example #21
        // Untyped TryWhere: converts 'value' to T with TryConvert and forwards to the inner
        // _column.TryWhere. A failed conversion records an UnableToConvertType error (value, column
        // name, target type name) when 'details' is non-null.
        // NOTE(review): block braces and the 'else' keyword are not visible in this excerpt; the
        // indentation suggests the forwarding call runs only when the conversion succeeds - confirm
        // against the full source.
        public void TryWhere(Operator op, object value, ShortSet result, ExecutionDetails details)
            T t;

            if (!TryConvert(value, out t))
                if (details != null)
                    details.AddError(ExecutionDetails.UnableToConvertType, value, this.Name, typeof(T).Name);
                _column.TryWhere(op, t, result, details);
Example #22
        // Verifies that a ShortSet constructed with capacity 0 reports a count of 0 at three points
        // in the test.
        // NOTE(review): the statements between the asserts (presumably operations applied to s1,
        // some involving s2) are not visible in this excerpt - confirm against the full source.
        public void ShortSet_CapacityZero()
            ShortSet s1 = new ShortSet(0);

            Assert.AreEqual(0, s1.Count());

            Assert.AreEqual(0, s1.Count());

            ShortSet s2 = new ShortSet(10);

            Assert.AreEqual(0, s1.Count());
Example #23
        // Builds a "TimeSpan" column, stores four values (one TimeSpan and three strings), and
        // asserts that a GreaterThan 30 seconds query matches rows 0, 1 and 3.
        // NOTE(review): no statement sizing the column before the writes is visible in this
        // excerpt; one is presumably missing here - confirm against the full source.
        public void TypedColumn_TimeSpan_Basic()
            IColumn <object> c = ColumnFactory.Build(new ColumnDetails("Duration", "TimeSpan", null), 0);


            c[0] = Value.Create(TimeSpan.FromMinutes(1));
            c[1] = Value.Create("01:00:00");
            c[2] = Value.Create("00:00:01");
            // The expected result below includes row 3, so "1" is treated as longer than 30 seconds
            // (TimeSpan.Parse would read it as one day - presumably what the column uses; confirm).
            c[3] = Value.Create("1");

            ShortSet longTimes = new ShortSet(c.Count);

            // 'details' is passed as null here.
            c.TryWhere(Operator.GreaterThan, TimeSpan.FromSeconds(30), longTimes, null);
            Assert.AreEqual("0, 1, 3", String.Join(", ", longTimes.Values));
Example #24
        // Test helper: runs column.TryWhere(op, value) into a fresh ShortSet sized to column.Count
        // and returns the matching values joined with ", ".
        // NOTE(review): block braces are not visible in this excerpt and the failure check has no
        // visible body; the distinct value returned for an unsuccessful evaluation is not shown -
        // confirm against the full source.
        public static string GetMatches <T>(IColumn <T> column, Operator op, T value)
            ShortSet         result  = new ShortSet(column.Count);
            ExecutionDetails details = new ExecutionDetails();

            // Get the matches
            column.TryWhere(op, value, result, details);

            // Return a distinct value if the evaluation was reported unsuccessful
            if (!details.Succeeded)

            // Return matches for successful evaluation
            return(String.Join(", ", result.Values));
Example #25
        // Runs the shared AggregatorBaseBehaviors checks against the Count, Sum, Min and Max
        // aggregators, then verifies that BaseAggregator throws NotImplementedException from both
        // Aggregate and Merge.
        public void Aggregator_BaseBehaviors()
            // CountAggregator is the only one checked with an explicit 'false' second argument.
            AggregatorBaseBehaviors(new CountAggregator(), false);
            AggregatorBaseBehaviors(new SumAggregator());
            AggregatorBaseBehaviors(new MinAggregator());
            AggregatorBaseBehaviors(new MaxAggregator());

            // Check BaseAggregator doesn't implement unexpected types or methods
            IUntypedColumn column = ColumnFactory.Build(new ColumnDetails("ID", "bool", false), 100);
            ShortSet       sample = new ShortSet(100);

            // Presumably unions the listed ids into the set - confirm against ShortSet.Or.
            sample.Or(new ushort[] { 1, 2, 3 });

            IAggregator aggregator = new BaseAggregator();

            Verify.Exception <NotImplementedException>(() => aggregator.Aggregate(null, sample, new IUntypedColumn[] { column }));
            Verify.Exception <NotImplementedException>(() => aggregator.Merge(null, new object[2]));
Example #26
        // Performance test for ShortSet set operations: builds three random sets, a small id array
        // and a scratch set, times a loop of operations, and asserts a minimum throughput.
        // NOTE(review): block braces are not visible in this excerpt and the loop body shows only
        // its section comments; the nine operations per iteration counted below are presumably
        // missing here - confirm against the full source.
        public void ShortSet_Performance_Set()
            // Goal: Set operations are <10k instructions, so at 2M instructions per millisecond, 200 per millisecond (Release build)
            //  Set operations are used to combine where clauses and sets for specific words when word searching.
            Random   r  = new Random();
            ShortSet s1 = BuildRandom(ushort.MaxValue, 1000, r);
            ShortSet s2 = BuildRandom(ushort.MaxValue, 10000, r);
            ShortSet s3 = BuildRandom(ushort.MaxValue, 50000, r);

            ushort[] s4 = { 1, 126, 950, 1024, 1025, 1670, 19240 };

            ShortSet scratch = new ShortSet(ushort.MaxValue);

            // 9 Operations x 10k iterations = 90k operations.
            // Goal is 100ms.
            // NOTE(review): the comment above says 10k iterations, but the loop runs 2500
            // (9 x 2500 = 22,500 operations) - confirm which is intended.
            int       iterations = 2500;
            Stopwatch w          = Stopwatch.StartNew();

            for (int i = 0; i < iterations; ++i)
                // Singleton Operations / Reset

                // Enumerable Operations

                // ShortSet Operations

            int    operations               = (9 * iterations);
            // Held as a double so the division below is floating-point rather than integer division.
            double milliseconds             = w.ElapsedMilliseconds;
            double operationsPerMillisecond = operations / milliseconds;

            Trace.Write(String.Format("{0:n0} operations in {1:n0} milliseconds; {2:n0} per millisecond.", operations, milliseconds, operationsPerMillisecond));

            // The threshold is half of the 200-per-millisecond goal stated above.
            Assert.IsTrue(operationsPerMillisecond > 100, "Not within 200% of goal.");
Example #27
        // Evaluates this.Where against the partition and, on success, collects the unique values of
        // this.Column among the matching rows through a type-specific GetUniqueValuesWorker,
        // returning them in a single-column DataBlock named after the column.
        // Throws ArgumentNullException when 'p' is null.
        // NOTE(review): block braces are not visible in this excerpt, and no return after the
        // missing-column error or at the end is shown - those lines are presumably missing here;
        // confirm against the full source.
        public virtual DistinctResult Compute(Partition p)
            if (p == null)
                throw new ArgumentNullException("p");

            // A Count of 0, or anything above ushort.MaxValue, is treated as ushort.MaxValue.
            ushort countToReturnPerPartition = (this.Count == 0 || this.Count > ushort.MaxValue) ? ushort.MaxValue : (ushort)this.Count;

            DistinctResult result = new DistinctResult(this);

            // Verify the column exists
            if (!p.ContainsColumn(this.Column))
                result.Details.AddError(ExecutionDetails.ColumnDoesNotExist, this.Column);

            // Find the set of items matching the where clause
            ShortSet whereSet = new ShortSet(p.Count);

            this.Where.TryEvaluate(p, whereSet, result.Details);

            if (result.Details.Succeeded)
                IUntypedColumn column = p.Columns[this.Column];

                // Construct a helper object of the correct type to natively work with the column
                GetUniqueValuesWorker helper = NativeContainer.CreateTypedInstance <GetUniqueValuesWorker>(typeof(GetUniqueValuesWorker <>), column.ColumnType);

                // The helper reports through the out parameter whether every unique value was returned.
                bool  allValuesReturned;
                Array uniqueValues = helper.GetUniqueValuesFromColumn(column.InnerColumn, whereSet, countToReturnPerPartition, out allValuesReturned);

                result.ColumnType        = column.ColumnType;
                result.AllValuesReturned = allValuesReturned;

                // Build a DataBlock with the results and return it
                DataBlock resultValues = new DataBlock(new string[] { this.Column }, uniqueValues.GetLength(0));
                resultValues.SetColumn(0, uniqueValues);
                result.Values = resultValues;

Example #28
        // Performance test for single-item reads and writes on a ShortSet: for each of the values
        // taken from s2, checks membership in s1, repeated for 10,000 iterations, then asserts a
        // minimum throughput.
        // NOTE(review): block braces are not visible in this excerpt and the 'if (!initial)' check
        // has no visible body; the write half of each get-and-set pair is presumably missing here -
        // confirm against the full source.
        public void ShortSet_Performance_GetAndSet()
            // Goal: set operations are <10 instructions, so at 2M instructions per millisecond, >200k per millisecond (Release build)
            //  Get and Set are used when evaluating ORDER BY for small sets and for determining aggregates each item should be included within.
            Random   r  = new Random();
            ShortSet s1 = BuildRandom(ushort.MaxValue, 10000, r);
            ShortSet s2 = BuildRandom(ushort.MaxValue, 1000, r);

            // Snapshot s2's values into an array before the timed loop starts.
            ushort[] getAndSetValues = s2.Values.ToArray();

            // 1k values; 2k operations; 20M total
            int       iterations = 10000;
            Stopwatch w          = Stopwatch.StartNew();

            for (int i = 0; i < iterations; ++i)
                int length = getAndSetValues.Length;
                for (int j = 0; j < length; ++j)
                    ushort value = getAndSetValues[j];
                    //bool initial = s1[value];
                    //s1[value] = !initial;

                    bool initial = s1.Contains(value);
                    if (!initial)

            // Two operations (one get, one set) are counted per value per iteration.
            int    operations               = (2 * getAndSetValues.Length * iterations);
            // Held as a double so the division below is floating-point rather than integer division.
            double milliseconds             = w.ElapsedMilliseconds;
            double operationsPerMillisecond = operations / milliseconds;

            Trace.Write(String.Format("{0:n0} operations in {1:n0} milliseconds; {2:n0} per millisecond.", operations, milliseconds, operationsPerMillisecond));

            Assert.IsTrue(operationsPerMillisecond > 75000, "Not within 200% of goal.");
Example #29
        // Test helper: creates a ShortSet sized to range.Count, optionally seeds it from
        // 'previousValues', adds the range's matches from 'sortedIDs', and returns the resulting
        // values joined with ", ".
        // NOTE(review): block braces are not visible in this excerpt and the loop over
        // previousValues has no visible body; the statement adding each previous value is presumably
        // missing here - confirm against the full source.
        private static string AddMatches(RangeToScan range, ushort[] sortedIDs, ushort[] previousValues)
            ShortSet resultSet = new ShortSet((ushort)range.Count);

            // If previous values are specified, add them
            if (previousValues != null)
                for (int i = 0; i < previousValues.Length; ++i)

            // Add matches for the range
            range.AddMatches(sortedIDs, resultSet);

            // Return the result as a string
            return(String.Join(", ", resultSet.Values));
Example #30
        public void TryEvaluate(Partition partition, ShortSet result, ExecutionDetails details)
            if (details == null)
                throw new ArgumentNullException("details");
            if (result == null)
                throw new ArgumentNullException("result");
            if (partition == null)
                throw new ArgumentNullException("partition");

            // Include all items - clear any and then add everything.
            // ShortSet will scope the set to the ID range valid within the data set.