Exemple #1
0
        /// <summary>
        ///  Counts, for each column in the partition, how many rows match both the
        ///  Where clause and Term (using Operator.Matches), and returns the non-zero
        ///  counts as a "ColumnName" / "Count" block sorted by count descending.
        /// </summary>
        /// <param name="p">Partition to search</param>
        /// <returns>Result whose Values hold the per-column counts and whose Total is the Where clause match count</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="p"/> is null</exception>
        public DataBlockResult Compute(Partition p)
        {
            if (p == null)
            {
                throw new ArgumentNullException("p");
            }

            DataBlockResult result = new DataBlockResult(this);

            // Rows matched by the rest of the query; errors are recorded on result.Details
            ShortSet whereMatches = new ShortSet(p.Count);
            this.Where.TryEvaluate(p, whereMatches, result.Details);

            // (column name, match count) pairs for columns with at least one match
            List<Tuple<string, int>> countsByColumn = new List<Tuple<string, int>>();

            if (whereMatches.Count() > 0)
            {
                // NOTE(review): 'bareTerm' and 'anyColumnSucceeded' are never read after being
                // assigned. They are retained to keep behavior identical - confirm whether the
                // per-column outcome was meant to be reported on result.Details.
                TermExpression bareTerm = new TermExpression(this.Term);
                ShortSet columnMatches = new ShortSet(p.Count);

                bool anyColumnSucceeded = false;
                ExecutionDetails columnDetails = new ExecutionDetails();

                foreach (IColumn<object> column in p.Columns.Values)
                {
                    // Reuse the same set and details object for every column
                    columnMatches.Clear();
                    columnDetails.Succeeded = true;

                    column.TryWhere(Operator.Matches, this.Term, columnMatches, columnDetails);
                    anyColumnSucceeded |= columnDetails.Succeeded;

                    // Keep only the term matches which are also in the base query
                    columnMatches.And(whereMatches);

                    ushort hits = columnMatches.Count();
                    if (hits > 0)
                    {
                        countsByColumn.Add(new Tuple<string, int>(column.Name, (int)hits));
                    }
                }

                // Highest match count first
                countsByColumn.Sort((a, b) => b.Item2.CompareTo(a.Item2));
            }

            // Copy the pairs into a two column DataBlock, one row per column
            DataBlock block = new DataBlock(new string[] { "ColumnName", "Count" }, countsByColumn.Count);

            for (int row = 0; row < countsByColumn.Count; ++row)
            {
                Tuple<string, int> entry = countsByColumn[row];
                block[row, 0] = entry.Item1;
                block[row, 1] = entry.Item2;
            }

            result.Values = block;
            result.Total = whereMatches.Count();
            return result;
        }
Exemple #2
0
        /// <summary>
        ///  Checks that a ShortSet never reports values at or above its capacity, and
        ///  that value 63 survives And, Or and AndNot against a set of capacity 64.
        /// </summary>
        public void ShortSet_CapacityHandling()
        {
            // Inverting an empty set of capacity ten must report exactly 0 through 9
            ShortSet tenItems = new ShortSet(10);
            tenItems.Not();

            string reported = String.Join(", ", tenItems.Values);
            Assert.AreEqual("0, 1, 2, 3, 4, 5, 6, 7, 8, 9", reported);

            // When the target set has the larger capacity, the last value of the smaller
            // operand (63) must not be lost by any of the set operations.
            ShortSet larger = new ShortSet(120);
            larger.Not();

            ShortSet smaller = new ShortSet(64);
            smaller.Not();

            larger.And(smaller);
            Assert.IsTrue(larger.Contains(63));

            larger.Or(smaller);
            Assert.IsTrue(larger.Contains(63));

            // Invert the smaller set again before removing it from the larger one
            smaller.Not();
            larger.AndNot(smaller);
            Assert.IsTrue(larger.Contains(63));
        }
Exemple #3
0
        /// <summary>
        ///  Times nine ShortSet operations per iteration (singleton, enumerable and
        ///  set-to-set) and asserts that more than 100 operations complete per millisecond.
        /// </summary>
        public void ShortSet_Performance_Set()
        {
            // Goal: Set operations are <10k instructions, so at 2M instructions per millisecond, 200 per millisecond (Release build)
            //  Set operations are used to combine where clauses and sets for specific words when word searching.
            Random random = new Random();
            ShortSet sparse = BuildRandom(ushort.MaxValue, 1000, random);
            ShortSet medium = BuildRandom(ushort.MaxValue, 10000, random);
            ShortSet dense = BuildRandom(ushort.MaxValue, 50000, random);

            ushort[] fewValues = { 1, 126, 950, 1024, 1025, 1670, 19240 };

            ShortSet target = new ShortSet(ushort.MaxValue);

            // 9 operations x 2,500 iterations = 22,500 operations.
            // The assertion below only requires half of the 200 per millisecond goal.
            int iterations = 2500;
            Stopwatch timer = Stopwatch.StartNew();

            for (int pass = 0; pass < iterations; ++pass)
            {
                // Operations with no argument, then reset to a known set
                target.Not();
                target.Clear();
                target.Or(sparse);

                // Operations taking an enumerable of values
                target.And(fewValues);
                target.Or(fewValues);
                target.AndNot(fewValues);

                // Operations taking another ShortSet
                target.Or(medium);
                target.And(dense);
                target.AndNot(medium);
            }

            int operations = (9 * iterations);
            double elapsedMilliseconds = timer.ElapsedMilliseconds;
            double operationsPerMillisecond = operations / elapsedMilliseconds;

            string summary = String.Format("{0:n0} operations in {1:n0} milliseconds; {2:n0} per millisecond.", operations, elapsedMilliseconds, operationsPerMillisecond);
            Trace.Write(summary);

            Assert.IsTrue(operationsPerMillisecond > 100, "Not within 200% of goal.");
        }
Exemple #4
0
        /// <summary>
        ///  Checks that a ShortSet with capacity zero stays empty after Not, And and Or.
        /// </summary>
        public void ShortSet_CapacityZero()
        {
            ShortSet empty = new ShortSet(0);

            // Nothing is in the set when first built
            Assert.AreEqual(0, empty.Count());

            // Inverting cannot add values, since there is no capacity for them
            empty.Not();
            Assert.AreEqual(0, empty.Count());

            // Combining with a fully set, larger set must not add values either
            ShortSet allOfTen = new ShortSet(10);
            allOfTen.Not();

            empty.And(allOfTen);
            empty.Or(allOfTen);
            Assert.AreEqual(0, empty.Count());
        }
Exemple #5
0
        /// <summary>
        ///  Checks Or, AndNot and And between a set of capacity 10 and a set of capacity 20,
        ///  in both directions, including what happens to values above the smaller capacity.
        /// </summary>
        public void ShortSet_MismatchedCapacities()
        {
            // Renders the values of a set as a comma delimited list for comparison
            Func<ShortSet, string> render = (set) => String.Join(", ", set.Values);

            ShortSet small = new ShortSet(10);
            small.Add(1);
            small.Add(3);

            ShortSet big = new ShortSet(20);
            big.Add(2);
            big.Add(4);
            big.Add(10);

            // Or into the smaller set: shared-range values are added, 10 is not
            small.Or(big);
            Assert.AreEqual("1, 2, 3, 4", render(small));

            // Or into the larger set: the value beyond the smaller capacity is untouched
            big.Or(small);
            Assert.AreEqual("1, 2, 3, 4, 10", render(big));

            // AndNot into the smaller set: everything is cleared and nothing new appears
            small.AndNot(big);
            Assert.AreEqual("", render(small));

            // AndNot into the larger set: the value beyond the smaller capacity stays set
            small.Clear();
            small.Add(1);
            small.Add(3);
            big.AndNot(small);
            Assert.AreEqual("2, 4, 10", render(big));

            // And into the smaller set: values beyond its capacity are not set
            big.Or(small);
            small.And(big);
            Assert.AreEqual("1, 3", render(small));

            // And into the larger set: values beyond the smaller capacity *are* cleared
            big.And(small);
            Assert.AreEqual("1, 3", render(big));
        }
Exemple #6
0
        /// <summary>
        ///  Runs this aggregation against one partition. The Where clause is evaluated to
        ///  a base set, each dimension's GroupByWhere expressions are evaluated, and the
        ///  Aggregator is run to fill result.Values (one row per combination, plus totals).
        /// </summary>
        /// <param name="p">Partition to aggregate over</param>
        /// <returns>
        ///  The aggregation result. If an aggregation column is not in the partition, the
        ///  result is returned immediately with a ColumnDoesNotExist error and no Values.
        /// </returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="p"/> is null</exception>
        public AggregationResult Compute(Partition p)
        {
            if (p == null)
            {
                throw new ArgumentNullException("p");
            }

            Stopwatch timer = Stopwatch.StartNew();

            AggregationResult result = new AggregationResult(this);
            result.AggregationContext = this.Aggregator.CreateContext();

            // Resolve the columns handed to the aggregator, if any were requested
            IUntypedColumn[] columns = null;
            if (this.AggregationColumns != null)
            {
                columns = new IUntypedColumn[this.AggregationColumns.Length];

                for (int c = 0; c < this.AggregationColumns.Length; ++c)
                {
                    string requestedName = this.AggregationColumns[c];

                    IUntypedColumn found;
                    if (!p.Columns.TryGetValue(requestedName, out found))
                    {
                        // Stop early; note that Runtime is not set on this path
                        result.Details.AddError(ExecutionDetails.ColumnDoesNotExist, requestedName);
                        return result;
                    }

                    columns[c] = found;
                }
            }

            // Build the result column names and count the cells to aggregate.
            // Each dimension contributes one row per value plus one row for its total.
            List<string> blockColumnNames = new List<string>();
            int cellCount = 1;

            for (int d = 0; d < this.Dimensions.Count; ++d)
            {
                AggregationDimension current = this.Dimensions[d];

                // Unnamed dimensions are labelled by their one-based position
                string label = String.IsNullOrEmpty(current.Name)
                    ? StringExtensions.Format("Dimension {0}", d + 1)
                    : current.Name;

                blockColumnNames.Add(label);
                cellCount *= (current.GroupByWhere.Count + 1);
            }

            blockColumnNames.Add("Aggregate");

            // Allocate the block which receives the final values
            result.Values = new DataBlock(blockColumnNames, cellCount);

            // Evaluate the base query once; every dimension value is intersected with it
            ShortSet baseSet = new ShortSet(p.Count);
            this.Where.TryEvaluate(p, baseSet, result.Details);
            result.Total = baseSet.Count();

            if (this.Dimensions.Count == 1)
            {
                // Single dimension: reuse one scratch set and aggregate each value as it is found
                AggregationDimension only = this.Dimensions[0];
                ShortSet scratch = new ShortSet(p.Count);
                int row = 0;

                foreach (IExpression valueExpression in only.GroupByWhere)
                {
                    scratch.Clear();
                    valueExpression.TryEvaluate(p, scratch, result.Details);
                    scratch.And(baseSet);

                    // Rows with no matching items are left unset in the block
                    if (!scratch.IsEmpty())
                    {
                        result.Values[row, 1] = this.Aggregator.Aggregate(result.AggregationContext, scratch, columns);
                    }

                    ++row;
                }

                // The row after the last value holds the aggregate over the whole base set
                result.Values[row, 1] = this.Aggregator.Aggregate(result.AggregationContext, baseSet, columns);
            }
            else
            {
                // Any other dimension count: materialize a set per dimension value first
                List<List<Tuple<IExpression, ShortSet>>> setsPerDimension = new List<List<Tuple<IExpression, ShortSet>>>();

                foreach (AggregationDimension dimension in this.Dimensions)
                {
                    List<Tuple<IExpression, ShortSet>> valueSets = new List<Tuple<IExpression, ShortSet>>();

                    foreach (IExpression valueExpression in dimension.GroupByWhere)
                    {
                        ShortSet matches = new ShortSet(p.Count);
                        valueExpression.TryEvaluate(p, matches, result.Details);

                        valueSets.Add(new Tuple<IExpression, ShortSet>(valueExpression, matches));
                    }

                    // The last entry of each dimension is its 'Total row', backed by the base set
                    valueSets.Add(new Tuple<IExpression, ShortSet>(new AllExpression(), baseSet));

                    setsPerDimension.Add(valueSets);
                }

                // Aggregate over the combinations of the dimension value sets
                AggregateAllDimensionsFlat(result.AggregationContext, result.Values, p.Count, baseSet, setsPerDimension, columns, this.Aggregator);
            }

            // Dimension names are written here only when this partition is the whole table;
            // otherwise, merge will add them
            if (p.Mask.Equals(PartitionMask.All))
            {
                AddDimensionsToBlock(result.Values);
            }

            result.Runtime = timer.Elapsed;
            return result;
        }
Exemple #7
0
        /// <summary>
        ///  Walks through the basic ShortSet operations (Add, Remove, Or, OrNot, And, AndNot,
        ///  Clear, From, FromAnd, ToString) and checks that null arguments are rejected.
        /// </summary>
        public void ShortSet_Basic()
        {
            // Renders the values of a set as a comma delimited list for comparison
            Func<ShortSet, string> render = (set) => String.Join(", ", set.Values);

            // A new set has no values
            ShortSet main = new ShortSet(100);
            Assert.AreEqual("", render(main));

            // A single added value is enumerated
            main.Add(0);
            Assert.AreEqual("0", render(main));

            // More values are enumerated in ascending order
            main.Add(15);
            main.Add(64);
            Assert.AreEqual("0, 15, 64", render(main));

            // Remove clears a single value
            main.Remove(64);
            Assert.AreEqual("0, 15", render(main));

            // Or: with values, with another set, and with null arguments
            ShortSet lowThree = new ShortSet(120);
            lowThree.Or(new ushort[] { 0, 1, 2 });
            main.Or(lowThree);
            Assert.AreEqual("0, 1, 2, 15", render(main));
            Assert.AreEqual("0, 1, 2", render(lowThree));
            Verify.Exception<ArgumentNullException>(() => main.Or((ShortSet)null));
            Verify.Exception<ArgumentNullException>(() => main.Or((IEnumerable<ushort>)null));

            // OrNot: the other set is missing only 15 and 16, so only those can be added
            ShortSet allBut15And16 = new ShortSet(100);
            allBut15And16.Not();
            allBut15And16.Remove(15);
            allBut15And16.Remove(16);
            main.OrNot(allBut15And16);
            Assert.AreEqual("0, 1, 2, 15, 16", render(main));
            Verify.Exception<ArgumentNullException>(() => main.OrNot((ShortSet)null));

            // And: with another set, with values, and with null arguments
            main.And(lowThree);
            main.And(new ushort[] { 1, 2 });
            Assert.AreEqual("1, 2", render(main));
            main.And(new ushort[] { 1 });
            Assert.AreEqual("1", render(main));
            Verify.Exception<ArgumentNullException>(() => main.And((ShortSet)null));
            Verify.Exception<ArgumentNullException>(() => main.And((IEnumerable<ushort>)null));

            // AndNot: with another set, with values, and with null arguments
            main.Add(96);
            main.Add(64);
            main.AndNot(lowThree);
            main.AndNot(new ushort[] { 96 });
            Assert.AreEqual("64", render(main));
            Verify.Exception<ArgumentNullException>(() => main.AndNot((ShortSet)null));
            Verify.Exception<ArgumentNullException>(() => main.AndNot((IEnumerable<ushort>)null));

            // Clear empties the set
            main.Clear();
            Assert.AreEqual("", render(main));

            // From copies the values of another set
            main.From(lowThree);
            Assert.AreEqual("0, 1, 2", render(main));
            Verify.Exception<ArgumentNullException>(() => main.From((ShortSet)null));

            // FromAnd replaces the (fully set) contents with the intersection of two sets
            ShortSet oneToThree = new ShortSet(100);
            oneToThree.Or(new ushort[] { 1, 2, 3 });
            main.Clear();
            main.Not();
            main.FromAnd(lowThree, oneToThree);
            Assert.AreEqual("1, 2", render(main));
            Verify.Exception<ArgumentNullException>(() => main.FromAnd((ShortSet)null, lowThree));
            Verify.Exception<ArgumentNullException>(() => main.FromAnd(lowThree, (ShortSet)null));

            // ToString wraps the value list in brackets
            Assert.AreEqual("[1, 2]", main.ToString());
        }
Exemple #8
0
        /// <summary>
        ///  Counts how often each value of Column occurs among the rows matching the Where
        ///  clause (optionally narrowed by a StartsWith match on ValuePrefix) and returns
        ///  up to Count of them via ToDataBlock.
        /// </summary>
        /// <param name="p">Partition to search</param>
        /// <returns>
        ///  The distinct result. Values is only set when the Where clause evaluation
        ///  succeeded; if Column is not in the partition a ColumnDoesNotExist error is returned.
        /// </returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="p"/> is null</exception>
        public override DistinctResult Compute(Partition p)
        {
            if (p == null)
            {
                throw new ArgumentNullException("p");
            }

            DistinctResult result = new DistinctResult(this);

            // Nothing to do if the requested column is not in this partition
            if (!p.ContainsColumn(this.Column))
            {
                result.Details.AddError(ExecutionDetails.ColumnDoesNotExist, this.Column);
                return result;
            }

            // Evaluate the base where clause; Total reflects it before any prefix filtering
            ShortSet matches = new ShortSet(p.Count);
            this.Where.TryEvaluate(p, matches, result.Details);
            result.Total = matches.Count();

            // Narrow to values starting with the prefix typed so far. The prefix query uses
            // its own details object, so a failure there just skips the narrowing.
            if (!String.IsNullOrEmpty(this.ValuePrefix))
            {
                ExecutionDetails prefixDetails = new ExecutionDetails();
                ShortSet prefixMatches = new ShortSet(p.Count);

                TermExpression prefixTerm = new TermExpression(this.Column, Operator.StartsWith, this.ValuePrefix);
                prefixTerm.TryEvaluate(p, prefixMatches, prefixDetails);

                if (prefixDetails.Succeeded)
                {
                    matches.And(prefixMatches);
                }
            }

            // Leave Values unset if the base query reported a failure
            if (!result.Details.Succeeded)
            {
                return result;
            }

            // Tally the value of every matching row
            Dictionary<object, int> tally = new Dictionary<object, int>();
            IUntypedColumn source = p.Columns[this.Column];

            for (int row = 0; row < source.Count; ++row)
            {
                ushort lid = (ushort)row;
                if (!matches.Contains(lid))
                {
                    continue;
                }

                object value = source[lid];

                // A value not seen before starts from zero
                int seenSoFar;
                if (!tally.TryGetValue(value, out seenSoFar))
                {
                    seenSoFar = 0;
                }

                tally[value] = seenSoFar + 1;
            }

            // Convert the tally into a DataBlock limited to this.Count rows
            result.Values = ToDataBlock(tally, this.Column, (int)this.Count);

            // All values were returned only if no distinct value was cut off by the limit
            result.AllValuesReturned = result.Values.RowCount == tally.Count;

            return result;
        }