/// <summary>
///  Computes, for one partition, how many of the rows matching the base
///  query also match the term in each column.
/// </summary>
/// <param name="p">Partition to evaluate the query against.</param>
/// <returns>
///  A result whose Values block holds one ("ColumnName", "Count") row per
///  column with at least one match, ordered by count descending, and whose
///  Total is the number of rows matching the base query.
/// </returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="p"/> is null.</exception>
public DataBlockResult Compute(Partition p)
{
    if (p == null)
    {
        throw new ArgumentNullException("p");
    }

    DataBlockResult result = new DataBlockResult(this);

    // Find matches for the remaining query. The count is taken once and reused
    // because the set is not modified again below.
    ShortSet baseQueryMatches = new ShortSet(p.Count);
    this.Where.TryEvaluate(p, baseQueryMatches, result.Details);
    ushort baseMatchCount = baseQueryMatches.Count();

    // Find and count matches per column for the term in the outer query
    List<Tuple<string, int>> matchCountPerColumn = new List<Tuple<string, int>>();

    if (baseMatchCount > 0)
    {
        // One scratch set and one details object are reused for every column
        ShortSet termMatchesForColumn = new ShortSet(p.Count);
        ExecutionDetails perColumnDetails = new ExecutionDetails();

        foreach (IColumn<object> column in p.Columns.Values)
        {
            termMatchesForColumn.Clear();
            perColumnDetails.Succeeded = true;

            // Per-column failures are deliberately not surfaced on the result;
            // such a column simply contributes whatever matches were recorded.
            column.TryWhere(Operator.Matches, this.Term, termMatchesForColumn, perColumnDetails);

            // Only rows that also satisfy the base query are counted
            termMatchesForColumn.And(baseQueryMatches);

            ushort matchCount = termMatchesForColumn.Count();
            if (matchCount > 0)
            {
                matchCountPerColumn.Add(new Tuple<string, int>(column.Name, (int)matchCount));
            }
        }

        // Sort results by count of matches descending
        matchCountPerColumn.Sort((left, right) => right.Item2.CompareTo(left.Item2));
    }

    // Copy to a DataBlock and return it
    int index = 0;
    DataBlock block = new DataBlock(new string[] { "ColumnName", "Count" }, matchCountPerColumn.Count);
    foreach (var column in matchCountPerColumn)
    {
        block[index, 0] = column.Item1;
        block[index, 1] = column.Item2;
        index++;
    }

    result.Values = block;
    result.Total = baseMatchCount;

    return result;
}
/// <summary>
///  Checks that a ShortSet never reports values at or beyond its capacity,
///  and that combining sets of different capacities keeps the last value
///  of the smaller capacity intact.
/// </summary>
public void ShortSet_CapacityHandling()
{
    // Inverting an empty ten-item set must report exactly 0..9,
    // even if there are bits available for larger values.
    ShortSet tenItems = new ShortSet(10);
    tenItems.Not();

    string reported = String.Join(", ", tenItems.Values);
    Assert.AreEqual("0, 1, 2, 3, 4, 5, 6, 7, 8, 9", reported);

    // When the target set is the larger one, the highest value of the
    // smaller set (63) must not be truncated, and values above the smaller
    // capacity must not take part in the operation.
    ShortSet larger = new ShortSet(120);
    larger.Not();

    ShortSet smaller = new ShortSet(64);
    smaller.Not();

    larger.And(smaller);
    Assert.IsTrue(larger.Contains(63));

    larger.Or(smaller);
    Assert.IsTrue(larger.Contains(63));

    // 'smaller' is inverted again before being removed from 'larger'
    smaller.Not();
    larger.AndNot(smaller);
    Assert.IsTrue(larger.Contains(63));
}
/// <summary>
///  Measures the throughput of the ShortSet set operations (Not, Clear, Or,
///  And, AndNot) against other sets and against a small value array.
/// </summary>
/// <remarks>
///  Stated goal: set operations are under 10k instructions, so at 2M
///  instructions per millisecond roughly 200 operations per millisecond
///  (Release build). The assertion only requires more than 100 per
///  millisecond. Set operations are used to combine where clauses and the
///  sets for specific words when word searching.
/// </remarks>
public void ShortSet_Performance_Set()
{
    Random random = new Random();

    ShortSet sparse = BuildRandom(ushort.MaxValue, 1000, random);
    ShortSet medium = BuildRandom(ushort.MaxValue, 10000, random);
    ShortSet dense = BuildRandom(ushort.MaxValue, 50000, random);
    ushort[] fewValues = { 1, 126, 950, 1024, 1025, 1670, 19240 };

    ShortSet scratch = new ShortSet(ushort.MaxValue);

    // 9 operations x 2,500 iterations = 22,500 operations.
    int iterations = 2500;

    Stopwatch w = Stopwatch.StartNew();

    int pass = 0;
    while (pass < iterations)
    {
        // Whole-set operations / reset
        scratch.Not();
        scratch.Clear();
        scratch.Or(sparse);

        // Operations taking an enumerable of values
        scratch.And(fewValues);
        scratch.Or(fewValues);
        scratch.AndNot(fewValues);

        // Operations taking another ShortSet
        scratch.Or(medium);
        scratch.And(dense);
        scratch.AndNot(medium);

        ++pass;
    }

    int operations = 9 * iterations;
    double milliseconds = w.ElapsedMilliseconds;
    double operationsPerMillisecond = operations / milliseconds;

    string summary = String.Format("{0:n0} operations in {1:n0} milliseconds; {2:n0} per millisecond.", operations, milliseconds, operationsPerMillisecond);
    Trace.Write(summary);

    Assert.IsTrue(operationsPerMillisecond > 100, "Not within 200% of goal.");
}
/// <summary>
///  Checks that a zero-capacity ShortSet stays empty after being inverted
///  and after being combined with a non-empty set.
/// </summary>
public void ShortSet_CapacityZero()
{
    ShortSet empty = new ShortSet(0);
    Assert.AreEqual(0, empty.Count());

    // Inverting a set with no capacity must not add anything
    empty.Not();
    Assert.AreEqual(0, empty.Count());

    // Combining with a fully set ten-item set must not add anything either
    ShortSet full = new ShortSet(10);
    full.Not();

    empty.And(full);
    empty.Or(full);
    Assert.AreEqual(0, empty.Count());
}
/// <summary>
///  Checks Or, And and AndNot between two ShortSets of different capacities
///  (10 and 20), in both directions.
/// </summary>
public void ShortSet_MismatchedCapacities()
{
    // Renders the values of a set the same way for every assertion
    Func<ShortSet, string> render = set => String.Join(", ", set.Values);

    ShortSet small = new ShortSet(10);
    small.Add(1);
    small.Add(3);

    ShortSet large = new ShortSet(20);
    large.Add(2);
    large.Add(4);
    large.Add(10);

    // Or into the smaller set: values under the shared capacity are added, 10 is not.
    small.Or(large);
    Assert.AreEqual("1, 2, 3, 4", render(small));

    // Or into the larger set: its own value above the shared capacity is untouched.
    large.Or(small);
    Assert.AreEqual("1, 2, 3, 4, 10", render(large));

    // AndNot on the smaller set: shared values are cleared and nothing larger appears.
    small.AndNot(large);
    Assert.AreEqual("", render(small));

    // AndNot on the larger set: its value above the shared capacity stays set.
    small.Clear();
    small.Add(1);
    small.Add(3);
    large.AndNot(small);
    Assert.AreEqual("2, 4, 10", render(large));

    // And on the smaller set: values above the shared capacity are not set.
    large.Or(small);
    small.And(large);
    Assert.AreEqual("1, 3", render(small));

    // And on the larger set: values above the shared capacity *are* cleared.
    large.And(small);
    Assert.AreEqual("1, 3", render(large));
}
/// <summary>
///  Runs this aggregation against a single partition: evaluates the base
///  where clause, evaluates each dimension value, and stores the aggregate
///  for every cell plus the totals in the result block.
/// </summary>
/// <param name="p">Partition to aggregate over.</param>
/// <returns>
///  The aggregation result for the partition. If a requested aggregation
///  column does not exist, the result is returned early carrying that error.
/// </returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="p"/> is null.</exception>
public AggregationResult Compute(Partition p)
{
    if (p == null)
    {
        throw new ArgumentNullException("p");
    }

    Stopwatch timer = Stopwatch.StartNew();

    AggregationResult result = new AggregationResult(this);
    result.AggregationContext = this.Aggregator.CreateContext();

    // Resolve the columns handed to the aggregation function, if any were named
    IUntypedColumn[] aggregatedColumns = null;
    if (this.AggregationColumns != null)
    {
        int requestedCount = this.AggregationColumns.Length;
        aggregatedColumns = new IUntypedColumn[requestedCount];

        for (int c = 0; c < requestedCount; ++c)
        {
            string requestedName = this.AggregationColumns[c];
            bool found = p.Columns.TryGetValue(requestedName, out aggregatedColumns[c]);
            if (!found)
            {
                result.Details.AddError(ExecutionDetails.ColumnDoesNotExist, requestedName);
                return result;
            }
        }
    }

    // Work out the result block shape: one column per dimension plus "Aggregate",
    // and one row per combination of dimension values (each dimension adds a total).
    List<string> blockColumnNames = new List<string>();
    int cellCount = 1;
    int dimensionNumber = 0;

    foreach (AggregationDimension dimension in this.Dimensions)
    {
        dimensionNumber++;

        if (String.IsNullOrEmpty(dimension.Name))
        {
            // Unnamed dimensions get a one-based placeholder name
            blockColumnNames.Add(StringExtensions.Format("Dimension {0}", dimensionNumber));
        }
        else
        {
            blockColumnNames.Add(dimension.Name);
        }

        cellCount *= (dimension.GroupByWhere.Count + 1);
    }

    blockColumnNames.Add("Aggregate");

    // Allocate the block that receives the final values
    result.Values = new DataBlock(blockColumnNames, cellCount);

    // Evaluate the base query and record how many items it matched
    ShortSet baseMatches = new ShortSet(p.Count);
    this.Where.TryEvaluate(p, baseMatches, result.Details);
    result.Total = baseMatches.Count();

    if (this.Dimensions.Count != 1)
    {
        // General case: build the matching set for every value of every dimension
        List<List<Tuple<IExpression, ShortSet>>> setsByDimension = new List<List<Tuple<IExpression, ShortSet>>>();

        foreach (AggregationDimension dimension in this.Dimensions)
        {
            List<Tuple<IExpression, ShortSet>> setsForThisDimension = new List<Tuple<IExpression, ShortSet>>();

            // One entry per value in this dimension
            foreach (IExpression valueExpression in dimension.GroupByWhere)
            {
                ShortSet valueMatches = new ShortSet(p.Count);
                valueExpression.TryEvaluate(p, valueMatches, result.Details);
                setsForThisDimension.Add(Tuple.Create<IExpression, ShortSet>(valueExpression, valueMatches));
            }

            // Plus one 'Total row' entry backed by the base query set
            setsForThisDimension.Add(Tuple.Create<IExpression, ShortSet>(new AllExpression(), baseMatches));

            setsByDimension.Add(setsForThisDimension);
        }

        // Run the aggregator over the items
        AggregateAllDimensionsFlat(result.AggregationContext, result.Values, p.Count, baseMatches, setsByDimension, aggregatedColumns, this.Aggregator);
    }
    else
    {
        // Single dimension: reuse one scratch set and aggregate value by value
        AggregationDimension onlyDimension = this.Dimensions[0];
        ShortSet scratch = new ShortSet(p.Count);
        int row = 0;

        foreach (IExpression valueExpression in onlyDimension.GroupByWhere)
        {
            // Items matching this value, restricted to the base query
            scratch.Clear();
            valueExpression.TryEvaluate(p, scratch, result.Details);
            scratch.And(baseMatches);

            // Rows with no matching items are left unset in the block
            if (!scratch.IsEmpty())
            {
                result.Values[row, 1] = this.Aggregator.Aggregate(result.AggregationContext, scratch, aggregatedColumns);
            }

            row++;
        }

        // The final row holds the aggregate over the whole base query
        result.Values[row, 1] = this.Aggregator.Aggregate(result.AggregationContext, baseMatches, aggregatedColumns);
    }

    // Dimension names are written here only when this is the sole partition;
    // otherwise, merge will add them.
    if (p.Mask.Equals(PartitionMask.All))
    {
        AddDimensionsToBlock(result.Values);
    }

    // Capture timing and return
    result.Runtime = timer.Elapsed;
    return result;
}
/// <summary>
///  Walks through the basic ShortSet operations (Add, Remove, Or, OrNot, And,
///  AndNot, Clear, From, FromAnd, ToString), checking the resulting values
///  after each step and that null arguments raise ArgumentNullException.
/// </summary>
public void ShortSet_Basic()
{
    // Renders the values of a set the same way for every assertion
    Func<ShortSet, string> render = set => String.Join(", ", set.Values);

    // A newly constructed set has no values
    ShortSet target = new ShortSet(100);
    Assert.AreEqual("", render(target));

    // Add a single value and enumerate it
    target.Add(0);
    Assert.AreEqual("0", render(target));

    // Add more values
    target.Add(15);
    target.Add(64);
    Assert.AreEqual("0, 15, 64", render(target));

    // Remove one value
    target.Remove(64);
    Assert.AreEqual("0, 15", render(target));

    // Or: the argument set is left unchanged
    ShortSet lowThree = new ShortSet(120);
    lowThree.Or(new ushort[] { 0, 1, 2 });
    target.Or(lowThree);
    Assert.AreEqual("0, 1, 2, 15", render(target));
    Assert.AreEqual("0, 1, 2", render(lowThree));
    Verify.Exception<ArgumentNullException>(() => target.Or((ShortSet)null));
    Verify.Exception<ArgumentNullException>(() => target.Or((IEnumerable<ushort>)null));

    // OrNot: the argument lacks only 15 and 16, so only those can be added
    ShortSet allButTwo = new ShortSet(100);
    allButTwo.Not();
    allButTwo.Remove(15);
    allButTwo.Remove(16);
    target.OrNot(allButTwo);
    Assert.AreEqual("0, 1, 2, 15, 16", render(target));
    Verify.Exception<ArgumentNullException>(() => target.OrNot((ShortSet)null));

    // And: with a set, then with value arrays
    target.And(lowThree);
    target.And(new ushort[] { 1, 2 });
    Assert.AreEqual("1, 2", render(target));
    target.And(new ushort[] { 1 });
    Assert.AreEqual("1", render(target));
    Verify.Exception<ArgumentNullException>(() => target.And((ShortSet)null));
    Verify.Exception<ArgumentNullException>(() => target.And((IEnumerable<ushort>)null));

    // AndNot: with a set, then with a value array
    target.Add(96);
    target.Add(64);
    target.AndNot(lowThree);
    target.AndNot(new ushort[] { 96 });
    Assert.AreEqual("64", render(target));
    Verify.Exception<ArgumentNullException>(() => target.AndNot((ShortSet)null));
    Verify.Exception<ArgumentNullException>(() => target.AndNot((IEnumerable<ushort>)null));

    // Clear
    target.Clear();
    Assert.AreEqual("", render(target));

    // From
    target.From(lowThree);
    Assert.AreEqual("0, 1, 2", render(target));
    Verify.Exception<ArgumentNullException>(() => target.From((ShortSet)null));

    // FromAnd: the target is fully set beforehand, yet ends up as the intersection only
    ShortSet oneToThree = new ShortSet(100);
    oneToThree.Or(new ushort[] { 1, 2, 3 });
    target.Clear();
    target.Not();
    target.FromAnd(lowThree, oneToThree);
    Assert.AreEqual("1, 2", render(target));
    Verify.Exception<ArgumentNullException>(() => target.FromAnd((ShortSet)null, lowThree));
    Verify.Exception<ArgumentNullException>(() => target.FromAnd(lowThree, (ShortSet)null));

    // ToString
    Assert.AreEqual("[1, 2]", target.ToString());
}
/// <summary>
///  Computes the distinct values of the configured column among the rows of
///  a partition that match the where clause (and the value prefix, if one is
///  set), together with how often each value occurs.
/// </summary>
/// <param name="p">Partition to evaluate against.</param>
/// <returns>
///  The distinct result for the partition. If the column does not exist, the
///  result is returned early carrying that error. Values are only filled in
///  when the base query evaluation succeeded.
/// </returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="p"/> is null.</exception>
public override DistinctResult Compute(Partition p)
{
    if (p == null)
    {
        throw new ArgumentNullException("p");
    }

    DistinctResult result = new DistinctResult(this);

    // The column must exist in this partition
    if (!p.ContainsColumn(this.Column))
    {
        result.Details.AddError(ExecutionDetails.ColumnDoesNotExist, this.Column);
        return result;
    }

    // Evaluate the base where clause and capture its total before any prefix filtering
    ShortSet matches = new ShortSet(p.Count);
    this.Where.TryEvaluate(p, matches, result.Details);
    result.Total = matches.Count();

    // Narrow to the prefix typed so far, if there is one. The prefix query uses its
    // own details object, so a failure there only skips the filter.
    if (!String.IsNullOrEmpty(this.ValuePrefix))
    {
        ExecutionDetails prefixOutcome = new ExecutionDetails();
        ShortSet prefixMatches = new ShortSet(p.Count);
        TermExpression prefixTerm = new TermExpression(this.Column, Operator.StartsWith, this.ValuePrefix);
        prefixTerm.TryEvaluate(p, prefixMatches, prefixOutcome);

        if (prefixOutcome.Succeeded)
        {
            matches.And(prefixMatches);
        }
    }

    if (!result.Details.Succeeded)
    {
        return result;
    }

    // Count the occurrences of each value among the matching rows
    Dictionary<object, int> occurrences = new Dictionary<object, int>();
    IUntypedColumn source = p.Columns[this.Column];

    for (int position = 0; position < source.Count; ++position)
    {
        ushort lid = (ushort)position;
        if (!matches.Contains(lid))
        {
            continue;
        }

        object value = source[lid];

        // TryGetValue leaves 'seen' at zero for a value not counted yet
        int seen;
        occurrences.TryGetValue(value, out seen);
        occurrences[value] = seen + 1;
    }

    // Convert the top this.Count rows by count into a DataBlock
    result.Values = ToDataBlock(occurrences, this.Column, (int)this.Count);
    result.AllValuesReturned = result.Values.RowCount == occurrences.Count;

    return result;
}