/// <summary>
/// Manual timing harness: repeatedly calls ShortSet.Count() on an untouched set, a set
/// inverted with Not(), and a randomly built set, then writes the last counts and the
/// measured throughput to Trace.
/// </summary>
private static void SetCountPerformance()
{
    // Three inputs: freshly constructed, inverted, and random (built with 10,000 as the second argument).
    ShortSet untouchedSet = new ShortSet(ushort.MaxValue);

    ShortSet invertedSet = new ShortSet(ushort.MaxValue);
    invertedSet.Not();

    ShortSet randomSet = Arriba.Test.ShortSetTests.BuildRandom(ushort.MaxValue, 10000, new Random());

    // The counts are kept so they can be traced after the loop.
    ushort untouchedCount = 0;
    ushort invertedCount = 0;
    ushort randomCount = 0;

    int iterations = 1000000;
    Stopwatch timer = Stopwatch.StartNew();

    for (int pass = 0; pass < iterations; pass++)
    {
        untouchedCount = untouchedSet.Count();
        invertedCount = invertedSet.Count();
        randomCount = randomSet.Count();

        // Disabled alternative that times ShortSet.CallOverheadTest() instead:
        //untouchedCount = (ushort)ShortSet.CallOverheadTest();
        //invertedCount = (ushort)ShortSet.CallOverheadTest();
        //randomCount = (ushort)ShortSet.CallOverheadTest();
    }

    timer.Stop();

    double milliseconds = timer.ElapsedMilliseconds;
    double operationsPerMillisecond = iterations / milliseconds;

    Trace.Write(String.Format("{0:n0}, {1:n0}, {2:n0}\r\n", untouchedCount, invertedCount, randomCount));
    Trace.Write(String.Format("{0:n0} operations in {1:n0} milliseconds; {2:n0} per millisecond.\r\n", iterations, milliseconds, operationsPerMillisecond));
}
/// <summary>
/// Collects up to 500 values of <paramref name="column"/> from the rows contained in
/// <paramref name="whereSet"/>, choosing rows at random during a single forward pass.
/// </summary>
/// <param name="p">Partition whose Count bounds the row IDs scanned.</param>
/// <param name="column">Column the sampled values are read from.</param>
/// <param name="whereSet">Set of row IDs eligible for sampling.</param>
/// <param name="matchCount">Number of rows in <paramref name="whereSet"/>, as supplied by the caller.</param>
/// <returns>An array of length min(500, matchCount) holding the sampled values.</returns>
private static object[] GetColumnSamples(Partition p, IUntypedColumn column, ShortSet whereSet, int matchCount)
{
    int targetCount = Math.Min(500, matchCount);
    object[] picked = new object[targetCount];
    Random random = new Random();

    int pickedCount = 0;
    int matchesRemaining = matchCount;

    for (int row = 0; row < p.Count && pickedCount < targetCount; ++row)
    {
        ushort lid = (ushort)row;
        if (!whereSet.Contains(lid))
        {
            continue;
        }

        double roll = random.NextDouble();
        int stillNeeded = targetCount - pickedCount;

        // Keep this row when (stillNeeded / matchesRemaining) > roll; written as a
        // multiplication so no division is needed.
        if (stillNeeded > (roll * matchesRemaining))
        {
            picked[pickedCount] = column[lid];
            pickedCount++;
        }

        matchesRemaining--;
    }

    return picked;
}
/// <summary>
/// Manual timing harness: repeatedly calls ShortSet.FromAnd on a target set with an inverted
/// set and a randomly built set, then writes the resulting count and throughput to Trace.
/// </summary>
private static void FromAndPerformance()
{
    ShortSet target = new ShortSet(ushort.MaxValue);

    ShortSet invertedSet = new ShortSet(ushort.MaxValue);
    invertedSet.Not();

    ShortSet randomSet = Arriba.Test.ShortSetTests.BuildRandom(ushort.MaxValue, 10000, new Random());

    // Count() is called once before timing starts; its return value is intentionally unused here.
    randomSet.Count();

    int iterations = 3000000;
    Stopwatch timer = Stopwatch.StartNew();

    for (int pass = 0; pass < iterations; pass++)
    {
        target.FromAnd(invertedSet, randomSet);
    }

    timer.Stop();

    double milliseconds = timer.ElapsedMilliseconds;
    double operationsPerMillisecond = iterations / milliseconds;

    Trace.Write(String.Format("{0:n0}\r\n", target.Count()));
    Trace.Write(String.Format("{0:n0} operations in {1:n0} milliseconds; {2:n0} per millisecond.\r\n", iterations, milliseconds, operationsPerMillisecond));
}
/// <summary>
/// Asks <paramref name="index"/> for the items matching <paramref name="word"/> and
/// returns the matching values as a ", "-separated string.
/// </summary>
/// <param name="index">Word index to query.</param>
/// <param name="word">Word passed to WhereMatches.</param>
/// <returns>The values of the result set joined with ", ".</returns>
private static string GetMatches(WordIndex index, string word)
{
    ShortSet matches = new ShortSet(ushort.MaxValue);
    index.WhereMatches(word, matches);

    string joined = String.Join(", ", matches.Values);
    return joined;
}
/// <summary>
/// Performance check for enumerating a ShortSet through its Values property.
/// </summary>
/// <remarks>
/// Stated goal: enumerating is under 200k instructions, which at 2M instructions per
/// millisecond is 10 enumerations per millisecond (Release build). Enumeration is used to
/// walk set items when computing results in ORDER BY order.
/// </remarks>
public void ShortSet_Performance_Enumerate()
{
    ShortSet smallSet = BuildRandom(ushort.MaxValue, 1000, new Random());
    ShortSet largeSet = BuildRandom(ushort.MaxValue, 10000, new Random());

    int iterations = 500;
    Stopwatch timer = Stopwatch.StartNew();

    for (int pass = 0; pass < iterations; pass++)
    {
        // Each pass enumerates both sets and verifies the number of values returned.
        ICollection<ushort> smallValues = smallSet.Values;
        Assert.AreEqual(1000, smallValues.Count);

        ICollection<ushort> largeValues = largeSet.Values;
        Assert.AreEqual(10000, largeValues.Count);
    }

    // Two enumerations per pass.
    int operations = 2 * iterations;
    double milliseconds = timer.ElapsedMilliseconds;
    double operationsPerMillisecond = operations / milliseconds;

    Trace.Write(String.Format("{0:n0} operations in {1:n0} milliseconds; {2:n0} per millisecond.", operations, milliseconds, operationsPerMillisecond));
    Assert.IsTrue(operationsPerMillisecond > 5, "Not within 200% of goal.");
}
/// <summary>
/// Performance check for ShortSet.Count() on an untouched set, an inverted set, and a
/// randomly built set.
/// </summary>
/// <remarks>
/// Stated goal: Count is under 10k instructions. 2GHz is 2B/sec, so 2M/ms, so 10k each means
/// 200 iterations per ms (Release build). Count is used for the COUNT(*) aggregate and to
/// compute IntelliSense rank for words in the word index.
/// </remarks>
public void ShortSet_Performance_Count()
{
    ShortSet s0 = new ShortSet(ushort.MaxValue);

    ShortSet s1 = new ShortSet(ushort.MaxValue);
    s1.Not();

    ShortSet s2 = BuildRandom(ushort.MaxValue, 10000, new Random());

    int iterations = 10000;
    Stopwatch w = Stopwatch.StartNew();

    for (int i = 0; i < iterations; ++i)
    {
        Assert.AreEqual(0, s0.Count());
        Assert.AreEqual(ushort.MaxValue, s1.Count());
        Assert.AreEqual(10000, s2.Count());
    }

    // Three Count() calls per iteration. The traced operation count now uses the same
    // figure as the rate, so the logged numbers agree with each other.
    int operations = 3 * iterations;
    double milliseconds = w.ElapsedMilliseconds;
    double operationsPerMillisecond = operations / milliseconds;

    Trace.Write(String.Format("{0:n0} operations in {1:n0} milliseconds; {2:n0} per millisecond.", operations, milliseconds, operationsPerMillisecond));
    Assert.IsTrue(operationsPerMillisecond > 50, "Not within 200% of goal.");
}
/// <summary>
/// Builds a column of the given numeric type holding 0..9, runs MinAggregator over the
/// even-indexed rows, and asserts the result equals the column's own value at row 0.
/// </summary>
/// <param name="numericColumnTypeName">Type name passed to ColumnDetails when building the column.</param>
private void CheckTypeDetermination(string numericColumnTypeName)
{
    // Ten rows whose value equals their row index (0 through 9).
    IUntypedColumn column = ColumnFactory.Build(new ColumnDetails("Unused", numericColumnTypeName, 10), 0);
    column.SetSize(10);

    for (int row = 0; row < 10; row++)
    {
        column[(ushort)row] = row;
    }

    // Select rows 0, 2, 4, 6, 8.
    ShortSet evenRows = new ShortSet(10);
    for (int row = 0; row < 10; row++)
    {
        if (row % 2 == 0)
        {
            evenRows.Add((ushort)row);
        }
    }

    // Comparing against column[0] checks both the minimum's value and its type, which
    // exercises the type determination done inside the aggregator.
    MinAggregator minAggregator = new MinAggregator();
    object aggregationContext = minAggregator.CreateContext();
    IUntypedColumn[] columns = new IUntypedColumn[] { column };
    object minimum = minAggregator.Aggregate(aggregationContext, evenRows, columns);

    Assert.AreEqual(column[0], minimum);
}
/// <summary>
/// For one partition, evaluates the Where clause and then counts, per column, how many of
/// those rows also match this.Term. Returns the non-zero counts sorted descending.
/// </summary>
/// <param name="p">Partition to evaluate against.</param>
/// <returns>
/// A result whose Values block has one ("ColumnName", "Count") row per column with at least
/// one match, and whose Total is the number of rows matching the Where clause.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="p"/> is null.</exception>
public DataBlockResult Compute(Partition p)
{
    if (p == null)
    {
        throw new ArgumentNullException("p");
    }

    DataBlockResult result = new DataBlockResult(this);

    // Find matches for the remaining query
    ShortSet baseQueryMatches = new ShortSet(p.Count);
    this.Where.TryEvaluate(p, baseQueryMatches, result.Details);

    // Counted once and reused for both the guard below and result.Total.
    ushort baseMatchCount = baseQueryMatches.Count();

    // Find and count matches per column for the term in the outer query
    List<Tuple<string, int>> matchCountPerColumn = new List<Tuple<string, int>>();

    if (baseMatchCount > 0)
    {
        // NOTE(review): bareTerm is never read. It is kept only because the TermExpression
        // constructor is not visible from here and may validate this.Term - confirm and remove.
        TermExpression bareTerm = new TermExpression(this.Term);

        ShortSet termMatchesForColumn = new ShortSet(p.Count);
        ExecutionDetails perColumnDetails = new ExecutionDetails();

        foreach (IColumn<object> column in p.Columns.Values)
        {
            // Reuse the scratch set and details object for every column.
            termMatchesForColumn.Clear();
            perColumnDetails.Succeeded = true;

            column.TryWhere(Operator.Matches, this.Term, termMatchesForColumn, perColumnDetails);

            // Restrict the term matches to rows that also satisfy the base query.
            termMatchesForColumn.And(baseQueryMatches);

            ushort matchCount = termMatchesForColumn.Count();
            if (matchCount > 0)
            {
                matchCountPerColumn.Add(new Tuple<string, int>(column.Name, (int)matchCount));
            }
        }

        // Sort results by count of matches descending
        matchCountPerColumn.Sort((left, right) => right.Item2.CompareTo(left.Item2));
    }

    // Copy to a DataBlock and return it
    DataBlock block = new DataBlock(new string[] { "ColumnName", "Count" }, matchCountPerColumn.Count);

    int index = 0;
    foreach (var column in matchCountPerColumn)
    {
        block[index, 0] = column.Item1;
        block[index, 1] = column.Item2;
        index++;
    }

    result.Values = block;
    result.Total = baseMatchCount;

    return result;
}
/// <summary>
/// Verifies that a ShortSet never reports values at or above its capacity, and that set
/// operations between sets of different capacities keep the smaller set's last value.
/// </summary>
public void ShortSet_CapacityHandling()
{
    // Inverting an empty set of capacity 10 must report exactly 0..9, even if the
    // underlying storage has bits beyond the capacity.
    ShortSet tenItems = new ShortSet(10);
    tenItems.Not();
    Assert.AreEqual("0, 1, 2, 3, 4, 5, 6, 7, 8, 9", String.Join(", ", tenItems.Values));

    // When the target set (capacity 120) is larger than the other set (capacity 64), the
    // other set's last value (63) must survive each operation.
    ShortSet larger = new ShortSet(120);
    larger.Not();

    ShortSet smaller = new ShortSet(64);
    smaller.Not();

    larger.And(smaller);
    Assert.IsTrue(larger.Contains(63));

    larger.Or(smaller);
    Assert.IsTrue(larger.Contains(63));

    // Invert the smaller set again before AndNot, then re-check value 63.
    smaller.Not();
    larger.AndNot(smaller);
    Assert.IsTrue(larger.Contains(63));
}
/// <summary>
/// Evaluates this expression against one partition by calling the named column's TryWhere
/// once for each entry in this.Values, all writing into the same result set.
/// </summary>
/// <param name="partition">Partition containing the column to query.</param>
/// <param name="result">Set that each TryWhere call writes matches into.</param>
/// <param name="details">Receives an error if the column does not exist, plus anything TryWhere reports.</param>
/// <exception cref="ArgumentNullException">Any argument is null.</exception>
public void TryEvaluate(Partition partition, ShortSet result, ExecutionDetails details)
{
    if (details == null)
    {
        throw new ArgumentNullException("details");
    }

    if (result == null)
    {
        throw new ArgumentNullException("result");
    }

    if (partition == null)
    {
        throw new ArgumentNullException("partition");
    }

    // Unknown column: record the error and stop.
    if (!partition.ContainsColumn(this.ColumnName))
    {
        details.AddError(ExecutionDetails.ColumnDoesNotExist, this.ColumnName);
        return;
    }

    IColumn<object> targetColumn = partition.Columns[this.ColumnName];

    for (int valueIndex = 0; valueIndex < this.Values.Length; valueIndex++)
    {
        object candidate = this.Values.GetValue(valueIndex);
        targetColumn.TryWhere(this.Operator, candidate, result, details);
    }
}
/// <summary>
/// Base implementation of TryWhere: it finds no matches and reports that the column does
/// not support the operator, since the base column has no efficient bulk matching.
/// </summary>
/// <param name="op">Operator that was requested; included in the error.</param>
/// <param name="value">Value to compare against (unused here).</param>
/// <param name="result">Set to add matches to (left untouched here).</param>
/// <param name="details">Receives the error; may be null, in which case nothing is recorded.</param>
public void TryWhere(Operator op, T value, ShortSet result, ExecutionDetails details)
{
    if (details == null)
    {
        return;
    }

    details.AddError(ExecutionDetails.ColumnDoesNotSupportOperator, op, this.Name);
}
/// <summary>
/// Returns the number of items in <paramref name="matches"/> as a boxed ulong.
/// </summary>
/// <param name="context">Aggregation context (unused here).</param>
/// <param name="matches">Set whose item count is returned.</param>
/// <param name="columns">Columns to aggregate over (unused here).</param>
/// <returns>The count of <paramref name="matches"/>, as a ulong.</returns>
/// <exception cref="ArgumentNullException"><paramref name="matches"/> is null.</exception>
public object Aggregate(object context, ShortSet matches, IUntypedColumn[] columns)
{
    if (matches == null)
    {
        throw new ArgumentNullException("matches");
    }

    ulong matchCount = matches.Count();
    return matchCount;
}
/// <summary>
/// Verifies ShortSet.LeadingZeros for 64-bit inputs with 0, 1, 10, 62 and 64 leading zero bits.
/// </summary>
public void ShortSet_LeadingZeros()
{
    // All bits set: no leading zeros.
    Assert.AreEqual(0, ShortSet.LeadingZeros(ulong.MaxValue));

    // Top bit cleared: one leading zero.
    Assert.AreEqual(1, ShortSet.LeadingZeros(ulong.MaxValue >> 1));

    // Highest set bit is bit 53: ten leading zeros.
    Assert.AreEqual(10, ShortSet.LeadingZeros(0x0038888888888888UL));

    // Only the two lowest bits set: 62 leading zeros.
    Assert.AreEqual(62, ShortSet.LeadingZeros(3UL));

    // No bits set: all 64 are leading zeros.
    Assert.AreEqual(64, ShortSet.LeadingZeros(0UL));
}
/// <summary>
/// Computes the requested percentiles of this.Column for the rows of one partition that
/// match the Where clause, using a sorted sample of the matching rows.
/// </summary>
/// <param name="p">Partition to evaluate against.</param>
/// <returns>
/// A result whose Total is the match count. When evaluation succeeded and there were
/// matches, Values holds one ("Percentiles", "Values") row per requested percentile.
/// If the column does not exist, an error is recorded and Values is not set.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="p"/> is null.</exception>
public DataBlockResult Compute(Partition p)
{
    if (p == null)
    {
        throw new ArgumentNullException("p");
    }

    DataBlockResult result = new DataBlockResult(this);

    // The column must exist in this partition.
    if (!p.ContainsColumn(this.Column))
    {
        result.Details.AddError(ExecutionDetails.ColumnDoesNotExist, this.Column);
        return result;
    }

    // Evaluate the where clause and record how many rows matched.
    ShortSet whereSet = new ShortSet(p.Count);
    this.Where.TryEvaluate(p, whereSet, result.Details);

    int matchCount = whereSet.Count();
    result.Total = matchCount;

    if (!result.Details.Succeeded || matchCount <= 0)
    {
        return result;
    }

    // Sample the matching rows and order the samples.
    object[] sortedSamples = GetColumnSamples(p, p.Columns[this.Column], whereSet, matchCount);
    Array.Sort(sortedSamples);

    // One output row per requested percentile.
    result.Values = new DataBlock(new string[] { "Percentiles", "Values" }, this.Percentiles.Length);

    for (int row = 0; row < this.Percentiles.Length; row++)
    {
        double percentile = this.Percentiles[row];

        // Map the percentile to a sample index, clamped to the valid range.
        int sampleIndex = (int)(percentile * sortedSamples.Length) - 1;
        sampleIndex = Math.Max(sampleIndex, 0);
        sampleIndex = Math.Min(sampleIndex, sortedSamples.Length - 1);

        result.Values[row, 0] = percentile;
        result.Values[row, 1] = sortedSamples[sampleIndex];
    }

    return result;
}
/// <summary>
/// Creates a boolean column backed by a ShortSet of the "true" items, starting with every
/// item equal to <paramref name="defaultValue"/>.
/// </summary>
/// <param name="defaultValue">Value stored in DefaultValue; when true, the backing set is inverted after creation.</param>
public BooleanColumn(bool defaultValue)
{
    this.DefaultValue = defaultValue;

    // Initial capacity comes from ArrayExtensions.RecommendedSize, with ushort.MaxValue as its third argument.
    int recommended = ArrayExtensions.RecommendedSize(ArrayExtensions.MinimumSize, ArrayExtensions.MinimumSize, ushort.MaxValue);
    _trueItems = new ShortSet((ushort)recommended);

    // A default of true means the backing set is inverted straight away.
    if (defaultValue)
    {
        _trueItems.Not();
    }
}
/// <summary>
/// Finds matches for an operator by locating the first and last positions of
/// <paramref name="value"/>, building a range to scan, and adding the IDs in that range
/// to <paramref name="result"/>.
/// </summary>
/// <remarks>
/// StartsWith is handled only when the value is a ByteBlock: an IsPrefixOf comparer locates
/// the range, which is then built as an Equals range. Any case where no range can be built
/// reports ColumnDoesNotSupportOperator and adds no matches.
/// </remarks>
/// <param name="op">Operator to evaluate.</param>
/// <param name="value">Value to compare against.</param>
/// <param name="result">Set that matching IDs are added to.</param>
/// <param name="details">Receives the error when the operator cannot be evaluated; may be null.</param>
public override void TryWhere(Operator op, T value, ShortSet result, ExecutionDetails details)
{
    RangeToScan range = new RangeToScan();
    bool rangeBuilt;

    bool isStartsWith = (op == Operator.StartsWith);

    if (!isStartsWith)
    {
        // General case: locate the value itself and build the range for the requested operator.
        int firstIndex = FindFirstWhere(value);
        int lastIndex = FindLastWhere(value);
        rangeBuilt = RangeToScan.TryBuild(op, firstIndex, lastIndex, this.Column.Count, ref range);
    }
    else if (value is ByteBlock)
    {
        // The double cast through object is needed because T is not statically known to be ByteBlock.
        IComparable<T> prefixComparer = (IComparable<T>)((ByteBlock)(object)value).GetExtendedIComparable(ByteBlock.Comparison.IsPrefixOf);

        int firstPrefixed = FindFirstWhere(prefixComparer);
        int lastPrefixed = FindLastWhere(prefixComparer);
        rangeBuilt = RangeToScan.TryBuild(Operator.Equals, firstPrefixed, lastPrefixed, this.Column.Count, ref range);
    }
    else
    {
        // StartsWith on anything other than a ByteBlock is not supported.
        rangeBuilt = false;
    }

    if (!rangeBuilt)
    {
        if (details != null)
        {
            details.AddError(ExecutionDetails.ColumnDoesNotSupportOperator, op, this.Name);
        }

        return;
    }

    range.AddMatches(this.SortedIDs, result);
}
/// <summary>
/// Counts how often each value of this.Column occurs among the rows of one partition that
/// match the Where clause, and returns the counts as a DataBlock via ToDataBlock.
/// </summary>
/// <param name="p">Partition to evaluate against.</param>
/// <returns>
/// A result with Values, AllValuesReturned and Total set when evaluation succeeded.
/// If the column does not exist, an error is recorded and these are left unset.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="p"/> is null.</exception>
public override DistinctResult Compute(Partition p)
{
    if (p == null)
    {
        throw new ArgumentNullException("p");
    }

    DistinctResult result = new DistinctResult(this);

    // The column must exist in this partition.
    if (!p.ContainsColumn(this.Column))
    {
        result.Details.AddError(ExecutionDetails.ColumnDoesNotExist, this.Column);
        return result;
    }

    // Evaluate the where clause.
    ShortSet whereSet = new ShortSet(p.Count);
    this.Where.TryEvaluate(p, whereSet, result.Details);

    if (!result.Details.Succeeded)
    {
        return result;
    }

    // Tally the occurrences of each value across the matching rows.
    Dictionary<object, int> countByValue = new Dictionary<object, int>();
    IUntypedColumn column = p.Columns[this.Column];
    int matchedRows = 0;

    for (int row = 0; row < column.Count; row++)
    {
        ushort lid = (ushort)row;
        if (!whereSet.Contains(lid))
        {
            continue;
        }

        object value = column[lid];

        // TryGetValue leaves the tally at 0 for values not seen before.
        int seenSoFar;
        countByValue.TryGetValue(value, out seenSoFar);
        countByValue[value] = seenSoFar + 1;

        matchedRows++;
    }

    // ToDataBlock receives this.Count as the number of rows to keep.
    result.Values = ToDataBlock(countByValue, this.Column, (int)this.Count);
    result.AllValuesReturned = result.Values.RowCount == countByValue.Count;
    result.Total = matchedRows;

    return result;
}
/// <summary>
/// Runs TryWhere on an IpRangeColumn and returns the result set's string form, or null
/// when the evaluation reported failure.
/// </summary>
/// <param name="col">Column to query.</param>
/// <param name="op">Operator to evaluate.</param>
/// <param name="value">Value to compare against.</param>
/// <returns>result.ToString() on success; null otherwise.</returns>
private static string GetMatches(IpRangeColumn col, Operator op, ByteBlock value)
{
    ExecutionDetails details = new ExecutionDetails();
    ShortSet matches = new ShortSet(col.Count);

    col.TryWhere(op, value, matches, details);

    if (details.Succeeded)
    {
        return matches.ToString();
    }

    return null;
}
/// <summary>
/// Delete items from this Partition which meet the provided criteria.
/// </summary>
/// <remarks>
/// Each matching item is overwritten with the current last item and the last-item index is
/// moved back by one, so all removed slots end up at the tail. Every column is then resized
/// and the partition's item count updated. Nothing is modified if evaluating
/// <paramref name="where"/> does not succeed.
/// </remarks>
/// <param name="where">Expression matching items to delete</param>
/// <returns>Result including number deleted</returns>
/// <exception cref="ArgumentNullException"><paramref name="where"/> is null.</exception>
public DeleteResult Delete(IExpression where)
{
    if (where == null)
    {
        throw new ArgumentNullException("where");
    }

    DeleteResult result = new DeleteResult();

    // Find the set of items to delete
    ShortSet doomedSet = new ShortSet(this.Count);
    where.TryEvaluate(this, doomedSet, result.Details);

    if (!result.Details.Succeeded)
    {
        return result;
    }

    // Index of the item currently occupying the last live slot.
    ushort tailIndex = (ushort)(this.Count - 1);
    ushort[] doomedIds = doomedSet.Values;

    // Walk the IDs from the end of the array to the start.
    // NOTE(review): this presumably relies on Values being in ascending order, so the tail
    // item is never one that is still waiting to be deleted - confirm.
    for (int position = doomedIds.Length - 1; position >= 0; --position)
    {
        ushort doomedId = doomedIds[position];

        // Unless the doomed item already is the tail, copy the tail item over it in every column.
        if (doomedId != tailIndex)
        {
            foreach (IColumn<object> c in this.Columns.Values)
            {
                c[doomedId] = c[tailIndex];
            }
        }

        tailIndex--;
    }

    // Everything past tailIndex is now deleted; shrink each column to match.
    ushort remainingCount = (ushort)(tailIndex + 1);
    foreach (IColumn<object> c in this.Columns.Values)
    {
        c.SetSize(remainingCount);
    }

    // Record the new count in the Partition itself
    _itemCount = remainingCount;

    // Return the count deleted
    result.Count = doomedSet.Count();

    return result;
}
/// <summary>
/// Evaluates this term against one partition. A column name of "*" queries every column and
/// succeeds if any column succeeds; otherwise only the named column is queried.
/// </summary>
/// <param name="partition">Partition whose column(s) are queried.</param>
/// <param name="result">Set that TryWhere writes matches into.</param>
/// <param name="details">Receives errors and the overall success state.</param>
/// <exception cref="ArgumentNullException">Any argument is null.</exception>
public virtual void TryEvaluate(Partition partition, ShortSet result, ExecutionDetails details)
{
    if (details == null)
    {
        throw new ArgumentNullException("details");
    }

    if (result == null)
    {
        throw new ArgumentNullException("result");
    }

    if (partition == null)
    {
        throw new ArgumentNullException("partition");
    }

    // Single named column: query it, or report that it does not exist.
    if (!this.ColumnName.Equals("*"))
    {
        if (partition.ContainsColumn(this.ColumnName))
        {
            partition.Columns[this.ColumnName].TryWhere(this.Operator, this.Value, result, details);
        }
        else
        {
            details.AddError(ExecutionDetails.ColumnDoesNotExist, this.ColumnName);
        }

        return;
    }

    // '*' query: try every column, tracking whether at least one succeeded.
    bool anyColumnSucceeded = false;
    ExecutionDetails perColumnDetails = new ExecutionDetails();

    foreach (IColumn<object> column in partition.Columns.Values)
    {
        // Reset the flag so each column's outcome is observed on its own.
        perColumnDetails.Succeeded = true;
        column.TryWhere(this.Operator, this.Value, result, perColumnDetails);
        anyColumnSucceeded |= perColumnDetails.Succeeded;
    }

    details.Succeeded &= anyColumnSucceeded;

    // Only surface the per-column errors when no column could evaluate the term.
    if (!anyColumnSucceeded)
    {
        details.Merge(perColumnDetails);
    }
}
/// <summary>
/// Untyped entry point for TryWhere: converts <paramref name="value"/> to T and forwards
/// to the wrapped typed column, or records a conversion error.
/// </summary>
/// <param name="op">Operator to evaluate.</param>
/// <param name="value">Untyped value to convert and compare against.</param>
/// <param name="result">Set passed through to the typed column.</param>
/// <param name="details">Receives the conversion error; may be null, in which case a failed conversion is silent.</param>
public void TryWhere(Operator op, object value, ShortSet result, ExecutionDetails details)
{
    T typedValue;

    if (TryConvert(value, out typedValue))
    {
        _column.TryWhere(op, typedValue, result, details);
        return;
    }

    if (details != null)
    {
        details.AddError(ExecutionDetails.UnableToConvertType, value, this.Name, typeof(T).Name);
    }
}
/// <summary>
/// Verifies that a ShortSet of capacity zero stays empty after Not(), and after And/Or
/// with a non-empty set.
/// </summary>
public void ShortSet_CapacityZero()
{
    ShortSet zeroCapacity = new ShortSet(0);
    Assert.AreEqual(0, zeroCapacity.Count());

    // Inverting a set with no capacity must not add anything.
    zeroCapacity.Not();
    Assert.AreEqual(0, zeroCapacity.Count());

    // Combining with an inverted set of capacity 10 must not add anything either.
    ShortSet tenItems = new ShortSet(10);
    tenItems.Not();

    zeroCapacity.And(tenItems);
    zeroCapacity.Or(tenItems);
    Assert.AreEqual(0, zeroCapacity.Count());
}
/// <summary>
/// Builds a TimeSpan column, stores values given both as a TimeSpan and as strings, and
/// checks which rows a GreaterThan 30 seconds query returns.
/// </summary>
public void TypedColumn_TimeSpan_Basic()
{
    IColumn<object> durations = ColumnFactory.Build(new ColumnDetails("Duration", "TimeSpan", null), 0);
    durations.SetSize(10);

    // Row 0 is set from a TimeSpan; rows 1-3 are set from strings.
    durations[0] = Value.Create(TimeSpan.FromMinutes(1));
    durations[1] = Value.Create("01:00:00");
    durations[2] = Value.Create("00:00:01");
    durations[3] = Value.Create("1");

    CommitIfRequired(durations);

    // Rows 0, 1 and 3 are expected to be longer than 30 seconds.
    ShortSet longerThanThreshold = new ShortSet(durations.Count);
    durations.TryWhere(Operator.GreaterThan, TimeSpan.FromSeconds(30), longerThanThreshold, null);

    string matchedRows = String.Join(", ", longerThanThreshold.Values);
    Assert.AreEqual("0, 1, 3", matchedRows);
}
/// <summary>
/// Runs TryWhere on a typed column and returns the matching values as a ", "-separated
/// string, or null when the evaluation reported failure.
/// </summary>
/// <typeparam name="T">Value type of the column.</typeparam>
/// <param name="column">Column to query.</param>
/// <param name="op">Operator to evaluate.</param>
/// <param name="value">Value to compare against.</param>
/// <returns>The joined matches on success; null (distinct from any match string) on failure.</returns>
public static string GetMatches<T>(IColumn<T> column, Operator op, T value)
{
    ShortSet matches = new ShortSet(column.Count);
    ExecutionDetails details = new ExecutionDetails();

    column.TryWhere(op, value, matches, details);

    // Success: report the matching values.
    if (details.Succeeded)
    {
        return String.Join(", ", matches.Values);
    }

    // Failure is signalled with null so callers can tell it apart from "no matches".
    return null;
}
/// <summary>
/// Runs the shared AggregatorBaseBehaviors checks against each concrete aggregator, then
/// verifies that BaseAggregator itself throws NotImplementedException from Aggregate and Merge.
/// </summary>
public void Aggregator_BaseBehaviors()
{
    // Shared behavior checks; CountAggregator is the only one passed the extra 'false' argument.
    AggregatorBaseBehaviors(new CountAggregator(), false);
    AggregatorBaseBehaviors(new SumAggregator());
    AggregatorBaseBehaviors(new MinAggregator());
    AggregatorBaseBehaviors(new MaxAggregator());

    // BaseAggregator must not implement unexpected types or methods.
    IUntypedColumn boolColumn = ColumnFactory.Build(new ColumnDetails("ID", "bool", false), 100);

    ShortSet sampleRows = new ShortSet(100);
    sampleRows.Or(new ushort[] { 1, 2, 3 });

    IAggregator baseAggregator = new BaseAggregator();

    Verify.Exception<NotImplementedException>(() => baseAggregator.Aggregate(null, sampleRows, new IUntypedColumn[] { boolColumn }));
    Verify.Exception<NotImplementedException>(() => baseAggregator.Merge(null, new object[2]));
}
/// <summary>
/// Performance check for ShortSet set operations (Not, Clear, Or, And, AndNot) against both
/// other ShortSets and a small ushort array.
/// </summary>
/// <remarks>
/// Stated goal: set operations are under 10k instructions, which at 2M instructions per
/// millisecond is 200 per millisecond (Release build). Set operations are used to combine
/// where clauses and sets for specific words when word searching.
/// </remarks>
public void ShortSet_Performance_Set()
{
    Random random = new Random();

    ShortSet set1k = BuildRandom(ushort.MaxValue, 1000, random);
    ShortSet set10k = BuildRandom(ushort.MaxValue, 10000, random);
    ShortSet set50k = BuildRandom(ushort.MaxValue, 50000, random);
    ushort[] fewValues = { 1, 126, 950, 1024, 1025, 1670, 19240 };

    ShortSet scratch = new ShortSet(ushort.MaxValue);

    // Nine operations per iteration x 2,500 iterations = 22,500 operations.
    int iterations = 2500;
    Stopwatch timer = Stopwatch.StartNew();

    for (int pass = 0; pass < iterations; pass++)
    {
        // Whole-set operations, leaving scratch equal to set1k
        scratch.Not();
        scratch.Clear();
        scratch.Or(set1k);

        // Operations taking an array of values
        scratch.And(fewValues);
        scratch.Or(fewValues);
        scratch.AndNot(fewValues);

        // Operations taking another ShortSet
        scratch.Or(set10k);
        scratch.And(set50k);
        scratch.AndNot(set10k);
    }

    int operations = 9 * iterations;
    double milliseconds = timer.ElapsedMilliseconds;
    double operationsPerMillisecond = operations / milliseconds;

    Trace.Write(String.Format("{0:n0} operations in {1:n0} milliseconds; {2:n0} per millisecond.", operations, milliseconds, operationsPerMillisecond));
    Assert.IsTrue(operationsPerMillisecond > 100, "Not within 200% of goal.");
}
/// <summary>
/// Finds the unique values of this.Column among the rows of one partition that match the
/// Where clause, using a worker typed to the column's own value type.
/// </summary>
/// <param name="p">Partition to evaluate against.</param>
/// <returns>
/// A result with ColumnType, AllValuesReturned and a single-column Values block set when
/// evaluation succeeded. If the column does not exist, an error is recorded instead.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="p"/> is null.</exception>
public virtual DistinctResult Compute(Partition p)
{
    if (p == null)
    {
        throw new ArgumentNullException("p");
    }

    // A Count of zero, or one too large for a ushort, means "as many as a ushort allows".
    ushort perPartitionLimit;
    if (this.Count == 0 || this.Count > ushort.MaxValue)
    {
        perPartitionLimit = ushort.MaxValue;
    }
    else
    {
        perPartitionLimit = (ushort)this.Count;
    }

    DistinctResult result = new DistinctResult(this);

    // The column must exist in this partition.
    if (!p.ContainsColumn(this.Column))
    {
        result.Details.AddError(ExecutionDetails.ColumnDoesNotExist, this.Column);
        return result;
    }

    // Evaluate the where clause.
    ShortSet whereSet = new ShortSet(p.Count);
    this.Where.TryEvaluate(p, whereSet, result.Details);

    if (!result.Details.Succeeded)
    {
        return result;
    }

    IUntypedColumn column = p.Columns[this.Column];

    // Build a worker closed over the column's type so it can work with the inner column natively.
    GetUniqueValuesWorker worker = NativeContainer.CreateTypedInstance<GetUniqueValuesWorker>(typeof(GetUniqueValuesWorker<>), column.ColumnType);

    bool allValuesReturned;
    Array uniqueValues = worker.GetUniqueValuesFromColumn(column.InnerColumn, whereSet, perPartitionLimit, out allValuesReturned);

    result.ColumnType = column.ColumnType;
    result.AllValuesReturned = allValuesReturned;

    // Package the unique values as a one-column DataBlock named after the column.
    DataBlock uniqueBlock = new DataBlock(new string[] { this.Column }, uniqueValues.GetLength(0));
    uniqueBlock.SetColumn(0, uniqueValues);
    result.Values = uniqueBlock;

    return result;
}
/// <summary>
/// Performance check for single-item ShortSet operations: each value is looked up with
/// Contains and then toggled with Add or Remove.
/// </summary>
/// <remarks>
/// Stated goal: these operations are under 10 instructions, which at 2M instructions per
/// millisecond is more than 200k per millisecond (Release build). Get and Set are used when
/// evaluating ORDER BY for small sets and for determining which aggregates each item should
/// be included within.
/// </remarks>
public void ShortSet_Performance_GetAndSet()
{
    Random random = new Random();

    ShortSet toggledSet = BuildRandom(ushort.MaxValue, 10000, random);
    ShortSet valueSource = BuildRandom(ushort.MaxValue, 1000, random);
    ushort[] getAndSetValues = valueSource.Values.ToArray();

    // 1k values; 2k operations per iteration; 20M total
    int iterations = 10000;
    Stopwatch timer = Stopwatch.StartNew();

    for (int pass = 0; pass < iterations; pass++)
    {
        int valueCount = getAndSetValues.Length;

        for (int index = 0; index < valueCount; index++)
        {
            ushort value = getAndSetValues[index];

            // Indexer-based alternative (disabled):
            //bool initial = toggledSet[value];
            //toggledSet[value] = !initial;

            // Toggle membership: one read followed by one write.
            bool wasPresent = toggledSet.Contains(value);
            if (wasPresent)
            {
                toggledSet.Remove(value);
            }
            else
            {
                toggledSet.Add(value);
            }
        }
    }

    int operations = 2 * getAndSetValues.Length * iterations;
    double milliseconds = timer.ElapsedMilliseconds;
    double operationsPerMillisecond = operations / milliseconds;

    Trace.Write(String.Format("{0:n0} operations in {1:n0} milliseconds; {2:n0} per millisecond.", operations, milliseconds, operationsPerMillisecond));
    Assert.IsTrue(operationsPerMillisecond > 75000, "Not within 200% of goal.");
}
/// <summary>
/// Builds a result set sized to <paramref name="range"/>, optionally seeds it with
/// <paramref name="previousValues"/>, adds the range's matches, and returns the values as a
/// ", "-separated string.
/// </summary>
/// <param name="range">Range whose matches are added.</param>
/// <param name="sortedIDs">Sorted IDs passed to range.AddMatches.</param>
/// <param name="previousValues">Values added to the set first; may be null to start empty.</param>
/// <returns>The values of the resulting set joined with ", ".</returns>
private static string AddMatches(RangeToScan range, ushort[] sortedIDs, ushort[] previousValues)
{
    ShortSet combined = new ShortSet((ushort)range.Count);

    // Seed the set with any previously found values.
    if (previousValues != null)
    {
        foreach (ushort previous in previousValues)
        {
            combined.Add(previous);
        }
    }

    // Add the matches from the range itself.
    range.AddMatches(sortedIDs, combined);

    string joined = String.Join(", ", combined.Values);
    return joined;
}
/// <summary>
/// Evaluates to every item: the result set is cleared and then inverted.
/// </summary>
/// <param name="partition">Partition being evaluated (only null-checked here).</param>
/// <param name="result">Set that is cleared and then inverted.</param>
/// <param name="details">Execution details (only null-checked here).</param>
/// <exception cref="ArgumentNullException">Any argument is null.</exception>
public void TryEvaluate(Partition partition, ShortSet result, ExecutionDetails details)
{
    if (details == null)
    {
        throw new ArgumentNullException("details");
    }

    if (result == null)
    {
        throw new ArgumentNullException("result");
    }

    if (partition == null)
    {
        throw new ArgumentNullException("partition");
    }

    // Clear first so the inversion starts from an empty set. The ShortSet scopes the
    // inverted set to the ID range valid within the data set.
    result.Clear();
    result.Not();
}