// Checks that a ShortSet never reports values at or beyond its capacity, and that
// combining sets of different capacities leaves the in-range values intact.
public void ShortSet_CapacityHandling()
{
    // Inverting an empty set of capacity 10 must report exactly 0..9,
    // even if the backing storage has spare bits above the capacity.
    ShortSet tenItemSet = new ShortSet(10);
    tenItemSet.Not();

    string reportedValues = String.Join(", ", tenItemSet.Values);
    Assert.AreEqual("0, 1, 2, 3, 4, 5, 6, 7, 8, 9", reportedValues);

    // When the set being modified is larger than the other operand, the last value
    // shared by both (63) must survive each operation, and values above the other
    // set's capacity must not take part in the operation.
    ShortSet largerSet = new ShortSet(120);
    largerSet.Not();

    ShortSet smallerSet = new ShortSet(64);
    smallerSet.Not();

    largerSet.And(smallerSet);
    Assert.IsTrue(largerSet.Contains(63));

    largerSet.Or(smallerSet);
    Assert.IsTrue(largerSet.Contains(63));

    // smallerSet is now empty, so removing it must leave 63 in place.
    smallerSet.Not();
    largerSet.AndNot(smallerSet);
    Assert.IsTrue(largerSet.Contains(63));
}
/// <summary>
/// Collects a random sample of up to 500 values of <paramref name="column"/> from the
/// rows contained in <paramref name="whereSet"/>, in a single pass over the partition.
/// </summary>
/// <param name="p">Partition whose Count bounds the row indexes scanned.</param>
/// <param name="column">Column the sampled values are read from.</param>
/// <param name="whereSet">Set of row indexes eligible for sampling.</param>
/// <param name="matchCount">
/// Number of eligible rows the caller reports; it determines both the sample size
/// and the odds with which each eligible row is taken.
/// </param>
/// <returns>
/// An array of min(500, matchCount) elements. Slots that were not filled during the
/// scan keep their initial null value.
/// </returns>
private static object[] GetColumnSamples(Partition p, IUntypedColumn column, ShortSet whereSet, int matchCount)
{
    int sampleSize = Math.Min(500, matchCount);
    object[] samples = new object[sampleSize];
    Random random = new Random();

    int taken = 0;
    int remaining = matchCount;

    for (int row = 0; row < p.Count && taken < sampleSize; ++row)
    {
        ushort lid = (ushort)row;
        if (!whereSet.Contains(lid))
        {
            continue;
        }

        double roll = random.NextDouble();
        int stillNeeded = sampleSize - taken;

        // Take this row with probability (stillNeeded / remaining); written as a
        // multiplication so no division is needed.
        if (stillNeeded > (roll * remaining))
        {
            samples[taken] = column[lid];
            taken++;
        }

        remaining--;
    }

    return samples;
}
/// <summary>
/// Gets or sets the boolean value for an item, backed by membership in _trueItems.
/// </summary>
/// <param name="lid">Identifier of the item to read or write.</param>
/// <returns>True when <paramref name="lid"/> is contained in _trueItems.</returns>
public bool this[ushort lid]
{
    get
    {
        bool isTrue = _trueItems.Contains(lid);
        return isTrue;
    }

    set
    {
        // False is stored by removing the item; true by adding it.
        if (!value)
        {
            _trueItems.Remove(lid);
            return;
        }

        _trueItems.Add(lid);
    }
}
/// <summary>
/// Computes the distinct values of this.Column among the rows of a partition that
/// match this.Where, together with how many matching rows hold each value.
/// </summary>
/// <param name="p">Partition to evaluate the query against.</param>
/// <returns>
/// A result whose Details carry any errors. On success, Values holds the rows produced
/// by ToDataBlock (limited by this.Count), AllValuesReturned tells whether every
/// distinct value is included, and Total is the number of matching rows.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="p"/> is null.</exception>
public override DistinctResult Compute(Partition p)
{
    if (p == null)
    {
        throw new ArgumentNullException("p");
    }

    DistinctResult result = new DistinctResult(this);

    // Nothing to compute when the partition lacks the requested column.
    if (!p.ContainsColumn(this.Column))
    {
        result.Details.AddError(ExecutionDetails.ColumnDoesNotExist, this.Column);
        return result;
    }

    // Evaluate the where clause; stop here if it reported errors.
    ShortSet whereSet = new ShortSet(p.Count);
    this.Where.TryEvaluate(p, whereSet, result.Details);
    if (!result.Details.Succeeded)
    {
        return result;
    }

    // Tally the occurrences of each value across the matching rows.
    Dictionary<object, int> countByValue = new Dictionary<object, int>();
    IUntypedColumn column = p.Columns[this.Column];
    int matchedRows = 0;

    for (int index = 0; index < column.Count; ++index)
    {
        ushort lid = (ushort)index;
        if (!whereSet.Contains(lid))
        {
            continue;
        }

        object value = column[lid];
        int seenSoFar;
        if (countByValue.TryGetValue(value, out seenSoFar))
        {
            countByValue[value] = seenSoFar + 1;
        }
        else
        {
            countByValue[value] = 1;
        }

        matchedRows++;
    }

    // Convert the tallies into a DataBlock, limited by this.Count.
    result.Values = ToDataBlock(countByValue, this.Column, (int)this.Count);
    result.AllValuesReturned = (countByValue.Count == result.Values.RowCount);
    result.Total = matchedRows;

    return result;
}
/// <summary>
/// Picks the row indexes to return for a select by walking the order-by column's
/// sorted indexes and keeping those present in <paramref name="whereSet"/>.
/// Falls back to GetLIDsToReturnSparse when the column cannot supply sorted indexes.
/// </summary>
/// <param name="p">Partition containing the order-by column.</param>
/// <param name="context">Select context supplying OrderByColumn, OrderByDescending and Count.</param>
/// <param name="result">Select result whose Total caps the number of rows returned.</param>
/// <param name="whereSet">Set of row indexes that match the query.</param>
/// <returns>
/// An array of min(context.Count, result.Total) row indexes in the requested order.
/// </returns>
private static ushort[] GetLIDsToReturnDense(Partition p, SelectContext context, SelectResult result, ShortSet whereSet)
{
    IColumn<object> orderByColumn = p.Columns[context.OrderByColumn];

    // Use the pre-sorted indexes only if the column has them.
    IList<ushort> sortedLIDs;
    int sortedLIDsCount;
    bool hasSortedIndexes = orderByColumn.TryGetSortedIndexes(out sortedLIDs, out sortedLIDsCount);
    if (!hasSortedIndexes)
    {
        return GetLIDsToReturnSparse(p, context, result, whereSet);
    }

    // Decide how many rows to return; nothing more to do when that is zero.
    int countToReturn = Math.Min(context.Count, (int)(result.Total));
    ushort[] lidsToReturn = new ushort[countToReturn];
    if (countToReturn == 0)
    {
        return lidsToReturn;
    }

    // Set up the walk: from the end backwards for descending, from the start otherwise.
    int sortedIndex;
    int stopIndex;
    int step;
    if (context.OrderByDescending)
    {
        sortedIndex = orderByColumn.Count - 1;
        stopIndex = -1;
        step = -1;
    }
    else
    {
        sortedIndex = 0;
        stopIndex = orderByColumn.Count;
        step = 1;
    }

    // Collect matches in sort order until the requested number has been gathered.
    ushort countAdded = 0;
    while (sortedIndex != stopIndex)
    {
        ushort lid = sortedLIDs[sortedIndex];
        if (whereSet.Contains(lid))
        {
            lidsToReturn[countAdded] = lid;
            ++countAdded;

            if (countAdded == countToReturn)
            {
                break;
            }
        }

        sortedIndex += step;
    }

    return lidsToReturn;
}
// Measures the throughput of Contains followed by Add/Remove on a ShortSet.
// Goal: each set operation is under 10 instructions, so at 2M instructions per
// millisecond more than 200k operations per millisecond are expected (Release build).
// These operations are used when evaluating ORDER BY for small sets and when
// determining which aggregates each item belongs to.
public void ShortSet_Performance_GetAndSet()
{
    Random random = new Random();
    ShortSet target = BuildRandom(ushort.MaxValue, 10000, random);
    ShortSet valueSource = BuildRandom(ushort.MaxValue, 1000, random);
    ushort[] toggleValues = valueSource.Values.ToArray();

    // 1k values, 2 operations per value, 10k passes: 20M operations in total.
    int iterations = 10000;

    Stopwatch timer = Stopwatch.StartNew();

    for (int pass = 0; pass < iterations; ++pass)
    {
        int valueCount = toggleValues.Length;
        for (int index = 0; index < valueCount; ++index)
        {
            ushort value = toggleValues[index];

            // Toggle membership: one read, then one write.
            if (target.Contains(value))
            {
                target.Remove(value);
            }
            else
            {
                target.Add(value);
            }
        }
    }

    int operations = (2 * toggleValues.Length * iterations);
    double milliseconds = timer.ElapsedMilliseconds;
    double operationsPerMillisecond = operations / milliseconds;

    string report = String.Format("{0:n0} operations in {1:n0} milliseconds; {2:n0} per millisecond.", operations, milliseconds, operationsPerMillisecond);
    Trace.Write(report);

    Assert.IsTrue(operationsPerMillisecond > 75000, "Not within 200% of goal.");
}
/// <summary>
/// Runs the base consistency checks, then verifies the sorted ID list: its count
/// matches the item count, every ID is in range and appears once, the values are in
/// non-descending order when read in sorted-ID order, and no ID is missing.
/// </summary>
/// <param name="level">Verification level, passed through to the base implementation.</param>
/// <param name="details">
/// Receives one error per problem found. May be null, in which case the checks still
/// run but nothing is recorded by this method.
/// </param>
public override void VerifyConsistency(VerificationLevel level, ExecutionDetails details)
{
    base.VerifyConsistency(level, details);

    // The sorted ID list must have exactly one entry per item.
    if (this.SortedIDCount != this.Count && details != null)
    {
        details.AddError(ExecutionDetails.ColumnDoesNotHaveEnoughValues, this.Name, this.SortedIDCount, this.Count);
    }

    // Walk the sorted IDs, tracking which IDs were seen and the previous value.
    ShortSet seenIDs = new ShortSet(this.Count);
    IComparable previousValue = null;
    ushort previousID = 0;

    for (int position = 0; position < this.Count; ++position)
    {
        ushort id = this.SortedIDs[position];

        if (id >= this.Count)
        {
            if (details != null)
            {
                details.AddError(ExecutionDetails.SortedIdOutOfRange, this.Name, id, this.Count);
            }

            continue;
        }

        if (seenIDs.Contains(id))
        {
            if (details != null)
            {
                details.AddError(ExecutionDetails.SortedIdAppearsMoreThanOnce, this.Name, id);
            }

            continue;
        }

        seenIDs.Add(id);
        IComparable value = (IComparable)this[id];

        // Each value must not sort before the one preceding it.
        if (previousValue != null && previousValue.CompareTo(value) > 0 && details != null)
        {
            details.AddError(ExecutionDetails.SortedValuesNotInOrder, this.Name, previousID, previousValue, id, value);
        }

        previousValue = value;
        previousID = id;
    }

    // After inverting, whatever remains in the set is an ID that never appeared.
    seenIDs.Not();
    if (seenIDs.Count() > 0 && details != null)
    {
        details.AddError(ExecutionDetails.SortedColumnMissingIDs, this.Name, String.Join(", ", seenIDs.Values));
    }
}
/// <summary>
/// Computes the distinct values of this.Column among the rows of a partition that
/// match this.Where and, when this.ValuePrefix is set, a StartsWith filter on the
/// same column, together with how many such rows hold each value.
/// </summary>
/// <param name="p">Partition to evaluate the query against.</param>
/// <returns>
/// A result whose Details carry any errors from the where clause. Total is the number
/// of rows matching the where clause alone, taken before the prefix filter is applied.
/// On success, Values holds the rows produced by ToDataBlock (limited by this.Count)
/// and AllValuesReturned tells whether every distinct value is included.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="p"/> is null.</exception>
public override DistinctResult Compute(Partition p)
{
    if (p == null)
    {
        throw new ArgumentNullException("p");
    }

    DistinctResult result = new DistinctResult(this);

    // Nothing to compute when the partition lacks the requested column.
    if (!p.ContainsColumn(this.Column))
    {
        result.Details.AddError(ExecutionDetails.ColumnDoesNotExist, this.Column);
        return result;
    }

    // Evaluate the base where clause and record its total before any narrowing.
    ShortSet whereSet = new ShortSet(p.Count);
    this.Where.TryEvaluate(p, whereSet, result.Details);
    result.Total = whereSet.Count();

    // Narrow to values starting with the prefix, if one was given. The prefix filter
    // reports into its own ExecutionDetails and is applied only when it succeeded,
    // so a failing prefix filter neither narrows the set nor adds errors to the result.
    if (!String.IsNullOrEmpty(this.ValuePrefix))
    {
        ExecutionDetails prefixDetails = new ExecutionDetails();
        ShortSet prefixSet = new ShortSet(p.Count);

        TermExpression prefixFilter = new TermExpression(this.Column, Operator.StartsWith, this.ValuePrefix);
        prefixFilter.TryEvaluate(p, prefixSet, prefixDetails);

        if (prefixDetails.Succeeded)
        {
            whereSet.And(prefixSet);
        }
    }

    if (!result.Details.Succeeded)
    {
        return result;
    }

    // Tally the occurrences of each value across the matching rows.
    Dictionary<object, int> countByValue = new Dictionary<object, int>();
    IUntypedColumn column = p.Columns[this.Column];

    for (int index = 0; index < column.Count; ++index)
    {
        ushort lid = (ushort)index;
        if (!whereSet.Contains(lid))
        {
            continue;
        }

        object value = column[lid];
        int seenSoFar;
        if (countByValue.TryGetValue(value, out seenSoFar))
        {
            countByValue[value] = seenSoFar + 1;
        }
        else
        {
            countByValue[value] = 1;
        }
    }

    // Convert the tallies into a DataBlock, limited by this.Count.
    result.Values = ToDataBlock(countByValue, this.Column, (int)this.Count);
    result.AllValuesReturned = (countByValue.Count == result.Values.RowCount);

    return result;
}