/// <summary>
///  Replace an existing column with a new one built from the given details.
///  Every row of the existing column is assigned, through the untyped
///  indexer, to the same row of the replacement column.
/// </summary>
/// <param name="details">Details carrying the name of the existing column and the settings for its replacement</param>
/// <exception cref="System.ArgumentNullException">details is null</exception>
/// <exception cref="ArribaException">No column with the given name exists</exception>
public void AlterColumn(ColumnDetails details)
{
    if (details == null)
    {
        throw new ArgumentNullException("details");
    }

    // Look up the column being replaced; altering an unknown column is an error
    IUntypedColumn existingColumn;
    if (!this.Columns.TryGetValue(details.Name, out existingColumn))
    {
        throw new ArribaException(StringExtensions.Format("Column '{0}' does not exist; it can't be altered.", details.Name));
    }

    // Build the replacement, passing the existing column's Count to the factory
    IUntypedColumn newColumn = ColumnFactory.Build(details, existingColumn.Count);

    // Size the replacement to this.Count and copy the rows across
    ushort rowCount = this.Count;
    newColumn.SetSize(rowCount);

    ushort row = 0;
    while (row < rowCount)
    {
        newColumn[row] = existingColumn[row];
        ++row;
    }

    // Swap in the replacement column and its details
    this.Columns[details.Name] = newColumn;
    this.DetailsByColumn[details.Name] = details;
}
/// <summary>
///  Build a ten-row column of the named type holding 0..9, ask a
///  MinAggregator for the minimum over rows 0, 2, 4, 6 and 8, and assert the
///  result equals the value stored in row 0.
/// </summary>
/// <param name="numericColumnTypeName">Type name passed to ColumnDetails for the column under test</param>
private void CheckTypeDetermination(string numericColumnTypeName)
{
    const int RowCount = 10;

    IUntypedColumn column = ColumnFactory.Build(new ColumnDetails("Unused", numericColumnTypeName, 10), 0);
    column.SetSize(RowCount);

    // Fill rows with 0..9 and, in the same pass, collect the even rows as the match set
    ShortSet matches = new ShortSet(RowCount);
    for (int value = 0; value < RowCount; ++value)
    {
        ushort lid = (ushort)value;
        column[lid] = value;

        if (value % 2 == 0)
        {
            matches.Add(lid);
        }
    }

    // Comparing against column[0] checks the returned value and its type together,
    // which exercises the type checks in BaseAggregator.Aggregate
    MinAggregator aggregator = new MinAggregator();
    object context = aggregator.CreateContext();
    IUntypedColumn[] columns = new IUntypedColumn[] { column };
    object minimum = aggregator.Aggregate(context, matches, columns);

    Assert.AreEqual(column[0], minimum);
}
/// <summary>
///  Collect up to 500 values of a column from the rows contained in whereSet.
///  Each matching row is taken when (samples still needed) exceeds
///  (random draw in [0, 1)) * (matches not yet visited).
/// </summary>
/// <param name="p">Partition whose Count bounds the rows scanned</param>
/// <param name="column">Column the sampled values are read from</param>
/// <param name="whereSet">Set of row ids eligible for sampling</param>
/// <param name="matchCount">Match count used to size the sample and weight the draws</param>
/// <returns>Array of length Min(500, matchCount); slots past the last sample taken are left null</returns>
private static object[] GetColumnSamples(Partition p, IUntypedColumn column, ShortSet whereSet, int matchCount)
{
    int sampleLimit = Math.Min(500, matchCount);
    object[] samples = new object[sampleLimit];
    Random random = new Random();

    int taken = 0;
    int matchesRemaining = matchCount;

    for (int i = 0; i < p.Count && taken < sampleLimit; ++i)
    {
        ushort lid = (ushort)i;
        if (!whereSet.Contains(lid))
        {
            continue;
        }

        // Same test as (stillNeeded / matchesRemaining) > draw, written without the division
        double draw = random.NextDouble();
        int stillNeeded = sampleLimit - taken;
        if (stillNeeded > (draw * matchesRemaining))
        {
            samples[taken] = column[lid];
            taken++;
        }

        matchesRemaining--;
    }

    return samples;
}
/// <summary>
///  Add a column described by the given details. If a column with that name
///  already exists, it is altered when the type differs; otherwise only its
///  stored details are replaced.
/// </summary>
/// <param name="details">Details of the column to add</param>
/// <param name="initialCapacity">Capacity passed to the column factory for a new column</param>
/// <exception cref="System.ArgumentNullException">details is null</exception>
/// <exception cref="ArribaException">A new primary key column is requested while another primary key column exists</exception>
public void AddColumn(ColumnDetails details, ushort initialCapacity)
{
    if (details == null)
    {
        throw new ArgumentNullException("details");
    }

    if (!this.Columns.ContainsKey(details.Name))
    {
        // Brand new column: only one primary key column is allowed
        if (details.IsPrimaryKey)
        {
            ColumnDetails existingKey = this.IDColumn;
            if (existingKey != null)
            {
                throw new ArribaException(StringExtensions.Format("Column '{0}' to be added is marked as the primary key but cannot be added because column '{1}' is already the primary key column.", details.Name, existingKey.Name));
            }
        }

        IUntypedColumn created = ColumnFactory.Build(details, initialCapacity);
        this.Columns[details.Name] = created;
        this.DetailsByColumn[details.Name] = details;

        // Size the new column to the current item count
        created.SetSize(_itemCount);
        return;
    }

    if (this.DetailsByColumn[details.Name].Type.Equals(details.Type))
    {
        // Same type: only the side details (such as the alias) can change
        this.DetailsByColumn[details.Name] = details;
    }
    else
    {
        // Different type: rebuild the column
        AlterColumn(details);
    }
}
/// <summary>
///  Write a populated partition to a memory stream, read it back into a
///  second partition, and verify the column names, a sample query, column
///  details and every stored value against the original.
/// </summary>
public void Partition_RoundTrip()
{
    Partition p = new Partition(PartitionMask.All);
    AddSampleData(p);

    // Serialize, rewind the stream, and deserialize into a fresh partition
    Partition p2 = new Partition(PartitionMask.All);
    using (SerializationContext context = new SerializationContext(new MemoryStream()))
    {
        p.WriteBinary(context);
        context.Stream.Seek(0, SeekOrigin.Begin);
        p2.ReadBinary(context);
    }

    // The same columns must be present, in the same order
    string originalNames = String.Join(", ", p.ColumnNames);
    string restoredNames = String.Join(", ", p2.ColumnNames);
    Assert.AreEqual(originalNames, restoredNames);

    // Query the restored partition: up to 3 rows with Priority = 3 and [ID] <= 12000
    SelectQuery query = new SelectQuery();
    query.Columns = new string[] { "ID", "Priority" };
    query.Count = 3;
    query.Where = SelectQuery.ParseWhere("Priority = 3 AND [ID] <= 12000");

    SelectResult result = p2.Query(query);
    Assert.AreEqual(2, (int)result.Total);
    Assert.AreEqual("11999", result.Values[0, 0].ToString());
    Assert.AreEqual("11643", result.Values[1, 0].ToString());

    // Column details must match between the two partitions
    foreach (ColumnDetails original in p.ColumnDetails)
    {
        ColumnDetails restored = p2.DetailsByColumn[original.Name];
        Assert.AreEqual(restored.Name, original.Name);
        Assert.AreEqual(restored.Type, original.Type);
        Assert.AreEqual(restored.Alias, original.Alias);
        Assert.AreEqual(restored.IsPrimaryKey, original.IsPrimaryKey);

        // Defaults are serialized as strings and null/empty aren't distinguishable,
        // so both sides are compared as strings with null mapped to empty
        string restoredDefault = (restored.Default ?? String.Empty).ToString();
        string originalDefault = (original.Default ?? String.Empty).ToString();
        Assert.AreEqual(restoredDefault, originalDefault);
    }

    // The columns themselves and every raw value must match
    foreach (IUntypedColumn original in p.Columns.Values)
    {
        IUntypedColumn restored = p2.Columns[original.Name];
        Assert.AreEqual(restored.Name, original.Name);
        Assert.AreEqual(restored.Count, original.Count);
        Assert.AreEqual(restored.DefaultValue, original.DefaultValue);

        for (ushort lid = 0; lid < original.Count; ++lid)
        {
            Assert.AreEqual(restored[lid], original[lid]);
        }
    }
}
/// <summary>
///  For every partition returned by GetPartitions(), set the LookupColumn of
///  the target color column to the inner column of the source column.
/// </summary>
/// <param name="sourceColumn">Name of the column whose inner column is cast to IColumn&lt;short&gt; and used as the lookup</param>
/// <param name="targetColorColumn">Name of the column whose inner column must be a ColorColumn</param>
/// <exception cref="System.InvalidCastException">
///  The target's inner column is not a ColorColumn, or the source's inner
///  column is not an IColumn&lt;short&gt;
/// </exception>
public void BindColorColumns(string sourceColumn, string targetColorColumn)
{
    foreach (Partition p in GetPartitions())
    {
        IUntypedColumn srcColumn = p.Columns[sourceColumn];
        IUntypedColumn colorColumn = p.Columns[targetColorColumn];

        // Direct cast rather than 'as': a column of the wrong type now fails here with an
        // InvalidCastException instead of a NullReferenceException on the assignment below,
        // matching how the source column is already cast.
        ColorColumn typedColorColumn = (ColorColumn)colorColumn.InnerColumn;
        typedColorColumn.LookupColumn = (IColumn<short>)srcColumn.InnerColumn;
    }
}
/// <summary>
///  Count how often each value of this.Column occurs among the rows matching
///  this.Where, and return the values selected by ToDataBlock (limited by this.Count).
/// </summary>
/// <param name="p">Partition to compute against</param>
/// <returns>
///  Result whose Details carry any errors; on success Values, AllValuesReturned
///  and Total (the number of matching rows) are populated
/// </returns>
/// <exception cref="System.ArgumentNullException">p is null</exception>
public override DistinctResult Compute(Partition p)
{
    if (p == null)
    {
        throw new ArgumentNullException("p");
    }

    DistinctResult result = new DistinctResult(this);

    // The column must exist in this partition
    if (!p.ContainsColumn(this.Column))
    {
        result.Details.AddError(ExecutionDetails.ColumnDoesNotExist, this.Column);
        return result;
    }

    // Evaluate the where clause; failures are recorded in result.Details
    ShortSet matchingRows = new ShortSet(p.Count);
    this.Where.TryEvaluate(p, matchingRows, result.Details);

    if (!result.Details.Succeeded)
    {
        return result;
    }

    // Tally the occurrences of each value across the matching rows
    IUntypedColumn column = p.Columns[this.Column];
    Dictionary<object, int> countByValue = new Dictionary<object, int>();
    int matchedRowCount = 0;

    for (int i = 0; i < column.Count; ++i)
    {
        ushort lid = (ushort)i;
        if (!matchingRows.Contains(lid))
        {
            continue;
        }

        object value = column[lid];

        // On a miss TryGetValue leaves 'seenSoFar' at zero, so the first sighting stores 1
        int seenSoFar;
        countByValue.TryGetValue(value, out seenSoFar);
        countByValue[value] = seenSoFar + 1;

        matchedRowCount++;
    }

    // Convert the counts into a DataBlock limited by this.Count
    result.Values = ToDataBlock(countByValue, this.Column, (int)this.Count);
    result.AllValuesReturned = result.Values.RowCount == countByValue.Count;
    result.Total = matchedRowCount;

    return result;
}
/// <summary>
///  Run the shared ITable_CustomColumn scenario against a single Partition,
///  adding a "Color" column whose lookup is bound to the "Priority" column.
/// </summary>
public void Partition_CustomColumnBasic()
{
    CustomColumnSupport.RegisterCustomColumns();

    ITable_CustomColumn(
        () => new Partition(PartitionMask.All),
        (tbl) =>
        {
            tbl.AddColumn(new ColumnDetails("Color", "color", null));

            // The table built above is a Partition; reach into its columns directly
            Partition partition = tbl as Partition;
            IUntypedColumn priorityColumn = partition.Columns["Priority"];
            IUntypedColumn colorColumn = partition.Columns["Color"];

            // Bind the color column's lookup to the Priority column's inner column
            ColorColumn color = colorColumn.InnerColumn as ColorColumn;
            color.LookupColumn = (IColumn<short>)priorityColumn.InnerColumn;
        });
}
/// <summary>
///  Run the shared base-behavior checks against the Count, Sum, Min and Max
///  aggregators, then verify that BaseAggregator itself throws
///  NotImplementedException from Aggregate and Merge.
/// </summary>
public void Aggregator_BaseBehaviors()
{
    AggregatorBaseBehaviors(new CountAggregator(), false);
    AggregatorBaseBehaviors(new SumAggregator());
    AggregatorBaseBehaviors(new MinAggregator());
    AggregatorBaseBehaviors(new MaxAggregator());

    // Inputs for calling BaseAggregator directly: a bool column and a three-item set
    IUntypedColumn column = ColumnFactory.Build(new ColumnDetails("ID", "bool", false), 100);
    IUntypedColumn[] columns = new IUntypedColumn[] { column };

    ShortSet sample = new ShortSet(100);
    sample.Or(new ushort[] { 1, 2, 3 });

    // BaseAggregator must not implement unexpected types or methods
    IAggregator aggregator = new BaseAggregator();
    Verify.Exception<NotImplementedException>(() => aggregator.Aggregate(null, sample, columns));
    Verify.Exception<NotImplementedException>(() => aggregator.Merge(null, new object[2]));
}
/// <summary>
///  Return the unique values of this.Column among the rows matching
///  this.Where, using a worker created for the column's type.
/// </summary>
/// <param name="p">Partition to compute against</param>
/// <returns>
///  Result whose Details carry any errors; on success ColumnType,
///  AllValuesReturned and a single-column Values block are populated
/// </returns>
/// <exception cref="System.ArgumentNullException">p is null</exception>
public virtual DistinctResult Compute(Partition p)
{
    if (p == null)
    {
        throw new ArgumentNullException("p");
    }

    // A Count of zero, or one too large for a ushort, means "up to ushort.MaxValue"
    ushort countToReturnPerPartition;
    if (this.Count == 0 || this.Count > ushort.MaxValue)
    {
        countToReturnPerPartition = ushort.MaxValue;
    }
    else
    {
        countToReturnPerPartition = (ushort)this.Count;
    }

    DistinctResult result = new DistinctResult(this);

    // The column must exist in this partition
    if (!p.ContainsColumn(this.Column))
    {
        result.Details.AddError(ExecutionDetails.ColumnDoesNotExist, this.Column);
        return result;
    }

    // Evaluate the where clause; failures are recorded in result.Details
    ShortSet matchingRows = new ShortSet(p.Count);
    this.Where.TryEvaluate(p, matchingRows, result.Details);

    if (!result.Details.Succeeded)
    {
        return result;
    }

    IUntypedColumn column = p.Columns[this.Column];

    // Create a worker closed over the column's type so it can work with the inner column directly
    GetUniqueValuesWorker worker = NativeContainer.CreateTypedInstance<GetUniqueValuesWorker>(typeof(GetUniqueValuesWorker<>), column.ColumnType);

    bool allValuesReturned;
    Array uniqueValues = worker.GetUniqueValuesFromColumn(column.InnerColumn, matchingRows, countToReturnPerPartition, out allValuesReturned);

    result.ColumnType = column.ColumnType;
    result.AllValuesReturned = allValuesReturned;

    // Wrap the unique values in a one-column DataBlock
    DataBlock block = new DataBlock(new string[] { this.Column }, uniqueValues.GetLength(0));
    block.SetColumn(0, uniqueValues);
    result.Values = block;

    return result;
}
/// <summary>
///  Copy one column of a data block into the partition column of the same
///  name, writing each row's value at the LID given for that row. Does nothing
///  when the column is the partition's ID column.
/// </summary>
/// <param name="p">Partition to write into</param>
/// <param name="values">Block holding the values to write</param>
/// <param name="columnIndex">Index, within the block, of the column to copy</param>
/// <param name="itemLIDs">Per-row LIDs; rows whose LID is ushort.MaxValue are skipped</param>
/// <exception cref="ArribaWriteException">Setting a value failed; the original exception is wrapped</exception>
public void FillPartitionColumn(Partition p, DataBlock.ReadOnlyDataBlock values, int columnIndex, ushort[] itemLIDs)
{
    string columnName = values.Columns[columnIndex].Name;

    // The ID column is never written here
    // NOTE(review): presumably it is populated when LIDs are assigned - confirm against the caller
    if (columnName.Equals(p.IDColumn.Name, StringComparison.OrdinalIgnoreCase))
    {
        return;
    }

    // Use the typed column when T matches the column's type; otherwise fall back to the untyped indexer
    IUntypedColumn untypedColumn = p.Columns[columnName];
    IColumn<T> typedColumn = (typeof(T) == untypedColumn.ColumnType)
        ? (IColumn<T>)untypedColumn.InnerColumn
        : null;

    int rowCount = values.RowCount;
    for (int rowIndex = 0; rowIndex < rowCount; ++rowIndex)
    {
        T value = values.GetValueT<T>(rowIndex, columnIndex);

        // An item with no LID assigned gets no values set
        ushort lid = itemLIDs[rowIndex];
        if (lid == ushort.MaxValue)
        {
            continue;
        }

        try
        {
            if (typedColumn == null)
            {
                untypedColumn[lid] = value;
            }
            else
            {
                typedColumn[lid] = value;
            }
        }
        catch (Exception ex)
        {
            // Report the failing row using the value in the block's first column
            throw new ArribaWriteException(values[rowIndex, 0], columnName, value, ex);
        }
    }
}
/// <summary>
///  Bucket the values of this.Column, for the rows matching this.Where, into
///  this.Buckets using a Bucketer created for the column's type.
/// </summary>
/// <param name="p">Partition to compute against</param>
/// <returns>
///  Result whose Details carry any errors; on success Values holds the
///  bucketer output and Total the number of matching rows
/// </returns>
/// <exception cref="System.ArgumentNullException">p is null</exception>
public DataBlockResult Compute(Partition p)
{
    if (p == null)
    {
        throw new ArgumentNullException("p");
    }

    DataBlockResult result = new DataBlockResult(this);

    // The column must exist in this partition
    if (!p.ContainsColumn(this.Column))
    {
        result.Details.AddError(ExecutionDetails.ColumnDoesNotExist, this.Column);
        return result;
    }

    // Without bucket boundaries the percentile operation is reported as unsupported for this column
    if (this.Buckets == null)
    {
        result.Details.AddError(ExecutionDetails.ColumnDoesNotSupportOperator, "percentile", this.Column);
        return result;
    }

    // Evaluate the where clause; failures are recorded in result.Details
    ShortSet matchingRows = new ShortSet(p.Count);
    this.Where.TryEvaluate(p, matchingRows, result.Details);

    IUntypedColumn column = p.Columns[this.Column];

    if (!result.Details.Succeeded)
    {
        return result;
    }

    Bucketer bucketer = NativeContainer.CreateTypedInstance<Bucketer>(typeof(Bucketer<>), column.ColumnType);
    result.Values = bucketer.Bucket(column.InnerColumn, matchingRows, this.Buckets, this.Inclusive);
    result.Total = matchingRows.Count();

    return result;
}
/// <summary>
///  Run this.Aggregator over the rows matching this.Where, once per
///  combination of dimension values plus a total entry per dimension, and
///  return the aggregates in a DataBlock with one column per dimension and a
///  final "Aggregate" column.
/// </summary>
/// <param name="p">Partition to compute against</param>
/// <returns>
///  Result carrying the aggregation context, the values block, the number of
///  rows matching the base query and the elapsed runtime; returned early, with
///  an error in Details, if an aggregation column does not exist
/// </returns>
/// <exception cref="System.ArgumentNullException">p is null</exception>
public AggregationResult Compute(Partition p)
{
    if (p == null)
    {
        throw new ArgumentNullException("p");
    }

    Stopwatch timer = Stopwatch.StartNew();

    AggregationResult result = new AggregationResult(this);
    result.AggregationContext = this.Aggregator.CreateContext();

    // Resolve the columns handed to the aggregation function, if any
    IUntypedColumn[] columns = null;
    if (this.AggregationColumns != null)
    {
        columns = new IUntypedColumn[this.AggregationColumns.Length];

        for (int c = 0; c < this.AggregationColumns.Length; ++c)
        {
            string columnName = this.AggregationColumns[c];
            if (!p.Columns.TryGetValue(columnName, out columns[c]))
            {
                result.Details.AddError(ExecutionDetails.ColumnDoesNotExist, columnName);
                return result;
            }
        }
    }

    // Name one result column per dimension and count the cells to aggregate:
    // each dimension contributes its group-by values plus one total entry
    List<string> resultBlockColumns = new List<string>();
    int cellCount = 1;

    for (int d = 0; d < this.Dimensions.Count; ++d)
    {
        AggregationDimension dimension = this.Dimensions[d];

        string label = String.IsNullOrEmpty(dimension.Name)
            ? StringExtensions.Format("Dimension {0}", d + 1)
            : dimension.Name;
        resultBlockColumns.Add(label);

        cellCount *= (dimension.GroupByWhere.Count + 1);
    }

    resultBlockColumns.Add("Aggregate");

    // Block which will hold the final results
    result.Values = new DataBlock(resultBlockColumns, cellCount);

    // Rows matching the base query
    ShortSet baseWhereSet = new ShortSet(p.Count);
    this.Where.TryEvaluate(p, baseWhereSet, result.Details);
    result.Total = baseWhereSet.Count();

    if (this.Dimensions.Count == 1)
    {
        // Single dimension: reuse one scratch set and aggregate row by row
        AggregationDimension onlyDimension = this.Dimensions[0];
        ShortSet scratch = new ShortSet(p.Count);
        int blockRow = 0;

        foreach (IExpression dimensionValue in onlyDimension.GroupByWhere)
        {
            // Rows matching this dimension value, restricted to the base query
            scratch.Clear();
            dimensionValue.TryEvaluate(p, scratch, result.Details);
            scratch.And(baseWhereSet);

            // Only non-empty sets are aggregated; the cell is otherwise left unset
            if (!scratch.IsEmpty())
            {
                result.Values[blockRow, 1] = this.Aggregator.Aggregate(result.AggregationContext, scratch, columns);
            }

            blockRow++;
        }

        // Final row: the aggregate over the whole base query
        result.Values[blockRow, 1] = this.Aggregator.Aggregate(result.AggregationContext, baseWhereSet, columns);
    }
    else
    {
        // Evaluate each dimension value once up front
        List<List<Tuple<IExpression, ShortSet>>> allDimensionValueSets = new List<List<Tuple<IExpression, ShortSet>>>();

        foreach (AggregationDimension dimension in this.Dimensions)
        {
            List<Tuple<IExpression, ShortSet>> dimensionSets = new List<Tuple<IExpression, ShortSet>>();

            // One entry per value in this dimension
            foreach (IExpression dimensionValue in dimension.GroupByWhere)
            {
                ShortSet valueSet = new ShortSet(p.Count);
                dimensionValue.TryEvaluate(p, valueSet, result.Details);
                dimensionSets.Add(new Tuple<IExpression, ShortSet>(dimensionValue, valueSet));
            }

            // Plus one 'total' entry backed by the base query set
            dimensionSets.Add(new Tuple<IExpression, ShortSet>(new AllExpression(), baseWhereSet));

            allDimensionValueSets.Add(dimensionSets);
        }

        // Run the aggregator over the items
        AggregateAllDimensionsFlat(result.AggregationContext, result.Values, p.Count, baseWhereSet, allDimensionValueSets, columns, this.Aggregator);
    }

    // Add the dimension names only when this is the sole partition; otherwise merge will add them
    if (p.Mask.Equals(PartitionMask.All))
    {
        AddDimensionsToBlock(result.Values);
    }

    // Capture timing and return
    result.Runtime = timer.Elapsed;
    return result;
}
/// <summary>
///  Evaluate the where clause against a partition, pick the LIDs to return
///  via GetLIDsToReturn, and gather the requested columns and the ORDER BY
///  column for those LIDs.
/// </summary>
/// <param name="p">Partition to compute against</param>
/// <returns>
///  Result whose Details carry any errors; on success Total, CountReturned,
///  Values and OrderByValues are populated
/// </returns>
/// <exception cref="System.ArgumentNullException">p is null</exception>
public SelectResult Compute(Partition p)
{
    if (p == null)
    {
        throw new ArgumentNullException("p");
    }

    SelectResult result = new SelectResult(this.Query);

    // Find the set of items matching all terms
    ShortSet whereSet = new ShortSet(p.Count);
    this.Where.TryEvaluate(p, whereSet, result.Details);

    // A named ORDER BY column must exist
    bool hasOrderBy = !String.IsNullOrEmpty(this.OrderByColumn);
    if (hasOrderBy && !p.Columns.ContainsKey(this.OrderByColumn))
    {
        result.Details.AddError(ExecutionDetails.ColumnDoesNotExist, this.OrderByColumn);
        return result;
    }

    if (!result.Details.Succeeded)
    {
        return result;
    }

    result.Total = whereSet.Count();

    // LIDs to return for the query (up to 'Count' after 'Skip' in ORDER BY order)
    ushort[] lidsToReturn = GetLIDsToReturn(p, this, result, whereSet);
    result.CountReturned = (ushort)lidsToReturn.Length;

    // Read the ORDER BY column once; it is reused if it is also a selected column
    IUntypedColumn column = null;
    if (!p.Columns.TryGetValue(this.OrderByColumn, out column))
    {
        result.Details.AddError(ExecutionDetails.ColumnDoesNotExist, this.OrderByColumn);
        return result;
    }

    Array orderByValues = column.GetValues(lidsToReturn);

    // Gather each response column. The variable keeps the static type Array (not Array[]),
    // so the DataBlock constructor below is resolved exactly as before.
    Array columns = new Array[this.Columns.Count];
    for (int i = 0; i < this.Columns.Count; ++i)
    {
        string columnName = this.Columns[i];

        if (columnName == this.OrderByColumn)
        {
            columns.SetValue(orderByValues, i);
            continue;
        }

        if (!p.Columns.TryGetValue(columnName, out column))
        {
            result.Details.AddError(ExecutionDetails.ColumnDoesNotExist, columnName);
            return result;
        }

        Array columnValues = column.GetValues(lidsToReturn);

        // Highlighting, when configured, is applied to the values as they are gathered
        if (Query.Highlighter != null)
        {
            Query.Highlighter.Highlight(columnValues, column, Query);
        }

        columns.SetValue(columnValues, i);
    }

    result.Values = new DataBlock(p.GetDetails(this.Columns), result.CountReturned, columns);
    result.OrderByValues = new DataBlock(p.GetDetails(new string[] { this.OrderByColumn }), result.CountReturned, new Array[] { orderByValues });

    return result;
}
/// <summary>
///  Look up the LID of every row's external ID in the partition's ID column
///  and, unless adds are being ignored, assign new LIDs to IDs which were not
///  found. Also verifies that every ID's hash matches the partition mask.
/// </summary>
/// <param name="p">Partition to search and, for adds, to grow</param>
/// <param name="values">Block holding the rows being added or updated</param>
/// <param name="idColumnIndex">Index, within the block, of the ID column</param>
/// <param name="mode">Whether new IDs are added, rejected (UpdateOnly) or ignored (UpdateAndIgnoreAdds)</param>
/// <returns>One LID per row; ushort.MaxValue marks rows which were not found and not added</returns>
/// <exception cref="ArribaException">An ID's hash does not match the partition mask</exception>
/// <exception cref="ArribaWriteException">A new ID was seen in UpdateOnly mode, or the partition cannot take more items</exception>
public ushort[] FindOrAssignLIDs(Partition p, DataBlock.ReadOnlyDataBlock values, int idColumnIndex, AddOrUpdateMode mode)
{
    // TODO: consider keeping one instance of the worker long term? if so, this becomes a private class field
    ValueTypeReference<T> idReference = new ValueTypeReference<T>();
    Value idAsValue = Value.Create(null);

    ushort[] itemLIDs = new ushort[values.RowCount];
    int addCount = 0;

    // Use the typed ID column when T matches its type; otherwise go through the untyped column
    IUntypedColumn idColumn = p.Columns[p.IDColumn.Name];
    IColumn<T> typedIdColumn = null;
    if (typeof(T) == idColumn.ColumnType)
    {
        typedIdColumn = (IColumn<T>)idColumn.InnerColumn;
    }

    // Pass 1: look up the existing LID for each row and validate partition routing
    for (int index = 0; index < values.RowCount; ++index)
    {
        T externalID = values.GetValueT<T>(index, idColumnIndex);

        if (typedIdColumn == null)
        {
            idColumn.TryGetIndexOf(externalID, out itemLIDs[index]);
        }
        else
        {
            typedIdColumn.TryGetIndexOf(externalID, out itemLIDs[index]);
        }

        // ushort.MaxValue marks an ID which was not found
        if (itemLIDs[index] == ushort.MaxValue)
        {
            addCount++;
        }

        // Verify this item was routed to the right partition
        idReference.Value = externalID;
        idAsValue.Assign(idReference);
        int idHash = idAsValue.GetHashCode();

        if (!p.Mask.Matches(idHash))
        {
            throw new ArribaException(StringExtensions.Format("Item with ID '{0}', hash '{1:x}' incorrectly routed to Partition {2}.", externalID, idHash, p.Mask));
        }
    }

    // When adds are ignored, the lookups above are the whole answer
    if (mode == AddOrUpdateMode.UpdateAndIgnoreAdds)
    {
        return itemLIDs;
    }

    // Pass 2: assign LIDs to the rows which were not found
    Dictionary<T, ushort> newlyAssignedLIDs = null;

    for (int index = 0; index < values.RowCount; ++index)
    {
        T externalID = values.GetValueT<T>(index, idColumnIndex);
        ushort lid = itemLIDs[index];

        // Existing items already have their LID recorded
        if (lid != ushort.MaxValue)
        {
            continue;
        }

        // Track the IDs added in this batch, created on the first add
        if (newlyAssignedLIDs == null)
        {
            newlyAssignedLIDs = new Dictionary<T, ushort>(addCount);
        }

        // If this ID was already added earlier in this batch, this row is an update of that item
        if (!newlyAssignedLIDs.TryGetValue(externalID, out lid))
        {
            if (mode == AddOrUpdateMode.UpdateOnly)
            {
                throw new ArribaWriteException(externalID, p.IDColumn.Name, externalID, new ArribaException("AddOrUpdate was in UpdateOnly mode but contained a new ID, which is an error."));
            }

            // The next LID is the current item count; ushort.MaxValue is reserved for "not found"
            lid = p._itemCount;
            if (lid == ushort.MaxValue)
            {
                throw new ArribaWriteException(externalID, p.IDColumn.Name, externalID, new ArribaException("Column full in Partition. Unable to add items."));
            }

            // Grow the partition and the ID column, then store the new ID
            p._itemCount++;
            idColumn.SetSize((ushort)(p._itemCount));

            if (typedIdColumn == null)
            {
                idColumn[lid] = externalID;
            }
            else
            {
                typedIdColumn[lid] = externalID;
            }

            newlyAssignedLIDs[externalID] = lid;
        }

        itemLIDs[index] = lid;
    }

    // Commit the updates to the ID column if the column requires it (FastAddSortedColumn does)
    ICommittable committable = idColumn as ICommittable;
    if (committable != null)
    {
        committable.Commit();
    }

    return itemLIDs;
}
/// <summary>
///  Count how often each value of this.Column occurs among the rows matching
///  this.Where, narrowed to values starting with this.ValuePrefix when a
///  prefix is set and the prefix filter evaluates successfully.
/// </summary>
/// <param name="p">Partition to compute against</param>
/// <returns>
///  Result whose Details carry any errors. Total is the number of rows
///  matching the base where clause, before any prefix narrowing; on success
///  Values and AllValuesReturned are populated
/// </returns>
/// <exception cref="System.ArgumentNullException">p is null</exception>
public override DistinctResult Compute(Partition p)
{
    if (p == null)
    {
        throw new ArgumentNullException("p");
    }

    DistinctResult result = new DistinctResult(this);

    // The column must exist in this partition
    if (!p.ContainsColumn(this.Column))
    {
        result.Details.AddError(ExecutionDetails.ColumnDoesNotExist, this.Column);
        return result;
    }

    // Evaluate the base where clause; failures are recorded in result.Details
    ShortSet matchingRows = new ShortSet(p.Count);
    this.Where.TryEvaluate(p, matchingRows, result.Details);

    // Total reflects the base query only
    result.Total = matchingRows.Count();

    // Narrow by the prefix typed so far. The filter records errors in its own
    // details, so a column which can't prefix match leaves the result's details untouched.
    if (!String.IsNullOrEmpty(this.ValuePrefix))
    {
        ExecutionDetails prefixDetails = new ExecutionDetails();
        ShortSet prefixRows = new ShortSet(p.Count);

        TermExpression prefixFilter = new TermExpression(this.Column, Operator.StartsWith, this.ValuePrefix);
        prefixFilter.TryEvaluate(p, prefixRows, prefixDetails);

        if (prefixDetails.Succeeded)
        {
            matchingRows.And(prefixRows);
        }
    }

    if (!result.Details.Succeeded)
    {
        return result;
    }

    // Tally the occurrences of each value across the remaining rows
    IUntypedColumn column = p.Columns[this.Column];
    Dictionary<object, int> countByValue = new Dictionary<object, int>();

    for (int i = 0; i < column.Count; ++i)
    {
        ushort lid = (ushort)i;
        if (!matchingRows.Contains(lid))
        {
            continue;
        }

        object value = column[lid];

        // On a miss TryGetValue leaves 'seenSoFar' at zero, so the first sighting stores 1
        int seenSoFar;
        countByValue.TryGetValue(value, out seenSoFar);
        countByValue[value] = seenSoFar + 1;
    }

    // Convert the counts into a DataBlock limited by this.Count
    result.Values = ToDataBlock(countByValue, this.Column, (int)this.Count);
    result.AllValuesReturned = result.Values.RowCount == countByValue.Count;

    return result;
}