/// <summary>
/// Produces the output schema of this mapper: a single column named
/// <c>_outputColName</c> of type <c>_outputColType</c> and no metadata.
/// </summary>
/// <returns>A one-element array describing the output column.</returns>
public Schema.Column[] GetOutputColumns()
{
    return new[] { new Schema.Column(_outputColName, _outputColType, null) };
}
/// <summary>
/// Core implementation returning the output schema: one column named
/// <c>_outputColName</c> of type <c>_outputColType</c> with no metadata.
/// </summary>
/// <returns>A one-element array describing the output column.</returns>
protected override Schema.Column[] GetOutputColumnsCore()
{
    return new[] { new Schema.Column(_outputColName, _outputColType, null) };
}
/// <summary>
/// Round-trips a column through the Units table: generates an ADD COLUMN
/// statement via <c>SqlSchemaParser</c>, executes it, then generates and
/// executes the matching DROP COLUMN statement.
/// </summary>
/// <param name="runnerType">The concrete <c>IQueryRunner</c> type to resolve from DI.</param>
public void TestCreateDropColumn(Type runnerType)
{
    var runner = (IQueryRunner)Provider.GetRequiredService(runnerType);
    ResetDb(runner);
    var sqlRunner = (SqlQueryRunner)runner;
    var parser = new SqlSchemaParser();

    // A nullable INT column used for both the add and the drop statements.
    var column = new Schema.Column()
    {
        SqlName = "COL_TEST",
        BaseType = typeof(int),
        Nullable = true,
        OriginalType = typeof(int?),
        SqlType = new Schema.SqlTypeInfo(),
    };

    var addColumn = parser.ParseAddColumn(DB.Units, column);
    sqlRunner.ExecuteNonQuery(
        new List<SqlStatement>() { addColumn },
        new List<KeyValuePair<string, object>>());

    var dropColumn = parser.ParseDropColumn(DB.Units, column);
    sqlRunner.ExecuteNonQuery(
        new List<SqlStatement>() { dropColumn },
        new List<KeyValuePair<string, object>>());
}
/// <summary>
/// Core implementation returning one output column per entry in
/// <c>_parent.Outputs</c>, typed by the parallel <c>_parent.OutputTypes</c>
/// array, with no metadata.
/// </summary>
/// <returns>An array with one column per parent output.</returns>
protected override Schema.Column[] GetOutputColumnsCore()
{
    var outputNames = _parent.Outputs;
    var columns = new Schema.Column[outputNames.Length];
    for (int i = 0; i < columns.Length; i++)
    {
        columns[i] = new Schema.Column(outputNames[i], _parent.OutputTypes[i], null);
    }
    return columns;
}
/// <summary>
/// Produces the output schema: a single R8 vector column of length
/// <c>_parent._outputLength</c>, carrying slot-name metadata supplied by
/// <c>GetSlotNames</c>.
/// </summary>
/// <returns>A one-element array describing the output column.</returns>
public Schema.Column[] GetOutputColumns()
{
    // Attach slot names so consumers can label each slot of the output vector.
    var metaBuilder = new Schema.Metadata.Builder();
    metaBuilder.AddSlotNames(_parent._outputLength, GetSlotNames);

    var outputType = new VectorType(NumberType.R8, _parent._outputLength);
    return new[]
    {
        new Schema.Column(_parent.OutputColumnName, outputType, metaBuilder.GetMetadata()),
    };
}
/// <summary>
/// Builds a cached single-column data view of <c>Length</c> sequential ints,
/// validates the cache with one full cursor pass, and initializes the seeker
/// state (<c>_positions</c>, <c>_col</c>, <c>_seeker</c>, <c>_seekerGetter</c>)
/// used by the benchmark methods.
/// </summary>
public void Setup()
{
    var ctx = new MLContext();
    var builder = new ArrayDataViewBuilder(ctx);

    // Column "A" holds 0..Length-1, so each row's value equals its position.
    int[] values = new int[Length];
    for (int i = 0; i < values.Length; ++i)
    {
        values[i] = i;
    }
    builder.AddColumn("A", NumberType.I4, values);
    var dv = builder.GetDataView();
    var cacheDv = ctx.Data.Cache(dv);
    var col = cacheDv.Schema.GetColumnOrNull("A").Value;

    // First do one pass through. This both warms the cache and sanity-checks
    // that every row's value matches its cursor position.
    using (var cursor = cacheDv.GetRowCursor(colIndex => colIndex == col.Index))
    {
        var getter = cursor.GetGetter<int>(col.Index);
        int val = 0;
        int count = 0;
        while (cursor.MoveNext())
        {
            getter(ref val);
            if (val != cursor.Position)
            {
                throw new Exception($"Unexpected value {val} at {cursor.Position}");
            }
            count++;
        }
        if (count != Length)
        {
            throw new Exception($"Expected {Length} values in cache but only saw {count}");
        }
    }
    _cacheDataView = cacheDv;

    // Only needed for seeker, but may as well set it.
    // Fixed seed (0) keeps the benchmark's seek pattern deterministic.
    _positions = new long[Length];
    var rand = new Random(0);
    for (int i = 0; i < _positions.Length; ++i)
    {
        _positions[i] = rand.Next(Length);
    }
    _col = _cacheDataView.Schema.GetColumnOrNull("A").Value;
    _seeker = ((IRowSeekable)_cacheDataView).GetSeeker(colIndex => colIndex == _col.Index);
    _seekerGetter = _seeker.GetGetter<int>(_col.Index);
}
/// <summary>
/// For PCA, the transform equation is y=U^Tx, where "^T" denotes matrix transpose, x is an 1-D vector (i.e., the input column), and U=[u_1, ..., u_PcaNum]
/// is a n-by-PcaNum matrix. The symbol u_k is the k-th largest (in terms of the associated eigenvalue) eigenvector of (1/m)*\sum_{i=1}^m x_ix_i^T,
/// where x_i is the whitened column at the i-th row and we have m rows in the training data.
/// For ZCA, the transform equation is y = US^{-1/2}U^Tx, where U=[u_1, ..., u_n] (we retain all eigenvectors) and S is a diagonal matrix whose i-th
/// diagonal element is the eigenvalues of u_i. The first U^Tx rotates x to another linear space (bases are u_1, ..., u_n), then S^{-1/2} is applied
/// to ensure unit variance, and finally we rotate the scaled result back to the original space using U (note that UU^T is identity matrix so U is
/// the inverse rotation of U^T).
/// </summary>
public override Schema.Column[] GetOutputColumns()
{
    var pairs = _parent.ColumnPairs;
    var result = new Schema.Column[pairs.Length];
    for (int iinfo = 0; iinfo < result.Length; iinfo++)
    {
        InputSchema.TryGetColumnIndex(pairs[iinfo].input, out int colIndex);
        Host.Assert(colIndex >= 0);

        // PCA with a positive component count shrinks the vector to PcaNum
        // floats; otherwise the output keeps the source column's type.
        var info = _parent._columns[iinfo];
        ColumnType outType;
        if (info.Kind == WhiteningKind.Pca && info.PcaNum > 0)
        {
            outType = new VectorType(NumberType.Float, info.PcaNum);
        }
        else
        {
            outType = _srcTypes[iinfo];
        }
        result[iinfo] = new Schema.Column(pairs[iinfo].output, outType, null);
    }
    return result;
}
/// <summary>
/// Determines the row type of this redefine node: it starts from a copy of the
/// source row's columns, then compiles each redefine expression and replaces
/// the corresponding column's type in place, recording the replaced column's
/// offset in <c>_redefineColumnOffsets</c>.
/// </summary>
/// <param name="plan">The compilation plan providing symbol and row contexts.</param>
/// <exception cref="CompilerException">
/// Thrown (UnknownIdentifier) when an expression's column alias does not name
/// an existing column.
/// </exception>
public override void DetermineDataType(Plan plan)
{
    DetermineModifiers(plan);
    _dataType = new Schema.RowType();
    // Start from a copy of every source column; redefined ones are replaced below.
    foreach (Schema.Column column in SourceRowType.Columns)
    {
        DataType.Columns.Add(column.Copy());
    }

    int index = 0;
    _redefineColumnOffsets = new int[_expressions.Count];
    plan.EnterRowContext();
    try
    {
        // Expressions are compiled with the source row pushed as an anonymous symbol.
        plan.Symbols.Push(new Symbol(String.Empty, SourceRowType));
        try
        {
            // Add a column for each expression
            PlanNode planNode;
            Schema.Column sourceColumn;
            Schema.Column newColumn;
            foreach (NamedColumnExpression column in _expressions)
            {
                int sourceColumnIndex = DataType.Columns.IndexOf(column.ColumnAlias);
                if (sourceColumnIndex < 0)
                {
                    throw new CompilerException(CompilerException.Codes.UnknownIdentifier, column, column.ColumnAlias);
                }
                sourceColumn = DataType.Columns[sourceColumnIndex];
                planNode = Compiler.CompileExpression(plan, column.Expression);
                // The redefined column keeps its name but takes the expression's type.
                newColumn = new Schema.Column(sourceColumn.Name, planNode.DataType);
                DataType.Columns[sourceColumnIndex] = newColumn;
                _redefineColumnOffsets[index] = sourceColumnIndex;
                Nodes.Add(planNode);
                index++;
            }
        }
        finally
        {
            plan.Symbols.Pop();
        }
    }
    finally
    {
        plan.ExitRowContext();
    }
}
/// <summary>
/// Determines the row type of this rename node. When no rename expressions are
/// given, all source columns are inherited with names qualified by
/// <c>_rowAlias</c>; otherwise each source column is either renamed per a
/// matching expression or copied unchanged, preserving source column order.
/// </summary>
/// <param name="plan">The compilation plan (used only for modifier determination here).</param>
/// <exception cref="Schema.SchemaException">
/// Thrown (ObjectNotFound) when a rename expression names a column not present in the source.
/// </exception>
/// <exception cref="CompilerException">
/// Thrown (DuplicateRenameColumn) when two rename expressions target the same source column.
/// </exception>
public override void DetermineDataType(Plan plan)
{
    DetermineModifiers(plan);
    _dataType = new Schema.RowType();
    if (_expressions == null)
    {
        // Inherit columns
        foreach (Schema.Column column in SourceRowType.Columns)
        {
            DataType.Columns.Add(new Schema.Column(Schema.Object.Qualify(column.Name, _rowAlias), column.DataType));
        }
    }
    else
    {
        bool columnAdded;
        Schema.Column column;
        int renameColumnIndex;
        // Walk source columns in order so output column order matches the source.
        for (int index = 0; index < SourceRowType.Columns.Count; index++)
        {
            columnAdded = false;
            foreach (RenameColumnExpression renameColumn in _expressions)
            {
                renameColumnIndex = SourceRowType.Columns.IndexOf(renameColumn.ColumnName);
                if (renameColumnIndex < 0)
                {
                    throw new Schema.SchemaException(Schema.SchemaException.Codes.ObjectNotFound, renameColumn.ColumnName);
                }
                else if (renameColumnIndex == index)
                {
                    // A second expression resolving to the same column is a duplicate rename.
                    if (columnAdded)
                    {
                        throw new CompilerException(CompilerException.Codes.DuplicateRenameColumn, renameColumn.ColumnName);
                    }
                    column = new Schema.Column(renameColumn.ColumnAlias, SourceRowType.Columns[index].DataType);
                    DataType.Columns.Add(column);
                    columnAdded = true;
                }
            }
            // Columns with no matching rename expression are copied through unchanged.
            if (!columnAdded)
            {
                DataType.Columns.Add(SourceRowType.Columns[index].Copy());
            }
        }
    }
}
/// <summary>
/// Determines the row type of this extend node: all source columns are copied,
/// then one new column is appended per extend expression, typed by the
/// compiled expression. <c>_extendColumnOffset</c> marks where the appended
/// columns begin.
/// </summary>
/// <param name="plan">The compilation plan providing symbol and row contexts.</param>
public override void DetermineDataType(Plan plan)
{
    DetermineModifiers(plan);
    _dataType = new Schema.RowType();

    // Inherit a copy of every source column.
    foreach (Schema.Column inherited in SourceRowType.Columns)
    {
        DataType.Columns.Add(inherited.Copy());
    }

    // Extended columns start right after the inherited ones.
    _extendColumnOffset = DataType.Columns.Count;

    plan.EnterRowContext();
    try
    {
        // Expressions are compiled with the source row pushed as an anonymous symbol.
        plan.Symbols.Push(new Symbol(String.Empty, SourceRowType));
        try
        {
            // Add a column for each expression
            foreach (NamedColumnExpression expression in _expressions)
            {
                PlanNode compiled = Compiler.CompileExpression(plan, expression.Expression);
                DataType.Columns.Add(new Schema.Column(expression.ColumnAlias, compiled.DataType));
                Nodes.Add(compiled);
            }
        }
        finally
        {
            plan.Symbols.Pop();
        }
    }
    finally
    {
        plan.ExitRowContext();
    }
}
/// <summary>
/// Converts an <c>InternalSchemaDefinition</c> into an array of schema
/// columns, materializing each column's metadata items (kind, type, getter)
/// through a metadata builder.
/// </summary>
/// <param name="schemaDefn">The schema definition to convert; asserted non-null.</param>
/// <returns>One <c>Schema.Column</c> per definition column, in order.</returns>
internal static Schema.Column[] GetSchemaColumns(InternalSchemaDefinition schemaDefn)
{
    Contracts.AssertValue(schemaDefn);
    var result = new Schema.Column[schemaDefn.Columns.Length];
    for (int i = 0; i < result.Length; i++)
    {
        var defnCol = schemaDefn.Columns[i];

        // Fold every metadata item into a builder as a (column, getter) pair.
        var metaBuilder = new Schema.Metadata.Builder();
        foreach (var kvp in defnCol.Metadata)
        {
            var metaCol = new Schema.Column(kvp.Value.Kind, kvp.Value.MetadataType, null);
            metaBuilder.Add(metaCol, kvp.Value.GetGetterDelegate());
        }
        result[i] = new Schema.Column(defnCol.ColumnName, defnCol.ColumnType, metaBuilder.GetMetadata());
    }
    return result;
}
/// <summary>
/// Checks whether a column kind in a <see cref="RoleMappedData"/> is unique, and its type
/// is a <see cref="DataKind.U4"/> key of known cardinality.
/// </summary>
/// <param name="data">The training examples</param>
/// <param name="role">The column role to try to extract</param>
/// <param name="col">The extracted schema column</param>
/// <param name="isDecode">Whether a non-user error should be thrown as a decode</param>
/// <returns>The type cast to a key-type</returns>
private static KeyType CheckRowColumnType(RoleMappedData data, RoleMappedSchema.ColumnRole role, out Schema.Column col, bool isDecode)
{
    Contracts.AssertValue(data);
    Contracts.AssertValue(role.Value);

    // The role must map to exactly one column.
    const string format2 = "There should be exactly one column with role {0}, but {1} were found instead";
    if (!data.Schema.HasUnique(role))
    {
        int kindCount = Utils.Size(data.Schema.GetColumns(role));
        if (isDecode)
        {
            throw Contracts.ExceptDecode(format2, role.Value, kindCount);
        }
        throw Contracts.Except(format2, role.Value, kindCount);
    }
    col = data.Schema.GetColumns(role)[0];

    // REVIEW tfinley: Should we be a bit less restrictive? This doesn't seem like
    // too terrible of a restriction.
    const string format = "Column '{0}' with role {1} should be a known cardinality U4 key, but is instead '{2}'";
    if (!TryMarshalGoodRowColumnType(col.Type, out KeyType keyType))
    {
        if (isDecode)
        {
            throw Contracts.ExceptDecode(format, col.Name, role.Value, col.Type);
        }
        throw Contracts.Except(format, col.Name, role.Value, col.Type);
    }
    return keyType;
}
/// <summary>
/// Check if the considered data, <see cref="RoleMappedData"/>, contains column roles specified by <see cref="MatrixColumnIndexKind"/> and <see cref="MatrixRowIndexKind"/>.
/// If the column roles, <see cref="MatrixColumnIndexKind"/> and <see cref="MatrixRowIndexKind"/>, uniquely exist in data, their <see cref="Schema.Column"/> would be assigned
/// to the two out parameters below.
/// </summary>
/// <param name="data">The considered data being checked</param>
/// <param name="matrixColumnIndexColumn">The schema column as the row in the input data</param>
/// <param name="matrixRowIndexColumn">The schema column as the column in the input data</param>
/// <param name="isDecode">Whether a non-user error should be thrown as a decode</param>
public static void CheckAndGetMatrixIndexColumns(RoleMappedData data, out Schema.Column matrixColumnIndexColumn, out Schema.Column matrixRowIndexColumn, bool isDecode)
{
    Contracts.AssertValue(data);

    // Validate and fetch each of the two index roles independently.
    CheckRowColumnType(data, MatrixColumnIndexKind, out matrixColumnIndexColumn, isDecode);
    CheckRowColumnType(data, MatrixRowIndexKind, out matrixRowIndexColumn, isDecode);
}
/// <summary>
/// Returns a .NET type corresponding to the static pipelines that would tend to represent this column.
/// Generally this will return <c>null</c> if it simply does not recognize the type but might throw if
/// there is something seriously wrong with it.
/// </summary>
/// <param name="col">The column</param>
/// <returns>The .NET type for the static pipelines that should be used to reflect this type, given
/// both the characteristics of the <see cref="ColumnType"/> as well as one or two crucial pieces of metadata</returns>
private static Type GetTypeOrNull(Schema.Column col)
{
    var t = col.Type;
    Type vecType = null;
    if (t is VectorType vt)
    {
        // Fixed-size vectors map to Vector<>, variable-size to VarVector<>.
        vecType = vt.VectorSize > 0 ? typeof(Vector<>) : typeof(VarVector<>);
        // Check normalized subtype of vectors.
        if (vt.VectorSize > 0)
        {
            // Check to see if the column is normalized.
            // Once we shift to metadata being a row globally we can also make this a bit more efficient:
            var meta = col.Metadata;
            if (meta.Schema.TryGetColumnIndex(MetadataUtils.Kinds.IsNormalized, out int normcol))
            {
                var normtype = meta.Schema.GetColumnType(normcol);
                if (normtype == BoolType.Instance)
                {
                    // Only a true boolean IsNormalized upgrades Vector<> to NormVector<>.
                    bool val = default;
                    meta.GetGetter<bool>(normcol)(ref val);
                    if (val)
                    {
                        vecType = typeof(NormVector<>);
                    }
                }
            }
        }
        t = t.ItemType; // Fall through to the non-vector case to handle subtypes.
    }
    Contracts.Assert(!t.IsVector);
    if (t is KeyType kt)
    {
        Type physType = StaticKind(kt.RawKind);
        // Keys are only ever one of the four unsigned integral widths.
        Contracts.Assert(physType == typeof(byte) || physType == typeof(ushort) ||
            physType == typeof(uint) || physType == typeof(ulong));
        var keyType = kt.Count > 0 ? typeof(Key<>) : typeof(VarKey<>);
        keyType = keyType.MakeGenericType(physType);
        if (kt.Count > 0)
        {
            // Check to see if we have key value metadata of the appropriate type, size, and whatnot.
            var meta = col.Metadata;
            if (meta.Schema.TryGetColumnIndex(MetadataUtils.Kinds.KeyValues, out int kvcolIndex))
            {
                var kvcol = meta.Schema[kvcolIndex];
                var kvType = kvcol.Type;
                if (kvType is VectorType kvVecType && kvVecType.Size == kt.Count)
                {
                    Contracts.Assert(kt.Count > 0);
                    // Recurse on the key-value column; a recognized vector/normalized-vector
                    // subtype means the key carries a value type: Key<TPhys, TVal>.
                    var subtype = GetTypeOrNull(kvcol);
                    if (subtype != null && subtype.IsGenericType)
                    {
                        var sgtype = subtype.GetGenericTypeDefinition();
                        if (sgtype == typeof(NormVector<>) || sgtype == typeof(Vector<>))
                        {
                            var args = subtype.GetGenericArguments();
                            Contracts.Assert(args.Length == 1);
                            keyType = typeof(Key<,>).MakeGenericType(physType, args[0]);
                        }
                    }
                }
            }
        }
        return(vecType?.MakeGenericType(keyType) ?? keyType);
    }
    if (t is PrimitiveType pt)
    {
        Type physType = StaticKind(pt.RawKind);
        // Though I am unaware of any existing instances, it is theoretically possible for a
        // primitive type to exist, have the same data kind as one of the existing types, and yet
        // not be one of the built in types. (For example, an outside analogy to the key types.) For this
        // reason, we must be certain that when we return here we are covering one fo the builtin types.
        if (physType != null && (
                pt == NumberType.I1 || pt == NumberType.I2 || pt == NumberType.I4 || pt == NumberType.I8 ||
                pt == NumberType.U1 || pt == NumberType.U2 || pt == NumberType.U4 || pt == NumberType.U8 ||
                pt == NumberType.R4 || pt == NumberType.R8 || pt == NumberType.UG || pt == BoolType.Instance ||
                pt == DateTimeType.Instance || pt == DateTimeOffsetType.Instance || pt == TimeSpanType.Instance ||
                pt == TextType.Instance))
        {
            // Scalar<> for a plain primitive; wrap in the vector type when we came from a vector.
            return((vecType ?? typeof(Scalar<>)).MakeGenericType(physType));
        }
    }
    return(null);
}
/// <summary>
/// Create and initialize a new mapping which produces a flat target set with all primitive columns for copying primitive data from the source set.
/// Only identity (PK) source columns are expanded recursively.
/// For relational source, this means that all primitive columns of the source table will be mapped with their relational names, no FK-referenced tables will be joined and no artifical column names will be used.
/// If it is necessary to expand entity columns (non-PK columns of joined tables) then a different implementation is needed (which will require joins, artifical column/path names etc.)
/// </summary>
/// <param name="sourceSet">Non-primitive source table to copy from.</param>
/// <param name="targetSet">Non-primitive target table to copy into.</param>
/// <param name="targetSchema">Target schema; may be null, in which case the target set's own schema is used.</param>
/// <returns>A mapping with one path match per copied primitive source column.</returns>
public Mapping CreatePrimitive(DcTable sourceSet, DcTable targetSet, DcSchema targetSchema)
{
    Debug.Assert(!sourceSet.IsPrimitive && !targetSet.IsPrimitive, "Wrong use: copy mapping can be created for only non-primitive tables.");
    Debug.Assert(targetSchema != null || targetSet.Schema != null, "Wrong use: target schema must be specified.");

    Mapping map = new Mapping(sourceSet, targetSet);

    DcSchema sourceSchema = map.SourceTab.Schema;
    if (targetSchema == null)
    {
        targetSchema = targetSet.Schema;
    }

    ColumnPath sp;
    ColumnPath tp;
    DcColumn td;
    PathMatch match;

    // Relational (OLEDB) sources: map each greater path using its relational column name.
    if (sourceSchema is SchemaOledb)
    {
        TableRel set = (TableRel)map.SourceTab;
        foreach (ColumnAtt att in set.GreaterPaths)
        {
            sp = new ColumnAtt(att);

            // Recommend matching target type (mapping primitive types)
            this.MapPrimitiveSet(att.Output, targetSchema);
            DcTable targetType = this.GetBestTargetSet(att.Output, targetSchema);

            td = new Schema.Column(att.RelationalColumnName, map.TargetTab, targetType, att.IsKey, false);
            tp = new ColumnPath(td);
            // Target path keeps the source path's name so names line up after the copy.
            tp.Name = sp.Name;

            match = new PathMatch(sp, tp, 1.0);
            map.Matches.Add(match);
        }
    }
    // CSV sources: infer the most specific primitive type from sample values.
    else if (sourceSchema is SchemaCsv)
    {
        DcTable set = (DcTable)map.SourceTab;
        foreach (DcColumn sd in set.Columns)
        {
            // Skip the super (inclusion) column; it is not a data column.
            if (sd.IsSuper)
            {
                continue;
            }

            // Recommend matching target type (mapping primitive types)
            //this.MapPrimitiveSet(sd, targetSchema);
            //ComTable targetType = this.GetBestTargetSet(sd.Output, targetSchema);

            //
            // Analyze sample values of sd and choose the most specific target type
            //
            List<string> values = ((ColumnCsv)sd).SampleValues;

            // Type inference order: Integer is the narrowest, then Double, then String as fallback.
            string targetTypeName;
            if (Com.Schema.Utils.isInt32(values.ToArray()))
            {
                targetTypeName = "Integer";
            }
            else if (Com.Schema.Utils.isDouble(values.ToArray()))
            {
                targetTypeName = "Double";
            }
            else
            {
                targetTypeName = "String";
            }
            DcTable targetType = targetSchema.GetPrimitiveType(targetTypeName);

            td = targetSchema.Space.CreateColumn(sd.Name, map.TargetTab, targetType, sd.IsKey);
            sp = new ColumnPath(sd);
            tp = new ColumnPath(td);

            match = new PathMatch(sp, tp, 1.0);
            map.Matches.Add(match);
        }
    }

    return(map);
}