/// <summary>
            /// Builds the description of the single output column.
            /// </summary>
            /// <returns>A one-element array whose column is created from <c>_outputColName</c> and
            /// <c>_outputColType</c>, with <c>null</c> passed as the metadata argument.</returns>
            public Schema.Column[] GetOutputColumns()
            {
                var outputColumn = new Schema.Column(_outputColName, _outputColType, null);

                return new Schema.Column[] { outputColumn };
            }
示例#2
0
            /// <summary>
            /// Builds the description of the single output column.
            /// </summary>
            /// <returns>A one-element array whose column is created from <c>_outputColName</c> and
            /// <c>_outputColType</c>, with <c>null</c> passed as the metadata argument.</returns>
            protected override Schema.Column[] GetOutputColumnsCore()
            {
                return new[]
                {
                    new Schema.Column(_outputColName, _outputColType, null),
                };
            }
示例#3
0
        /// <summary>
        /// Adds a nullable integer column named <c>COL_TEST</c> to <c>DB.Units</c> and then drops it again,
        /// executing both statements through the SQL runner resolved for <paramref name="runnerType"/>.
        /// </summary>
        /// <param name="runnerType">Service type used to resolve the <c>IQueryRunner</c> from <c>Provider</c>;
        /// the resolved instance is also cast to <c>SqlQueryRunner</c>.</param>
        public void TestCreateDropColumn(Type runnerType)
        {
            var queryRunner = (IQueryRunner)Provider.GetRequiredService(runnerType);
            ResetDb(queryRunner);

            var sqlRunner    = (SqlQueryRunner)queryRunner;
            var schemaParser = new SqlSchemaParser();

            // Description of the temporary column that is created and removed again.
            var testColumn = new Schema.Column();
            testColumn.SqlName      = "COL_TEST";
            testColumn.BaseType     = typeof(int);
            testColumn.Nullable     = true;
            testColumn.OriginalType = typeof(int?);
            testColumn.SqlType      = new Schema.SqlTypeInfo();

            // Step 1: ALTER TABLE ... ADD COLUMN, executed without any parameters.
            var addStatement = schemaParser.ParseAddColumn(DB.Units, testColumn);
            sqlRunner.ExecuteNonQuery(
                new List<SqlStatement> { addStatement },
                new List<KeyValuePair<string, object>>());

            // Step 2: drop the same column again, also without parameters.
            var dropStatement = schemaParser.ParseDropColumn(DB.Units, testColumn);
            sqlRunner.ExecuteNonQuery(
                new List<SqlStatement> { dropStatement },
                new List<KeyValuePair<string, object>>());
        }
            /// <summary>
            /// Builds one output column per entry of <c>_parent.Outputs</c>.
            /// </summary>
            /// <returns>An array in which element <c>i</c> is named <c>_parent.Outputs[i]</c>, typed
            /// <c>_parent.OutputTypes[i]</c>, and created with <c>null</c> as the metadata argument.</returns>
            protected override Schema.Column[] GetOutputColumnsCore()
            {
                int outputCount = _parent.Outputs.Length;
                var columns     = new Schema.Column[outputCount];

                int position = 0;
                while (position < outputCount)
                {
                    columns[position] = new Schema.Column(_parent.Outputs[position], _parent.OutputTypes[position], null);
                    position++;
                }

                return columns;
            }
示例#5
0
            /// <summary>
            /// Builds the description of the single output column, including slot-name metadata.
            /// </summary>
            /// <returns>A one-element array holding a column named <c>_parent.OutputColumnName</c> whose type is
            /// an R8 vector of size <c>_parent._outputLength</c> and whose metadata carries slot names supplied
            /// by <c>GetSlotNames</c>.</returns>
            public Schema.Column[] GetOutputColumns()
            {
                // Register the slot-name getter first so it is part of the metadata attached below.
                var metadataBuilder = new Schema.Metadata.Builder();
                metadataBuilder.AddSlotNames(_parent._outputLength, GetSlotNames);

                var outputColumn = new Schema.Column(
                    _parent.OutputColumnName,
                    new VectorType(NumberType.R8, _parent._outputLength),
                    metadataBuilder.GetMetadata());

                return new[] { outputColumn };
            }
        /// <summary>
        /// Prepares the state used by the cache benchmarks: builds a cached data view with one I4 column
        /// "A" containing 0..Length-1, reads it once end to end to verify its contents, and then sets up the
        /// random positions, the column handle, the seeker and the seeker's getter.
        /// </summary>
        /// <exception cref="Exception">The first pass over the cache saw a value different from its row
        /// position, or did not see exactly <c>Length</c> rows.</exception>
        public void Setup()
        {
            var mlContext = new MLContext();

            // Column "A" holds 0, 1, ..., Length - 1, so each value equals its row position.
            int[] sequence = new int[Length];
            for (int i = 0; i < sequence.Length; ++i)
            {
                sequence[i] = i;
            }

            var viewBuilder = new ArrayDataViewBuilder(mlContext);
            viewBuilder.AddColumn("A", NumberType.I4, sequence);
            var cachedView = mlContext.Data.Cache(viewBuilder.GetDataView());

            var columnA = cachedView.Schema.GetColumnOrNull("A").Value;

            // Read the cache once from start to end, checking every value on the way.
            using (var cursor = cachedView.GetRowCursor(index => index == columnA.Index))
            {
                var readValue = cursor.GetGetter <int>(columnA.Index);
                int current   = 0;
                int rowsSeen  = 0;

                for (; cursor.MoveNext(); rowsSeen++)
                {
                    readValue(ref current);
                    if (current != cursor.Position)
                    {
                        throw new Exception($"Unexpected value {current} at {cursor.Position}");
                    }
                }

                if (rowsSeen != Length)
                {
                    throw new Exception($"Expected {Length} values in cache but only saw {rowsSeen}");
                }
            }
            _cacheDataView = cachedView;

            // The positions are only used by the seeker, but they are always prepared.
            // The fixed seed keeps the sequence of positions reproducible.
            var random = new Random(0);
            _positions = new long[Length];
            for (int slot = 0; slot < _positions.Length; ++slot)
            {
                _positions[slot] = random.Next(Length);
            }

            _col          = _cacheDataView.Schema.GetColumnOrNull("A").Value;
            _seeker       = ((IRowSeekable)_cacheDataView).GetSeeker(index => index == _col.Index);
            _seekerGetter = _seeker.GetGetter <int>(_col.Index);
        }
            /// <summary>
            /// Builds one output column per input/output pair of the parent transform.
            /// For PCA, the transform equation is y=U^Tx, where "^T" denotes matrix transpose, x is a 1-D vector
            /// (i.e., the input column), and U=[u_1, ..., u_PcaNum] is an n-by-PcaNum matrix. The symbol u_k is
            /// the eigenvector of (1/m)*\sum_{i=1}^m x_ix_i^T with the k-th largest associated eigenvalue, where
            /// x_i is the whitened column at the i-th row and the training data has m rows.
            /// For ZCA, the transform equation is y = US^{-1/2}U^Tx, where U=[u_1, ..., u_n] (all eigenvectors
            /// are retained) and S is a diagonal matrix whose i-th diagonal element is the eigenvalue of u_i.
            /// First U^Tx rotates x into another linear space (with bases u_1, ..., u_n), then S^{-1/2} is
            /// applied to ensure unit variance, and finally the scaled result is rotated back to the original
            /// space using U (note that UU^T is the identity matrix, so U is the inverse rotation of U^T).
            /// </summary>
            /// <returns>An array with one column per entry of <c>_parent.ColumnPairs</c>. A column configured
            /// as PCA with a positive <c>PcaNum</c> gets a float vector type of size <c>PcaNum</c>; every other
            /// column keeps the corresponding entry of <c>_srcTypes</c>.</returns>
            public override Schema.Column[] GetOutputColumns()
            {
                int pairCount = _parent.ColumnPairs.Length;
                var columns   = new Schema.Column[pairCount];

                for (int i = 0; i < pairCount; i++)
                {
                    InputSchema.TryGetColumnIndex(_parent.ColumnPairs[i].input, out int sourceIndex);
                    Host.Assert(sourceIndex >= 0);

                    var        columnInfo = _parent._columns[i];
                    ColumnType outputType;
                    if (columnInfo.Kind == WhiteningKind.Pca && columnInfo.PcaNum > 0)
                    {
                        // PCA with an explicit component count changes the output dimensionality.
                        outputType = new VectorType(NumberType.Float, columnInfo.PcaNum);
                    }
                    else
                    {
                        outputType = _srcTypes[i];
                    }

                    columns[i] = new Schema.Column(_parent.ColumnPairs[i].output, outputType, null);
                }

                return columns;
            }
示例#8
0
        /// <summary>
        /// Computes the row type produced by this node. The result starts as a copy of every source column;
        /// each expression then replaces the column whose name matches its alias with a column of the same
        /// name typed by the compiled expression. The index of every replaced column is recorded in
        /// <c>_redefineColumnOffsets</c>, and the compiled expression is appended to <c>Nodes</c>.
        /// </summary>
        /// <param name="plan">Compilation plan providing the row context, the symbol stack and the context in
        /// which the expressions are compiled.</param>
        /// <exception cref="CompilerException">An expression's alias is not found among the columns of the
        /// row type being built.</exception>
        public override void DetermineDataType(Plan plan)
        {
            DetermineModifiers(plan);
            _dataType = new Schema.RowType();

            foreach (Schema.Column inherited in SourceRowType.Columns)
            {
                DataType.Columns.Add(inherited.Copy());
            }

            _redefineColumnOffsets = new int[_expressions.Count];
            int nextOffset = 0;

            plan.EnterRowContext();
            try
            {
                // Expressions are compiled with the source row on the symbol stack.
                plan.Symbols.Push(new Symbol(String.Empty, SourceRowType));
                try
                {
                    foreach (NamedColumnExpression expression in _expressions)
                    {
                        int targetIndex = DataType.Columns.IndexOf(expression.ColumnAlias);
                        if (targetIndex < 0)
                        {
                            throw new CompilerException(CompilerException.Codes.UnknownIdentifier, expression, expression.ColumnAlias);
                        }

                        Schema.Column existing = DataType.Columns[targetIndex];
                        PlanNode      compiled = Compiler.CompileExpression(plan, expression.Expression);

                        // Same name, new type: swap the column in place and remember where it lives.
                        DataType.Columns[targetIndex]        = new Schema.Column(existing.Name, compiled.DataType);
                        _redefineColumnOffsets[nextOffset++] = targetIndex;
                        Nodes.Add(compiled);
                    }
                }
                finally
                {
                    plan.Symbols.Pop();
                }
            }
            finally
            {
                plan.ExitRowContext();
            }
        }
示例#9
0
        /// <summary>
        /// Computes the row type produced by this rename node. Without a rename list, every source column
        /// is carried over under a name qualified with <c>_rowAlias</c>. With a rename list, source columns
        /// are emitted in their original order: a column targeted by a rename expression gets the
        /// expression's alias, all others are copied unchanged.
        /// </summary>
        /// <param name="plan">Compilation plan, passed on to <c>DetermineModifiers</c>.</param>
        /// <exception cref="Schema.SchemaException">A rename expression names a column that is not part of the
        /// source row type.</exception>
        /// <exception cref="CompilerException">Two rename expressions target the same source column.</exception>
        public override void DetermineDataType(Plan plan)
        {
            DetermineModifiers(plan);
            _dataType = new Schema.RowType();

            if (_expressions == null)
            {
                // No explicit rename list: inherit all columns, qualifying each name with the row alias.
                foreach (Schema.Column inherited in SourceRowType.Columns)
                {
                    var qualifiedName = Schema.Object.Qualify(inherited.Name, _rowAlias);
                    DataType.Columns.Add(new Schema.Column(qualifiedName, inherited.DataType));
                }

                return;
            }

            for (int sourceIndex = 0; sourceIndex < SourceRowType.Columns.Count; sourceIndex++)
            {
                bool renamed = false;

                foreach (RenameColumnExpression rename in _expressions)
                {
                    int renamedIndex = SourceRowType.Columns.IndexOf(rename.ColumnName);
                    if (renamedIndex < 0)
                    {
                        throw new Schema.SchemaException(Schema.SchemaException.Codes.ObjectNotFound, rename.ColumnName);
                    }

                    if (renamedIndex != sourceIndex)
                    {
                        continue;
                    }

                    // A second expression hitting the same source column is a duplicate rename.
                    if (renamed)
                    {
                        throw new CompilerException(CompilerException.Codes.DuplicateRenameColumn, rename.ColumnName);
                    }

                    DataType.Columns.Add(new Schema.Column(rename.ColumnAlias, SourceRowType.Columns[sourceIndex].DataType));
                    renamed = true;
                }

                if (!renamed)
                {
                    DataType.Columns.Add(SourceRowType.Columns[sourceIndex].Copy());
                }
            }
        }
示例#10
0
        /// <summary>
        /// Computes the row type produced by this extend node: a copy of every source column followed by one
        /// new column per expression, named by the expression's alias and typed by the compiled expression.
        /// <c>_extendColumnOffset</c> records the index at which the added columns start, and every compiled
        /// expression is appended to <c>Nodes</c>.
        /// </summary>
        /// <param name="plan">Compilation plan providing the row context, the symbol stack and the context in
        /// which the expressions are compiled.</param>
        public override void DetermineDataType(Plan plan)
        {
            DetermineModifiers(plan);
            _dataType = new Schema.RowType();

            foreach (Schema.Column inherited in SourceRowType.Columns)
            {
                DataType.Columns.Add(inherited.Copy());
            }

            // Everything added from here on is an extension column.
            _extendColumnOffset = DataType.Columns.Count;

            plan.EnterRowContext();
            try
            {
                // Expressions are compiled with the source row on the symbol stack.
                plan.Symbols.Push(new Symbol(String.Empty, SourceRowType));
                try
                {
                    foreach (NamedColumnExpression expression in _expressions)
                    {
                        PlanNode compiled = Compiler.CompileExpression(plan, expression.Expression);

                        DataType.Columns.Add(new Schema.Column(expression.ColumnAlias, compiled.DataType));
                        Nodes.Add(compiled);
                    }
                }
                finally
                {
                    plan.Symbols.Pop();
                }
            }
            finally
            {
                plan.ExitRowContext();
            }
        }
        /// <summary>
        /// Converts the columns of an internal schema definition into schema columns, carrying over each
        /// column's metadata entries.
        /// </summary>
        /// <param name="schemaDefn">The schema definition to convert; asserted to be non-null.</param>
        /// <returns>One schema column per entry of <c>schemaDefn.Columns</c>, in the same order, with the
        /// definition's name, type and metadata.</returns>
        internal static Schema.Column[] GetSchemaColumns(InternalSchemaDefinition schemaDefn)
        {
            Contracts.AssertValue(schemaDefn);

            int columnCount = schemaDefn.Columns.Length;
            var result      = new Schema.Column[columnCount];

            for (int index = 0; index < columnCount; index++)
            {
                var definition      = schemaDefn.Columns[index];
                var metadataBuilder = new Schema.Metadata.Builder();

                // Each metadata entry becomes a metadata column paired with its getter delegate.
                foreach (var entry in definition.Metadata)
                {
                    var metadataColumn = new Schema.Column(entry.Value.Kind, entry.Value.MetadataType, null);
                    metadataBuilder.Add(metadataColumn, entry.Value.GetGetterDelegate());
                }

                result[index] = new Schema.Column(definition.ColumnName, definition.ColumnType, metadataBuilder.GetMetadata());
            }

            return result;
        }
示例#12
0
        /// <summary>
        /// Verifies that exactly one column in a <see cref="RoleMappedData"/> has the given role and that
        /// its type is a <see cref="DataKind.U4"/> key of known cardinality.
        /// </summary>
        /// <param name="data">The training examples</param>
        /// <param name="role">The column role to look up</param>
        /// <param name="col">Receives the unique schema column carrying <paramref name="role"/></param>
        /// <param name="isDecode">When <c>true</c>, failures are raised through <c>Contracts.ExceptDecode</c>
        /// instead of <c>Contracts.Except</c></param>
        /// <returns>The column's type as a key type</returns>
        private static KeyType CheckRowColumnType(RoleMappedData data, RoleMappedSchema.ColumnRole role, out Schema.Column col, bool isDecode)
        {
            Contracts.AssertValue(data);
            Contracts.AssertValue(role.Value);

            const string notUniqueFormat = "There should be exactly one column with role {0}, but {1} were found instead";
            const string badTypeFormat   = "Column '{0}' with role {1} should be a known cardinality U4 key, but is instead '{2}'";

            if (!data.Schema.HasUnique(role))
            {
                int found = Utils.Size(data.Schema.GetColumns(role));
                if (!isDecode)
                {
                    throw Contracts.Except(notUniqueFormat, role.Value, found);
                }
                throw Contracts.ExceptDecode(notUniqueFormat, role.Value, found);
            }

            col = data.Schema.GetColumns(role)[0];

            // REVIEW tfinley: Should we be a bit less restrictive? This doesn't seem like
            // too terrible of a restriction.
            if (TryMarshalGoodRowColumnType(col.Type, out KeyType keyType))
            {
                return keyType;
            }

            if (!isDecode)
            {
                throw Contracts.Except(badTypeFormat, col.Name, role.Value, col.Type);
            }
            throw Contracts.ExceptDecode(badTypeFormat, col.Name, role.Value, col.Type);
        }
示例#13
0
 /// <summary>
 /// Checks that the given <see cref="RoleMappedData"/> contains exactly one column for each of the roles
 /// <see cref="MatrixColumnIndexKind"/> and <see cref="MatrixRowIndexKind"/>, and that both pass the
 /// row/column type check. On success the two <see cref="Schema.Column"/> values are returned through the
 /// out parameters.
 /// </summary>
 /// <param name="data">The data being checked</param>
 /// <param name="matrixColumnIndexColumn">Receives the schema column that carries the <see cref="MatrixColumnIndexKind"/> role</param>
 /// <param name="matrixRowIndexColumn">Receives the schema column that carries the <see cref="MatrixRowIndexKind"/> role</param>
 /// <param name="isDecode">Whether a non-user error should be thrown as a decode</param>
 public static void CheckAndGetMatrixIndexColumns(RoleMappedData data, out Schema.Column matrixColumnIndexColumn, out Schema.Column matrixRowIndexColumn, bool isDecode)
 {
     Contracts.AssertValue(data);

     // Only the out columns are needed here; the key types returned by the checks are discarded.
     // The column-index role is checked first, so its failure is the one reported if both are invalid.
     _ = CheckRowColumnType(data, MatrixColumnIndexKind, out matrixColumnIndexColumn, isDecode);
     _ = CheckRowColumnType(data, MatrixRowIndexKind, out matrixRowIndexColumn, isDecode);
 }
示例#14
0
        /// <summary>
        /// Returns a .NET type corresponding to the static pipelines that would tend to represent this column.
        /// Generally this will return <c>null</c> if it simply does not recognize the type but might throw if
        /// there is something seriously wrong with it.
        /// </summary>
        /// <remarks>
        /// Vector columns are wrapped in <c>Vector</c>, <c>VarVector</c> (size not positive) or <c>NormVector</c>
        /// (positive size and a <c>true</c> boolean <c>IsNormalized</c> metadata value). Key item types map to
        /// <c>Key</c> or <c>VarKey</c> over the physical type, or to the two-argument <c>Key</c> when suitable
        /// key-value metadata is present. The listed built-in primitive item types map to <c>Scalar</c> (or the
        /// vector wrapper) over the physical type.
        /// </remarks>
        /// <param name="col">The column</param>
        /// <returns>The .NET type for the static pipelines that should be used to reflect this type, given
        /// both the characteristics of the <see cref="ColumnType"/> as well as one or two crucial pieces of metadata</returns>
        private static Type GetTypeOrNull(Schema.Column col)
        {
            var t = col.Type;

            // Open generic vector wrapper to apply at the end; remains null for non-vector columns.
            Type vecType = null;

            if (t is VectorType vt)
            {
                vecType = vt.VectorSize > 0 ? typeof(Vector <>) : typeof(VarVector <>);
                // Check normalized subtype of vectors.
                if (vt.VectorSize > 0)
                {
                    // Check to see if the column is normalized.
                    // Once we shift to metadata being a row globally we can also make this a bit more efficient:
                    var meta = col.Metadata;
                    if (meta.Schema.TryGetColumnIndex(MetadataUtils.Kinds.IsNormalized, out int normcol))
                    {
                        var normtype = meta.Schema.GetColumnType(normcol);
                        // Only a boolean IsNormalized value is read; any other metadata type is ignored.
                        if (normtype == BoolType.Instance)
                        {
                            bool val = default;
                            meta.GetGetter <bool>(normcol)(ref val);
                            if (val)
                            {
                                vecType = typeof(NormVector <>);
                            }
                        }
                    }
                }
                t = t.ItemType;
                // Fall through to the non-vector case to handle subtypes.
            }
            Contracts.Assert(!t.IsVector);

            if (t is KeyType kt)
            {
                Type physType = StaticKind(kt.RawKind);
                Contracts.Assert(physType == typeof(byte) || physType == typeof(ushort) ||
                                 physType == typeof(uint) || physType == typeof(ulong));
                // Default key representation: Key<T> for a positive count, VarKey<T> otherwise.
                var keyType = kt.Count > 0 ? typeof(Key <>) : typeof(VarKey <>);
                keyType = keyType.MakeGenericType(physType);

                if (kt.Count > 0)
                {
                    // Check to see if we have key value metadata of the appropriate type, size, and whatnot.
                    var meta = col.Metadata;
                    if (meta.Schema.TryGetColumnIndex(MetadataUtils.Kinds.KeyValues, out int kvcolIndex))
                    {
                        var kvcol  = meta.Schema[kvcolIndex];
                        var kvType = kvcol.Type;
                        // The key-value vector must have exactly one slot per key value.
                        if (kvType is VectorType kvVecType && kvVecType.Size == kt.Count)
                        {
                            Contracts.Assert(kt.Count > 0);
                            // Recurse on the metadata column to find the static type of the key values.
                            var subtype = GetTypeOrNull(kvcol);
                            if (subtype != null && subtype.IsGenericType)
                            {
                                var sgtype = subtype.GetGenericTypeDefinition();
                                if (sgtype == typeof(NormVector <>) || sgtype == typeof(Vector <>))
                                {
                                    // Upgrade to the two-argument key type that also carries the value type.
                                    var args = subtype.GetGenericArguments();
                                    Contracts.Assert(args.Length == 1);
                                    keyType = typeof(Key <,>).MakeGenericType(physType, args[0]);
                                }
                            }
                        }
                    }
                }
                // Wrap the key type in the vector wrapper when the column was a vector of keys.
                return(vecType?.MakeGenericType(keyType) ?? keyType);
            }

            if (t is PrimitiveType pt)
            {
                Type physType = StaticKind(pt.RawKind);
                // Though I am unaware of any existing instances, it is theoretically possible for a
                // primitive type to exist, have the same data kind as one of the existing types, and yet
                // not be one of the built in types. (For example, an outside analogy to the key types.) For this
                // reason, we must be certain that when we return here we are covering one of the builtin types.
                if (physType != null && (
                        pt == NumberType.I1 || pt == NumberType.I2 || pt == NumberType.I4 || pt == NumberType.I8 ||
                        pt == NumberType.U1 || pt == NumberType.U2 || pt == NumberType.U4 || pt == NumberType.U8 ||
                        pt == NumberType.R4 || pt == NumberType.R8 || pt == NumberType.UG || pt == BoolType.Instance ||
                        pt == DateTimeType.Instance || pt == DateTimeOffsetType.Instance || pt == TimeSpanType.Instance ||
                        pt == TextType.Instance))
                {
                    return((vecType ?? typeof(Scalar <>)).MakeGenericType(physType));
                }
            }

            // Unrecognized item type: no static pipeline representation.
            return(null);
        }
示例#15
0
        /// <summary>
        /// Creates and initializes a new mapping that produces a flat target set with all primitive columns, for copying primitive data from the source set.
        /// Only identity (PK) source columns are expanded recursively.
        /// For a relational source, this means that all primitive columns of the source table are mapped with their relational names, no FK-referenced tables are joined and no artificial column names are used.
        /// If entity columns (non-PK columns of joined tables) need to be expanded, a different implementation is needed (which will require joins, artificial column/path names etc.)
        /// </summary>
        /// <param name="sourceSet">Non-primitive table the data is copied from.</param>
        /// <param name="targetSet">Non-primitive table the data is copied to.</param>
        /// <param name="targetSchema">Schema supplying the target types; when <c>null</c>, the schema of <paramref name="targetSet"/> is used.</param>
        /// <returns>The mapping with one match per mapped source column. No matches are added when the source schema is neither <c>SchemaOledb</c> nor <c>SchemaCsv</c>.</returns>
        public Mapping CreatePrimitive(DcTable sourceSet, DcTable targetSet, DcSchema targetSchema)
        {
            Debug.Assert(!sourceSet.IsPrimitive && !targetSet.IsPrimitive, "Wrong use: copy mapping can be created for only non-primitive tables.");
            Debug.Assert(targetSchema != null || targetSet.Schema != null, "Wrong use: target schema must be specified.");

            Mapping  map          = new Mapping(sourceSet, targetSet);
            DcSchema sourceSchema = map.SourceTab.Schema;

            if (targetSchema == null)
            {
                targetSchema = targetSet.Schema;
            }

            if (sourceSchema is SchemaOledb)
            {
                TableRel relationalTable = (TableRel)map.SourceTab;
                foreach (ColumnAtt sourceAtt in relationalTable.GreaterPaths)
                {
                    ColumnPath sourcePath = new ColumnAtt(sourceAtt);

                    // Recommend matching target type (mapping primitive types)
                    this.MapPrimitiveSet(sourceAtt.Output, targetSchema);
                    DcTable targetType = this.GetBestTargetSet(sourceAtt.Output, targetSchema);

                    DcColumn   targetColumn = new Schema.Column(sourceAtt.RelationalColumnName, map.TargetTab, targetType, sourceAtt.IsKey, false);
                    ColumnPath targetPath   = new ColumnPath(targetColumn);
                    targetPath.Name = sourcePath.Name;

                    map.Matches.Add(new PathMatch(sourcePath, targetPath, 1.0));
                }
            }
            else if (sourceSchema is SchemaCsv)
            {
                DcTable csvTable = (DcTable)map.SourceTab;
                foreach (DcColumn sourceColumn in csvTable.Columns)
                {
                    if (sourceColumn.IsSuper)
                    {
                        continue;
                    }

                    // The target type is not taken from a primitive-type mapping here. Instead the sample
                    // values of the source column are analyzed and the most specific target type is chosen.
                    List <string> samples = ((ColumnCsv)sourceColumn).SampleValues;

                    string targetTypeName =
                        Com.Schema.Utils.isInt32(samples.ToArray()) ? "Integer"
                        : Com.Schema.Utils.isDouble(samples.ToArray()) ? "Double"
                        : "String";

                    DcTable  targetType   = targetSchema.GetPrimitiveType(targetTypeName);
                    DcColumn targetColumn = targetSchema.Space.CreateColumn(sourceColumn.Name, map.TargetTab, targetType, sourceColumn.IsKey);

                    ColumnPath sourcePath = new ColumnPath(sourceColumn);
                    ColumnPath targetPath = new ColumnPath(targetColumn);

                    map.Matches.Add(new PathMatch(sourcePath, targetPath, 1.0));
                }
            }

            return map;
        }