Example #1
0
        /// <summary>
        /// Builds the lookup tables that relate output column indices to source columns of
        /// <paramref name="input"/> and to the added columns named in <paramref name="names"/>.
        /// </summary>
        /// <param name="input">The input schema whose columns are passed through.</param>
        /// <param name="names">The names of the added columns, indexed by iinfo.</param>
        /// <param name="colMap">
        /// On return, has one entry per output column: a non-negative value is a source column
        /// index, a negative value is the bitwise complement of an iinfo.
        /// </param>
        /// <param name="mapIinfoToCol">On return, maps each iinfo to its output column index.</param>
        private static void ComputeColumnMapping(Schema input, string[] names, out int[] colMap, out int[] mapIinfoToCol)
        {
            // Pass 1: record which added column hides which source column. Complements are used
            // so that zero (the array default) keeps meaning "nothing recorded":
            // * colMap[src] holds ~iinfo of the added column that hides src.
            // * mapIinfoToCol[iinfo] holds ~src of the source column that iinfo hides.
            colMap = new int[input.Count + names.Length];
            mapIinfoToCol = new int[names.Length];
            for (int added = 0; added < names.Length; added++)
            {
                string addedName = names[added];
                int hiddenSrc;
                if (!input.TryGetColumnIndex(addedName, out hiddenSrc))
                    continue;

                Contracts.Check(0 <= hiddenSrc && hiddenSrc < input.Count);
                Contracts.Check(input[hiddenSrc].Name == addedName);
                Contracts.Check(colMap[hiddenSrc] == 0);
                mapIinfoToCol[added] = ~hiddenSrc;
                colMap[hiddenSrc] = ~added;
            }

            // Pass 2: fill colMap from the back. Added columns that hide nothing are placed
            // first, so they end up at the tail of the output.
            int next = colMap.Length;
            for (int added = names.Length - 1; added >= 0; added--)
            {
                Contracts.Assert(mapIinfoToCol[added] <= 0);
                if (mapIinfoToCol[added] != 0)
                    continue;

                next--;
                colMap[next] = ~added;
                mapIinfoToCol[added] = next;
            }

            // Pass 3: walk the source columns from last to first. A hidden source column gets its
            // hiding added column placed just after it in the output.
            for (int src = input.Count - 1; src >= 0; src--)
            {
                Contracts.Assert(colMap[src] <= 0);
                if (colMap[src] < 0)
                {
                    Contracts.Assert(next > 1);
                    int hider = ~colMap[src];
                    Contracts.Assert(0 <= hider && hider < names.Length);
                    Contracts.Assert(mapIinfoToCol[hider] == ~src);
                    next--;
                    colMap[next] = ~hider;
                    mapIinfoToCol[hider] = next;
                }

                Contracts.Assert(next > 0);
                next--;
                colMap[next] = src;
            }

            // Every output slot must have been assigned exactly once.
            Contracts.Assert(next == 0);
        }
Example #2
0
            /// <summary>
            /// Reads the added-column bindings from <paramref name="ctx"/> and resolves every source
            /// column against <paramref name="inputSchema"/>.
            /// </summary>
            /// <param name="parent">The owning transform; its host is used for checks and exceptions.</param>
            /// <param name="ctx">The model load context to read the bindings from.</param>
            /// <param name="inputSchema">The schema in which source columns are looked up.</param>
            /// <param name="transposeInput">Optional transposed view of the input, used to query slot types.</param>
            /// <param name="testType">
            /// Optional validator for a source column type; a non-null result is treated as the
            /// reason the type is rejected.
            /// </param>
            /// <returns>The bindings built from the loaded column information.</returns>
            public static Bindings Create(OneToOneTransformBase parent, ModelLoadContext ctx, Schema inputSchema,
                ITransposeDataView transposeInput, Func<ColumnType, string> testType)
            {
                Contracts.AssertValue(parent);
                var host = parent.Host;
                host.CheckValue(ctx, nameof(ctx));
                host.AssertValue(inputSchema);
                host.AssertValueOrNull(transposeInput);
                host.AssertValueOrNull(testType);

                // *** Binary format ***
                // int: number of added columns
                // for each added column
                //   int: id of output column name
                //   int: id of input column name
                int cinfo = ctx.Reader.ReadInt32();
                host.CheckDecode(cinfo > 0);

                var names = new string[cinfo];
                var infos = new ColInfo[cinfo];
                for (int i = 0; i < cinfo; i++)
                {
                    string dst = ctx.LoadNonEmptyString();
                    names[i] = dst;

                    // Note that in old files, the source name may be null indicating that
                    // the source column has the same name as the added column.
                    string tmp = ctx.LoadStringOrNull();
                    string src = tmp ?? dst;
                    host.CheckDecode(!string.IsNullOrEmpty(src));

                    int colSrc;
                    if (!inputSchema.TryGetColumnIndex(src, out colSrc))
                        throw host.Except("Source column '{0}' is required but not found", src);
                    var type = inputSchema[colSrc].Type;
                    if (testType != null)
                    {
                        string reason = testType(type);
                        if (reason != null)
                            throw host.Except(InvalidTypeErrorFormat, src, type, reason);
                    }

                    // The slot type must describe the source column, so query it by colSrc.
                    // The loop index i is the position among the added columns, not an input column.
                    var slotType = transposeInput?.GetSlotType(colSrc);
                    infos[i] = new ColInfo(dst, colSrc, type, slotType as VectorType);
                }

                return new Bindings(parent, infos, inputSchema, false, names);
            }
Example #3
0
        /// <summary>
        /// Computes the output schema for <paramref name="inputSchema"/>. When a feature column
        /// is configured, first verifies that it exists in the input and has the expected type.
        /// </summary>
        /// <param name="inputSchema">The schema of the data to be transformed.</param>
        /// <returns>The schema produced by transforming an empty view over the input schema.</returns>
        public override Schema GetOutputSchema(Schema inputSchema)
        {
            Host.CheckValue(inputSchema, nameof(inputSchema));

            if (FeatureColumn != null)
            {
                // The feature column must be present in the input.
                bool found = inputSchema.TryGetColumnIndex(FeatureColumn, out int featureIndex);
                if (!found)
                    throw Host.ExceptSchemaMismatch(nameof(inputSchema), RoleMappedSchema.ColumnRole.Feature.Value, FeatureColumn, FeatureColumnType.ToString(), null);

                // It must also carry the recorded feature type.
                var actualType = inputSchema[featureIndex].Type;
                if (!actualType.Equals(FeatureColumnType))
                    throw Host.ExceptSchemaMismatch(nameof(inputSchema), RoleMappedSchema.ColumnRole.Feature.Value, FeatureColumn, FeatureColumnType.ToString(), actualType.ToString());
            }

            // Run the transform over a row-less view to obtain the resulting schema.
            var emptyView = new EmptyDataView(Host, inputSchema);
            return Transform(emptyView).Schema;
        }
        /// <summary>
        /// Creates a <see cref="SingleFeaturePredictionTransformerBase{TModel, TScorer}"/>, recording the
        /// feature column name and, when one is given, its type as found in the training schema.
        /// </summary>
        /// <param name="host">The local instance of <see cref="IHost"/>.</param>
        /// <param name="model">The model used for scoring.</param>
        /// <param name="trainSchema">The schema of the training data.</param>
        /// <param name="featureColumn">The feature column name; may be null.</param>
        private protected SingleFeaturePredictionTransformerBase(IHost host, TModel model, Schema trainSchema, string featureColumn)
            : base(host, model, trainSchema)
        {
            FeatureColumn = featureColumn;

            if (featureColumn != null)
            {
                // A named feature column must exist in the training schema.
                if (!trainSchema.TryGetColumnIndex(featureColumn, out int featureIndex))
                    throw Host.ExceptSchemaMismatch(nameof(featureColumn), "feature", featureColumn);

                FeatureColumnType = trainSchema[featureIndex].Type;
            }
            else
            {
                // No feature column: there is no type to record.
                FeatureColumnType = null;
            }

            BindableMapper = ScoreUtils.GetSchemaBindableMapper(Host, ModelAsPredictor);
        }
        /// <summary>
        /// Attempts to find the output column index for <paramref name="name"/>. Added columns
        /// are consulted first; otherwise the name is resolved against the input and mapped
        /// through <c>_colMap</c>.
        /// </summary>
        /// <param name="name">The column name to look up; null yields <c>false</c>.</param>
        /// <param name="col">On success, the output column index; otherwise the default value.</param>
        /// <returns><c>true</c> if a column with the given name was found.</returns>
        public bool TryGetColumnIndex(string name, out int col)
        {
            Contracts.CheckValueOrNull(name);
            if (name == null)
            {
                col = default(int);
                return false;
            }

            // Added columns are checked before input columns.
            int infoIndex;
            if (TryGetColumnIndexCore(name, out infoIndex))
            {
                Contracts.Assert(0 <= infoIndex && infoIndex < InfoCount);
                col = MapIinfoToCol(infoIndex);
                return true;
            }

            // REVIEW: Should we keep a dictionary for this mapping? This first looks up
            // the source column index, then does a linear scan in _colMap, starting at the src
            // slot (since source columns can only shift to larger indices).
            int srcIndex;
            if (!Input.TryGetColumnIndex(name, out srcIndex))
            {
                col = default(int);
                return false;
            }

            Contracts.Assert(0 <= srcIndex && srcIndex < Input.Count);
            int candidate = srcIndex;
            while (true)
            {
                Contracts.Assert(0 <= candidate && candidate < ColumnCount);
                Contracts.Assert(_colMap[candidate] <= srcIndex);
                if (_colMap[candidate] == srcIndex)
                    break;
                candidate++;
            }

            col = candidate;
            return true;
        }
            /// <summary>
            /// Builds column bindings from the user-supplied <paramref name="column"/> definitions,
            /// resolving every source column against <paramref name="inputSchema"/>.
            /// </summary>
            /// <param name="parent">The owning transform; its host is used for checks and exceptions.</param>
            /// <param name="column">The column definitions; must contain at least one entry.</param>
            /// <param name="inputSchema">The schema in which source columns are looked up.</param>
            /// <param name="transposedInput">Optional transposed view of the input, used to query slot types.</param>
            /// <param name="testType">
            /// Optional validator for a source column type; a non-null result is treated as the
            /// reason the type is rejected.
            /// </param>
            /// <returns>The bindings built from the supplied column definitions.</returns>
            public static Bindings Create(OneToOneTransformBase parent, OneToOneColumn[] column, Schema inputSchema,
                                          ITransposeDataView transposedInput, Func <ColumnType, string> testType)
            {
                Contracts.AssertValue(parent);
                var host = parent.Host;

                host.CheckUserArg(Utils.Size(column) > 0, nameof(column));
                host.AssertValue(inputSchema);
                host.AssertValueOrNull(transposedInput);
                host.AssertValueOrNull(testType);

                var names = new string[column.Length];
                var infos = new ColInfo[column.Length];

                for (int i = 0; i < names.Length; i++)
                {
                    var item = column[i];
                    host.CheckUserArg(item.TrySanitize(), nameof(OneToOneColumn.Name), "Invalid new column name");
                    names[i] = item.Name;

                    int colSrc;
                    if (!inputSchema.TryGetColumnIndex(item.Source, out colSrc))
                    {
                        throw host.ExceptUserArg(nameof(OneToOneColumn.Source), "Source column '{0}' not found", item.Source);
                    }

                    var type = inputSchema[colSrc].Type;
                    if (testType != null)
                    {
                        string reason = testType(type);
                        if (reason != null)
                        {
                            throw host.ExceptUserArg(nameof(OneToOneColumn.Source), InvalidTypeErrorFormat, item.Source, type, reason);
                        }
                    }

                    // The slot type must describe the source column, so query it by colSrc.
                    // The loop index i is the position among the added columns, not an input column.
                    var slotType = transposedInput?.GetSlotType(colSrc);
                    infos[i] = new ColInfo(names[i], colSrc, type, slotType as VectorType);
                }

                return(new Bindings(parent, infos, inputSchema, true, names));
            }
Example #7
0
        /// <summary>
        /// Initializes a new instance of <see cref="SingleFeaturePredictionTransformerBase{TModel, TScorer}"/>,
        /// recording the feature column name and, when one is given, its type as found in the training schema.
        /// </summary>
        /// <param name="host">The local instance of <see cref="IHost"/>.</param>
        /// <param name="model">The model used for scoring.</param>
        /// <param name="trainSchema">The schema of the training data.</param>
        /// <param name="featureColumn">The feature column name; may be null.</param>
        public SingleFeaturePredictionTransformerBase(IHost host, TModel model, Schema trainSchema, string featureColumn)
            : base(host, model, trainSchema)
        {
            FeatureColumn = featureColumn;
            if (featureColumn == null)
            {
                // No feature column: there is no type to record.
                FeatureColumnType = null;
            }
            else if (!trainSchema.TryGetColumnIndex(featureColumn, out int col))
            {
                // A named feature column must exist in the training schema.
                throw Host.ExceptSchemaMismatch(nameof(featureColumn), RoleMappedSchema.ColumnRole.Feature.Value, featureColumn);
            }
            else
            {
                FeatureColumnType = trainSchema[col].Type;
            }

            BindableMapper = ScoreUtils.GetSchemaBindableMapper(Host, model);
        }