/// <summary>
/// Builds the two index tables that relate output columns to input columns and added columns.
/// </summary>
/// <remarks>
/// On return, each entry of <paramref name="colMap"/> is either a non-negative input column index
/// or the bitwise complement (~) of an added-column index, and each entry of
/// <paramref name="mapIinfoToCol"/> is the output column index assigned to that added column.
/// An input column that shares its name with an added column is immediately followed by that
/// added column; added columns that hide nothing are placed at the end, in their original order.
/// </remarks>
/// <param name="input">The input schema.</param>
/// <param name="names">The names of the added columns.</param>
/// <param name="colMap">Receives the output-column table, of length input.Count + names.Length.</param>
/// <param name="mapIinfoToCol">Receives the added-column to output-column table, of length names.Length.</param>
private static void ComputeColumnMapping(Schema input, string[] names, out int[] colMap, out int[] mapIinfoToCol)
{
    int srcCount = input.Count;
    int addedCount = names.Length;
    colMap = new int[srcCount + addedCount];
    mapIinfoToCol = new int[addedCount];

    // Pass 1: record which input column (if any) each added column hides. Both tables temporarily
    // hold complemented indices here, so zero still means "nothing recorded".
    for (int added = 0; added < addedCount; added++)
    {
        string addedName = names[added];
        if (!input.TryGetColumnIndex(addedName, out int hidden))
        {
            continue;
        }

        Contracts.Check(0 <= hidden && hidden < srcCount);
        Contracts.Check(input[hidden].Name == addedName);
        // Two added columns must not hide the same input column.
        Contracts.Check(colMap[hidden] == 0);
        mapIinfoToCol[added] = ~hidden;
        colMap[hidden] = ~added;
    }

    // Pass 2: fill the table from the back, starting with the added columns that hide nothing.
    int next = colMap.Length;
    for (int added = addedCount - 1; added >= 0; added--)
    {
        Contracts.Assert(mapIinfoToCol[added] <= 0);
        if (mapIinfoToCol[added] != 0)
        {
            continue;
        }

        next--;
        colMap[next] = ~added;
        mapIinfoToCol[added] = next;
    }

    // Pass 3: still walking backwards, emit each input column, preceded (in fill order) by the
    // added column that hides it so the hider ends up directly after it in the final layout.
    for (int src = srcCount - 1; src >= 0; src--)
    {
        int marker = colMap[src];
        Contracts.Assert(marker <= 0);
        if (marker < 0)
        {
            Contracts.Assert(next > 1);
            int hider = ~marker;
            Contracts.Assert(0 <= hider && hider < addedCount);
            Contracts.Assert(mapIinfoToCol[hider] == ~src);
            next--;
            colMap[next] = ~hider;
            mapIinfoToCol[hider] = next;
        }

        Contracts.Assert(next > 0);
        next--;
        colMap[next] = src;
    }

    Contracts.Assert(next == 0);
}
/// <summary>
/// Reads the added-column bindings from a model load context and resolves them against the input schema.
/// </summary>
/// <param name="parent">The owning transform; its host is used for all checks and exceptions.</param>
/// <param name="ctx">The load context to read the binary representation from.</param>
/// <param name="inputSchema">The schema in which the source columns are looked up.</param>
/// <param name="transposeInput">The transposed view of the input, or null when there is none.</param>
/// <param name="testType">
/// Optional validator for the source column type. It returns null when the type is acceptable,
/// otherwise a reason string that is included in the raised exception.
/// </param>
/// <returns>The bindings built from the loaded column information.</returns>
public static Bindings Create(OneToOneTransformBase parent, ModelLoadContext ctx, Schema inputSchema,
    ITransposeDataView transposeInput, Func<ColumnType, string> testType)
{
    Contracts.AssertValue(parent);
    var host = parent.Host;
    host.CheckValue(ctx, nameof(ctx));
    host.AssertValue(inputSchema);
    host.AssertValueOrNull(transposeInput);
    host.AssertValueOrNull(testType);

    // *** Binary format ***
    // int: number of added columns
    // for each added column
    //   int: id of output column name
    //   int: id of input column name
    int cinfo = ctx.Reader.ReadInt32();
    host.CheckDecode(cinfo > 0);

    var names = new string[cinfo];
    var infos = new ColInfo[cinfo];
    for (int i = 0; i < cinfo; i++)
    {
        string dst = ctx.LoadNonEmptyString();
        names[i] = dst;

        // Note that in old files, the source name may be null indicating that
        // the source column has the same name as the added column.
        string tmp = ctx.LoadStringOrNull();
        string src = tmp ?? dst;
        host.CheckDecode(!string.IsNullOrEmpty(src));

        int colSrc;
        if (!inputSchema.TryGetColumnIndex(src, out colSrc))
        {
            throw host.Except("Source column '{0}' is required but not found", src);
        }

        var type = inputSchema[colSrc].Type;
        if (testType != null)
        {
            string reason = testType(type);
            if (reason != null)
            {
                throw host.Except(InvalidTypeErrorFormat, src, type, reason);
            }
        }

        // The slot type is stored alongside the source column's index and type, so it has to be
        // queried with the source column index rather than the ordinal of the added column.
        var slotType = transposeInput?.GetSlotType(colSrc);
        infos[i] = new ColInfo(dst, colSrc, type, slotType as VectorType);
    }

    return new Bindings(parent, infos, inputSchema, false, names);
}
/// <summary>
/// Computes the output schema for the given input schema, after verifying that the feature
/// column (when one is configured) is present and has the expected type.
/// </summary>
/// <param name="inputSchema">The schema of the data that would be transformed.</param>
/// <returns>The schema obtained by transforming an empty data view over <paramref name="inputSchema"/>.</returns>
public override Schema GetOutputSchema(Schema inputSchema)
{
    Host.CheckValue(inputSchema, nameof(inputSchema));

    if (FeatureColumn != null)
    {
        bool found = inputSchema.TryGetColumnIndex(FeatureColumn, out int featureIndex);
        if (!found)
        {
            // No actual type is available to report when the column is missing.
            throw Host.ExceptSchemaMismatch(nameof(inputSchema), RoleMappedSchema.ColumnRole.Feature.Value,
                FeatureColumn, FeatureColumnType.ToString(), null);
        }

        var actualType = inputSchema[featureIndex].Type;
        if (!actualType.Equals(FeatureColumnType))
        {
            throw Host.ExceptSchemaMismatch(nameof(inputSchema), RoleMappedSchema.ColumnRole.Feature.Value,
                FeatureColumn, FeatureColumnType.ToString(), actualType.ToString());
        }
    }

    var emptyView = new EmptyDataView(Host, inputSchema);
    return Transform(emptyView).Schema;
}
/// <summary>
/// Initializes a new instance of <see cref="SingleFeaturePredictionTransformerBase{TModel, TScorer}"/>.
/// </summary>
/// <param name="host">The local instance of <see cref="IHost"/>.</param>
/// <param name="model">The model used for scoring.</param>
/// <param name="trainSchema">The schema of the training data.</param>
/// <param name="featureColumn">The feature column name; may be null, in which case no feature type is recorded.</param>
private protected SingleFeaturePredictionTransformerBase(IHost host, TModel model, Schema trainSchema, string featureColumn)
    : base(host, model, trainSchema)
{
    FeatureColumn = featureColumn;

    if (featureColumn == null)
    {
        FeatureColumnType = null;
    }
    else if (trainSchema.TryGetColumnIndex(featureColumn, out int featureIndex))
    {
        // Remember the type the feature column had at training time.
        FeatureColumnType = trainSchema[featureIndex].Type;
    }
    else
    {
        throw Host.ExceptSchemaMismatch(nameof(featureColumn), "feature", featureColumn);
    }

    BindableMapper = ScoreUtils.GetSchemaBindableMapper(Host, ModelAsPredictor);
}
/// <summary>
/// Looks up the output column index for a column name, considering added columns first and
/// then the columns of the input.
/// </summary>
/// <param name="name">The column name to look up; null is accepted and never matches.</param>
/// <param name="col">Receives the column index on success, or the default value on failure.</param>
/// <returns>True when a column with the given name was found; otherwise false.</returns>
public bool TryGetColumnIndex(string name, out int col)
{
    Contracts.CheckValueOrNull(name);

    if (name == null)
    {
        col = default(int);
        return false;
    }

    // Added columns take precedence over input columns.
    if (TryGetColumnIndexCore(name, out int infoIndex))
    {
        Contracts.Assert(0 <= infoIndex && infoIndex < InfoCount);
        col = MapIinfoToCol(infoIndex);
        return true;
    }

    if (!Input.TryGetColumnIndex(name, out int srcIndex))
    {
        col = default(int);
        return false;
    }

    // REVIEW: Should we keep a dictionary for this mapping? This scans _colMap linearly, starting
    // at the source slot (since source columns can only shift to larger indices).
    Contracts.Assert(0 <= srcIndex && srcIndex < Input.Count);
    int candidate = srcIndex;
    while (true)
    {
        Contracts.Assert(0 <= candidate && candidate < ColumnCount);
        Contracts.Assert(_colMap[candidate] <= srcIndex);
        if (_colMap[candidate] == srcIndex)
        {
            col = candidate;
            return true;
        }

        candidate++;
    }
}
/// <summary>
/// Builds the bindings for a set of user-specified one-to-one columns, resolving each source
/// column against the input schema.
/// </summary>
/// <param name="parent">The owning transform; its host is used for all checks and exceptions.</param>
/// <param name="column">The column specifications; must contain at least one item.</param>
/// <param name="inputSchema">The schema in which the source columns are looked up.</param>
/// <param name="transposedInput">The transposed view of the input, or null when there is none.</param>
/// <param name="testType">
/// Optional validator for the source column type. It returns null when the type is acceptable,
/// otherwise a reason string that is included in the raised exception.
/// </param>
/// <returns>The bindings built from the given column specifications.</returns>
public static Bindings Create(OneToOneTransformBase parent, OneToOneColumn[] column, Schema inputSchema,
    ITransposeDataView transposedInput, Func<ColumnType, string> testType)
{
    Contracts.AssertValue(parent);
    var host = parent.Host;
    host.CheckUserArg(Utils.Size(column) > 0, nameof(column));
    host.AssertValue(inputSchema);
    host.AssertValueOrNull(transposedInput);
    host.AssertValueOrNull(testType);

    var names = new string[column.Length];
    var infos = new ColInfo[column.Length];
    for (int i = 0; i < names.Length; i++)
    {
        var item = column[i];
        host.CheckUserArg(item.TrySanitize(), nameof(OneToOneColumn.Name), "Invalid new column name");
        names[i] = item.Name;

        int colSrc;
        if (!inputSchema.TryGetColumnIndex(item.Source, out colSrc))
        {
            throw host.ExceptUserArg(nameof(OneToOneColumn.Source), "Source column '{0}' not found", item.Source);
        }

        var type = inputSchema[colSrc].Type;
        if (testType != null)
        {
            string reason = testType(type);
            if (reason != null)
            {
                throw host.ExceptUserArg(nameof(OneToOneColumn.Source), InvalidTypeErrorFormat, item.Source, type, reason);
            }
        }

        // The slot type is stored alongside the source column's index and type, so it has to be
        // queried with the source column index rather than the ordinal of the added column.
        var slotType = transposedInput?.GetSlotType(colSrc);
        infos[i] = new ColInfo(names[i], colSrc, type, slotType as VectorType);
    }

    return new Bindings(parent, infos, inputSchema, true, names);
}
/// <summary>
/// Initializes a new instance of <see cref="SingleFeaturePredictionTransformerBase{TModel, TScorer}"/>.
/// </summary>
/// <param name="host">The local instance of <see cref="IHost"/>.</param>
/// <param name="model">The model used for scoring.</param>
/// <param name="trainSchema">The schema of the training data.</param>
/// <param name="featureColumn">The feature column name; may be null, in which case no feature type is recorded.</param>
public SingleFeaturePredictionTransformerBase(IHost host, TModel model, Schema trainSchema, string featureColumn)
    : base(host, model, trainSchema)
{
    FeatureColumn = featureColumn;

    if (featureColumn == null)
    {
        FeatureColumnType = null;
    }
    else if (!trainSchema.TryGetColumnIndex(featureColumn, out int col))
    {
        throw Host.ExceptSchemaMismatch(nameof(featureColumn), RoleMappedSchema.ColumnRole.Feature.Value, featureColumn);
    }
    else
    {
        // Remember the type the feature column had at training time.
        FeatureColumnType = trainSchema[col].Type;
    }

    BindableMapper = ScoreUtils.GetSchemaBindableMapper(Host, model);
}