/// <summary>
/// Binds the parent's generic mapper to <paramref name="schema"/> and builds the output schema:
/// the generic mapper's output schema zipped with a single feature-contributions column.
/// </summary>
/// <param name="env">Environment used for assertions and passed to the generic mapper's Bind call.</param>
/// <param name="parent">Mapper that supplies <c>GenericMapper</c> and the <c>Stringify</c> flag.</param>
/// <param name="schema">Role-mapped input schema; asserted to have a feature column.</param>
public RowMapper(IHostEnvironment env, BindableMapper parent, RoleMappedSchema schema)
{
    Contracts.AssertValue(env);
    _env = env;
    _env.AssertValue(schema);
    _env.AssertValue(parent);
    _env.Assert(schema.Feature.HasValue);

    _parent = parent;
    InputRoleMappedSchema = schema;
    _genericRowMapper = parent.GenericMapper.Bind(_env, schema) as ISchemaBoundRowMapper;

    var slotCount = FeatureColumn.Type.GetVectorSize();
    if (!parent.Stringify)
    {
        // Vector-typed contributions column; slot names are forwarded from the feature column when present.
        var annotations = new DataViewSchema.Annotations.Builder();
        if (InputSchema[FeatureColumn.Index].HasSlotNames(slotCount))
        {
            annotations.AddSlotNames(slotCount,
                (ref VBuffer<ReadOnlyMemory<char>> value) =>
                    FeatureColumn.Annotations.GetValue(AnnotationUtils.Kinds.SlotNames, ref value));
        }

        var vectorColumns = new DataViewSchema.Builder();
        var contributionType = new VectorType(NumberDataViewType.Single, FeatureColumn.Type as VectorType);
        vectorColumns.AddColumn(DefaultColumnNames.FeatureContributions, contributionType, annotations.ToAnnotations());
        _outputSchema = vectorColumns.ToSchema();
    }
    else
    {
        // Text-typed contributions column without annotations.
        var textColumns = new DataViewSchema.Builder();
        textColumns.AddColumn(DefaultColumnNames.FeatureContributions, TextDataViewType.Instance, null);
        _outputSchema = textColumns.ToSchema();

        // Cache the feature slot names, or an empty buffer of the same length when there are none.
        if (!FeatureColumn.HasSlotNames(slotCount))
        {
            _slotNames = VBufferUtils.CreateEmpty<ReadOnlyMemory<char>>(slotCount);
        }
        else
        {
            FeatureColumn.Annotations.GetValue(AnnotationUtils.Kinds.SlotNames, ref _slotNames);
        }
    }

    _outputGenericSchema = _genericRowMapper.OutputSchema;
    var zipped = new ZipBinding(new DataViewSchema[] { _outputGenericSchema, _outputSchema });
    OutputSchema = zipped.OutputSchema;
}
/// <summary>
/// Binds the parent's generic mapper to <paramref name="schema"/> and builds the output schema:
/// the generic mapper's output schema combined with a single feature-contributions column.
/// </summary>
/// <param name="env">Environment used for assertions and passed to the generic mapper's Bind call.</param>
/// <param name="parent">Mapper that supplies <c>GenericMapper</c> and the <c>Stringify</c> flag.</param>
/// <param name="schema">Role-mapped input schema; its <c>Feature</c> is asserted to be non-null.</param>
public RowMapper(IHostEnvironment env, BindableMapper parent, RoleMappedSchema schema)
{
    Contracts.AssertValue(env);
    _env = env;
    _env.AssertValue(schema);
    _env.AssertValue(parent);
    _env.AssertValue(schema.Feature);

    _parent = parent;
    InputRoleMappedSchema = schema;
    _genericRowMapper = parent.GenericMapper.Bind(_env, schema) as ISchemaBoundRowMapper;

    if (!parent.Stringify)
    {
        // Vector-typed contributions column described by a FeatureContributionSchema.
        var contributionType = new VectorType(NumberType.R4, schema.Feature.Type as VectorType);
        var contributionSchema = new FeatureContributionSchema(
            _env,
            DefaultColumnNames.FeatureContributions,
            contributionType,
            InputSchema,
            InputRoleMappedSchema.Feature.Index);
        _outputSchema = Schema.Create(contributionSchema);
    }
    else
    {
        // Text-typed contributions column without metadata.
        var textColumns = new SchemaBuilder();
        textColumns.AddColumn(DefaultColumnNames.FeatureContributions, TextType.Instance, null);
        _outputSchema = textColumns.GetSchema();

        // Cache the feature slot names, or an empty buffer of the same length when there are none.
        bool hasSlotNames = InputSchema.HasSlotNames(
            InputRoleMappedSchema.Feature.Index,
            InputRoleMappedSchema.Feature.Type.VectorSize);
        if (!hasSlotNames)
        {
            _slotNames = VBufferUtils.CreateEmpty<ReadOnlyMemory<char>>(InputRoleMappedSchema.Feature.Type.VectorSize);
        }
        else
        {
            InputSchema.GetMetadata(MetadataUtils.Kinds.SlotNames, InputRoleMappedSchema.Feature.Index, ref _slotNames);
        }
    }

    _outputGenericSchema = _genericRowMapper.OutputSchema;
    var combined = new CompositeSchema(new Schema[] { _outputGenericSchema, _outputSchema });
    OutputSchema = combined.AsSchema;
}
/// <summary>
/// Creates a <see cref="GenericScorer"/> with default arguments over an empty view of the training schema,
/// using the bindable mapper bound to a role-mapped schema built from <c>TrainSchema</c> and <c>FeatureColumnName</c>.
/// </summary>
/// <returns>The newly constructed scorer.</returns>
private protected GenericScorer GetGenericScorer()
{
    var roleMapped = new RoleMappedSchema(TrainSchema, null, FeatureColumnName);
    var scorerArgs = new GenericScorer.Arguments();
    var emptyInput = new EmptyDataView(Host, TrainSchema);
    var boundMapper = BindableMapper.Bind(Host, roleMapped);
    return new GenericScorer(Host, scorerArgs, emptyInput, boundMapper, roleMapped);
}
/// <summary>
/// Assigns <c>Scorer</c> a <see cref="MultiClassClassifierScorer"/> with default arguments, bound to a
/// role-mapped schema built from <c>TrainSchema</c>, <c>_trainLabelColumn</c> and <c>FeatureColumn</c>.
/// </summary>
private void SetScorer()
{
    var roleMapped = new RoleMappedSchema(TrainSchema, _trainLabelColumn, FeatureColumn);
    var scorerArgs = new MultiClassClassifierScorer.Arguments();
    var emptyInput = new EmptyDataView(Host, TrainSchema);
    var boundMapper = BindableMapper.Bind(Host, roleMapped);
    Scorer = new MultiClassClassifierScorer(Host, scorerArgs, emptyInput, boundMapper, roleMapped);
}
/// <summary>
/// Re-creates the transformer from a saved model: reads the feature column names, the scorer threshold
/// and the threshold column from <paramref name="ctx"/>, then rebuilds the binary classifier scorer.
/// </summary>
/// <param name="host">Host environment; checked for null and used to register this transformer's host.</param>
/// <param name="ctx">Load context the transformer state is read from.</param>
public FieldAwareFactorizationMachinePredictionTransformer(IHostEnvironment host, ModelLoadContext ctx)
    : base(Contracts.CheckRef(host, nameof(host)).Register(nameof(FieldAwareFactorizationMachinePredictionTransformer)), ctx)
{
    // *** Binary format ***
    // <base info>
    // ids of strings: feature columns.
    // float: scorer threshold
    // id of string: scorer threshold column

    // The number of feature columns comes from the model's field count. FAFM uses more than one.
    int fieldCount = Model.FieldCount;
    FeatureColumns = new string[fieldCount];
    FeatureColumnTypes = new ColumnType[fieldCount];

    int index = 0;
    while (index < fieldCount)
    {
        FeatureColumns[index] = ctx.LoadString();

        // Every loaded feature column must exist in the training schema.
        bool found = TrainSchema.TryGetColumnIndex(FeatureColumns[index], out int columnIndex);
        if (!found)
        {
            throw Host.ExceptSchemaMismatch(nameof(FeatureColumns), RoleMappedSchema.ColumnRole.Feature.Value, FeatureColumns[index]);
        }

        FeatureColumnTypes[index] = TrainSchema.GetColumnType(columnIndex);
        index++;
    }

    _threshold = ctx.Reader.ReadSingle();
    _thresholdColumn = ctx.LoadString();

    BindableMapper = ScoreUtils.GetSchemaBindableMapper(Host, Model);

    var roleMapped = GetSchema();
    var scorerArgs = new BinaryClassifierScorer.Arguments
    {
        Threshold = _threshold,
        ThresholdColumn = _thresholdColumn
    };
    var emptyInput = new EmptyDataView(Host, TrainSchema);
    var boundMapper = BindableMapper.Bind(Host, roleMapped);
    Scorer = new BinaryClassifierScorer(Host, scorerArgs, emptyInput, boundMapper, roleMapped);
}
/// <summary>
/// Builds a transformer around a trained field-aware factorization machine predictor.
/// Resolves the type of every feature column against <paramref name="trainSchema"/> and creates
/// the binary classifier scorer with the given threshold settings.
/// </summary>
/// <param name="host">Host environment; checked for null and used to register this transformer's host.</param>
/// <param name="model">The trained predictor.</param>
/// <param name="trainSchema">Schema in which every name in <paramref name="featureColumns"/> must be found.</param>
/// <param name="featureColumns">Names of the feature columns; must be non-null and contain at least one name.</param>
/// <param name="threshold">Threshold passed to the binary classifier scorer.</param>
/// <param name="thresholdColumn">Threshold column passed to the binary classifier scorer; must be non-empty.</param>
public FieldAwareFactorizationMachinePredictionTransformer(IHostEnvironment host, FieldAwareFactorizationMachinePredictor model, Schema trainSchema,
    string[] featureColumns, float threshold = 0f, string thresholdColumn = DefaultColumnNames.Score)
    : base(Contracts.CheckRef(host, nameof(host)).Register(nameof(FieldAwareFactorizationMachinePredictionTransformer)), model, trainSchema)
{
    Host.CheckNonEmpty(thresholdColumn, nameof(thresholdColumn));
    _threshold = threshold;
    _thresholdColumn = thresholdColumn;

    Host.CheckValue(featureColumns, nameof(featureColumns));
    int featCount = featureColumns.Length;
    // An array length is never negative, so the empty case has to be rejected with a strict comparison.
    Host.Check(featCount > 0, "Empty features column.");

    FeatureColumns = featureColumns;
    FeatureColumnTypes = new ColumnType[featCount];

    int i = 0;
    foreach (var feat in featureColumns)
    {
        // Every requested feature column must exist in the training schema.
        if (!trainSchema.TryGetColumnIndex(feat, out int col))
        {
            throw Host.ExceptSchemaMismatch(nameof(featureColumns), RoleMappedSchema.ColumnRole.Feature.Value, feat);
        }

        FeatureColumnTypes[i++] = trainSchema.GetColumnType(col);
    }

    BindableMapper = ScoreUtils.GetSchemaBindableMapper(Host, model);

    var schema = GetSchema();
    var args = new BinaryClassifierScorer.Arguments { Threshold = _threshold, ThresholdColumn = _thresholdColumn };
    Scorer = new BinaryClassifierScorer(Host, args, new EmptyDataView(Host, trainSchema), BindableMapper.Bind(Host, schema), schema);
}
/// <summary>
/// Re-creates a <see cref="MatrixFactorizationPredictionTransformer"/> from the context in which
/// the original transformer was saved.
/// </summary>
/// <param name="host">Host environment; checked for null and used to register this transformer's host.</param>
/// <param name="ctx">Load context the transformer state is read from.</param>
public MatrixFactorizationPredictionTransformer(IHostEnvironment host, ModelLoadContext ctx)
    : base(Contracts.CheckRef(host, nameof(host)).Register(nameof(MatrixFactorizationPredictionTransformer)), ctx)
{
    // *** Binary format ***
    // <base info>
    // string: the column name of matrix's column ids.
    // string: the column name of matrix's row ids.

    MatrixColumnIndexColumnName = ctx.LoadString();
    MatrixRowIndexColumnName = ctx.LoadString();

    // Resolve the matrix-column-index column in the training schema.
    bool hasColumnIndex = TrainSchema.TryGetColumnIndex(MatrixColumnIndexColumnName, out int columnIndexCol);
    if (!hasColumnIndex)
    {
        throw Host.ExceptSchemaMismatch(nameof(MatrixColumnIndexColumnName), RecommenderUtils.MatrixColumnIndexKind.Value, MatrixColumnIndexColumnName);
    }
    MatrixColumnIndexColumnType = TrainSchema.GetColumnType(columnIndexCol);

    // Resolve the matrix-row-index column in the training schema.
    bool hasRowIndex = TrainSchema.TryGetColumnIndex(MatrixRowIndexColumnName, out int rowIndexCol);
    if (!hasRowIndex)
    {
        throw Host.ExceptSchemaMismatch(nameof(MatrixRowIndexColumnName), RecommenderUtils.MatrixRowIndexKind.Value, MatrixRowIndexColumnName);
    }
    MatrixRowIndexColumnType = TrainSchema.GetColumnType(rowIndexCol);

    BindableMapper = ScoreUtils.GetSchemaBindableMapper(Host, Model);

    var roleMapped = GetSchema();
    var scorerArgs = new GenericScorer.Arguments { Suffix = "" };
    var emptyInput = new EmptyDataView(Host, TrainSchema);
    var boundMapper = BindableMapper.Bind(Host, roleMapped);
    Scorer = new GenericScorer(Host, scorerArgs, emptyInput, boundMapper, roleMapped);
}
/// <summary>
/// Builds a clustering prediction transformer around <paramref name="model"/> and creates its
/// <see cref="ClusteringScorer"/> with default arguments.
/// </summary>
/// <param name="env">Host environment; checked for null and used to register this transformer's host.</param>
/// <param name="model">The trained model, forwarded to the base constructor.</param>
/// <param name="inputSchema">Input schema used for the role-mapped schema and the empty data view.</param>
/// <param name="featureColumn">Name of the feature column.</param>
/// <param name="threshold">Not referenced by this constructor's body.</param>
/// <param name="thresholdColumn">Only checked to be non-empty by this constructor's body.</param>
internal ClusteringPredictionTransformer(IHostEnvironment env, TModel model, DataViewSchema inputSchema, string featureColumn,
    float threshold = 0f, string thresholdColumn = DefaultColumnNames.Score)
    : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ClusteringPredictionTransformer<TModel>)), model, inputSchema, featureColumn)
{
    Host.CheckNonEmpty(thresholdColumn, nameof(thresholdColumn));

    var roleMapped = new RoleMappedSchema(inputSchema, null, featureColumn);
    var scorerArgs = new ClusteringScorer.Arguments();
    var emptyInput = new EmptyDataView(Host, inputSchema);
    var boundMapper = BindableMapper.Bind(Host, roleMapped);
    Scorer = new ClusteringScorer(Host, scorerArgs, emptyInput, boundMapper, roleMapped);
}
/// <summary>
/// Re-creates a ranking prediction transformer from a load context and builds its
/// <see cref="GenericScorer"/> with default arguments over the training schema.
/// </summary>
/// <param name="env">Host environment; checked for null and used to register this transformer's host.</param>
/// <param name="ctx">Load context forwarded to the base constructor.</param>
internal RankingPredictionTransformer(IHostEnvironment env, ModelLoadContext ctx)
    : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(RankingPredictionTransformer<TModel>)), ctx)
{
    var roleMapped = new RoleMappedSchema(TrainSchema, null, FeatureColumn);
    var scorerArgs = new GenericScorer.Arguments();
    var emptyInput = new EmptyDataView(Host, TrainSchema);
    var boundMapper = BindableMapper.Bind(Host, roleMapped);
    _scorer = new GenericScorer(Host, scorerArgs, emptyInput, boundMapper, roleMapped);
}
/// <summary>
/// Builds a ranking prediction transformer around <paramref name="model"/> and creates its
/// <see cref="GenericScorer"/> with default arguments over <paramref name="inputSchema"/>.
/// </summary>
/// <param name="env">Host environment; checked for null and used to register this transformer's host.</param>
/// <param name="model">The trained model, forwarded to the base constructor.</param>
/// <param name="inputSchema">Input schema used for the role-mapped schema and the empty data view.</param>
/// <param name="featureColumn">Name of the feature column.</param>
public RankingPredictionTransformer(IHostEnvironment env, TModel model, ISchema inputSchema, string featureColumn)
    : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(RankingPredictionTransformer<TModel>)), model, inputSchema, featureColumn)
{
    var roleMapped = new RoleMappedSchema(inputSchema, null, featureColumn);
    var scorerArgs = new GenericScorer.Arguments();
    var emptyInput = new EmptyDataView(Host, inputSchema);
    var boundMapper = BindableMapper.Bind(Host, roleMapped);
    _scorer = new GenericScorer(Host, scorerArgs, emptyInput, boundMapper, roleMapped);
}
/// <summary>
/// Re-creates a multiclass prediction transformer from a load context: reads the (possibly null)
/// train label column name and builds a <see cref="MultiClassClassifierScorer"/> with default arguments.
/// </summary>
/// <param name="env">Host environment; checked for null and used to register this transformer's host.</param>
/// <param name="ctx">Load context the transformer state is read from.</param>
public MulticlassPredictionTransformer(IHostEnvironment env, ModelLoadContext ctx)
    : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(MulticlassPredictionTransformer<TModel>)), ctx)
{
    // *** Binary format ***
    // <base info>
    // id of string: train label column

    _trainLabelColumn = ctx.LoadStringOrNull();

    var roleMapped = new RoleMappedSchema(TrainSchema, _trainLabelColumn, FeatureColumn);
    var scorerArgs = new MultiClassClassifierScorer.Arguments();
    var emptyInput = new EmptyDataView(Host, TrainSchema);
    var boundMapper = BindableMapper.Bind(Host, roleMapped);
    _scorer = new MultiClassClassifierScorer(Host, scorerArgs, emptyInput, boundMapper, roleMapped);
}
/// <summary>
/// Builds a multiclass prediction transformer around <paramref name="model"/>, remembers the train
/// label column and creates a <see cref="MultiClassClassifierScorer"/> with default arguments.
/// </summary>
/// <param name="env">Host environment; checked for null and used to register this transformer's host.</param>
/// <param name="model">The trained model, forwarded to the base constructor.</param>
/// <param name="inputSchema">Input schema used for the role-mapped schema and the empty data view.</param>
/// <param name="featureColumn">Name of the feature column.</param>
/// <param name="labelColumn">Name of the label column; may be null.</param>
public MulticlassPredictionTransformer(IHostEnvironment env, TModel model, ISchema inputSchema, string featureColumn, string labelColumn)
    : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(MulticlassPredictionTransformer<TModel>)), model, inputSchema, featureColumn)
{
    Host.CheckValueOrNull(labelColumn);
    _trainLabelColumn = labelColumn;

    var roleMapped = new RoleMappedSchema(inputSchema, labelColumn, featureColumn);
    var scorerArgs = new MultiClassClassifierScorer.Arguments();
    var emptyInput = new EmptyDataView(Host, inputSchema);
    var boundMapper = BindableMapper.Bind(Host, roleMapped);
    _scorer = new MultiClassClassifierScorer(Host, scorerArgs, emptyInput, boundMapper, roleMapped);
}
/// <summary>
/// Re-creates a binary prediction transformer from a load context: reads the scorer threshold and
/// threshold column, then builds a <see cref="BinaryClassifierScorer"/> configured with them.
/// </summary>
/// <param name="env">Host environment; checked for null and used to register this transformer's host.</param>
/// <param name="ctx">Load context the transformer state is read from.</param>
public BinaryPredictionTransformer(IHostEnvironment env, ModelLoadContext ctx)
    : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(BinaryPredictionTransformer<TModel>)), ctx)
{
    // *** Binary format ***
    // <base info>
    // float: scorer threshold
    // id of string: scorer threshold column

    Threshold = ctx.Reader.ReadSingle();
    ThresholdColumn = ctx.LoadString();

    var roleMapped = new RoleMappedSchema(TrainSchema, null, FeatureColumn);
    var scorerArgs = new BinaryClassifierScorer.Arguments
    {
        Threshold = Threshold,
        ThresholdColumn = ThresholdColumn
    };
    var emptyInput = new EmptyDataView(Host, TrainSchema);
    var boundMapper = BindableMapper.Bind(Host, roleMapped);
    _scorer = new BinaryClassifierScorer(Host, scorerArgs, emptyInput, boundMapper, roleMapped);
}
/// <summary>
/// Builds a binary prediction transformer around <paramref name="model"/> and creates a
/// <see cref="BinaryClassifierScorer"/> configured with the given threshold settings.
/// </summary>
/// <param name="env">Host environment; checked for null and used to register this transformer's host.</param>
/// <param name="model">The trained model, forwarded to the base constructor.</param>
/// <param name="inputSchema">Input schema used for the role-mapped schema and the empty data view.</param>
/// <param name="featureColumn">Name of the feature column.</param>
/// <param name="threshold">Threshold stored on the transformer and passed to the scorer.</param>
/// <param name="thresholdColumn">Threshold column stored on the transformer and passed to the scorer; must be non-empty.</param>
public BinaryPredictionTransformer(IHostEnvironment env, TModel model, ISchema inputSchema, string featureColumn,
    float threshold = 0f, string thresholdColumn = DefaultColumnNames.Score)
    : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(BinaryPredictionTransformer<TModel>)), model, inputSchema, featureColumn)
{
    Host.CheckNonEmpty(thresholdColumn, nameof(thresholdColumn));

    var roleMapped = new RoleMappedSchema(inputSchema, null, featureColumn);
    Threshold = threshold;
    ThresholdColumn = thresholdColumn;

    var scorerArgs = new BinaryClassifierScorer.Arguments
    {
        Threshold = Threshold,
        ThresholdColumn = ThresholdColumn
    };
    var emptyInput = new EmptyDataView(Host, inputSchema);
    var boundMapper = BindableMapper.Bind(Host, roleMapped);
    _scorer = new BinaryClassifierScorer(Host, scorerArgs, emptyInput, boundMapper, roleMapped);
}
/// <summary>
/// Assigns <c>Scorer</c> a <see cref="BinaryClassifierScorer"/> configured with the current
/// <c>Threshold</c> and <c>ThresholdColumn</c>, bound to a role-mapped schema built from
/// <c>TrainSchema</c>, <c>LabelColumnName</c> and <c>FeatureColumnName</c>.
/// </summary>
private void SetScorer()
{
    var roleMapped = new RoleMappedSchema(TrainSchema, LabelColumnName, FeatureColumnName);
    var scorerArgs = new BinaryClassifierScorer.Arguments
    {
        Threshold = Threshold,
        ThresholdColumn = ThresholdColumn
    };
    var emptyInput = new EmptyDataView(Host, TrainSchema);
    var boundMapper = BindableMapper.Bind(Host, roleMapped);
    Scorer = new BinaryClassifierScorer(Host, scorerArgs, emptyInput, boundMapper, roleMapped);
}
/// <summary>
/// Assigns <c>Scorer</c> a <see cref="MulticlassClassificationScorer"/> whose score and predicted-label
/// column names come from <c>_scoreColumn</c> and <c>_predictedLabelColumn</c>, bound to a role-mapped
/// schema built from <c>TrainSchema</c>, <c>_trainLabelColumn</c> and <c>FeatureColumnName</c>.
/// </summary>
private void SetScorer()
{
    var roleMapped = new RoleMappedSchema(TrainSchema, _trainLabelColumn, FeatureColumnName);
    var scorerArgs = new MulticlassClassificationScorer.Arguments()
    {
        ScoreColumnName = _scoreColumn,
        PredictedLabelColumnName = _predictedLabelColumn
    };
    var emptyInput = new EmptyDataView(Host, TrainSchema);
    var boundMapper = BindableMapper.Bind(Host, roleMapped);
    Scorer = new MulticlassClassificationScorer(Host, scorerArgs, emptyInput, boundMapper, roleMapped);
}
/// <summary>
/// Builds a transformer around trained field-aware factorization machine model parameters.
/// Resolves the type of every feature column against <paramref name="trainSchema"/> and creates
/// the binary classifier scorer with the given threshold settings.
/// </summary>
/// <param name="host">Host environment; checked for null and used to register this transformer's host.</param>
/// <param name="model">The trained model parameters.</param>
/// <param name="trainSchema">Schema in which every name in <paramref name="featureColumns"/> must be found.</param>
/// <param name="featureColumns">Names of the feature columns; checked to be non-empty.</param>
/// <param name="threshold">Threshold passed to the binary classifier scorer.</param>
/// <param name="thresholdColumn">Threshold column passed to the binary classifier scorer; must be non-empty.</param>
internal FieldAwareFactorizationMachinePredictionTransformer(IHostEnvironment host, FieldAwareFactorizationMachineModelParameters model, DataViewSchema trainSchema,
    string[] featureColumns, float threshold = 0f, string thresholdColumn = DefaultColumnNames.Score)
    : base(Contracts.CheckRef(host, nameof(host)).Register(nameof(FieldAwareFactorizationMachinePredictionTransformer)), model, trainSchema)
{
    Host.CheckNonEmpty(thresholdColumn, nameof(thresholdColumn));
    Host.CheckNonEmpty(featureColumns, nameof(featureColumns));

    _threshold = threshold;
    _thresholdColumn = thresholdColumn;
    FeatureColumns = featureColumns;

    // Look up each feature column in the training schema and record its type, in order.
    var resolvedTypes = new DataViewType[featureColumns.Length];
    for (int k = 0; k < featureColumns.Length; k++)
    {
        string name = featureColumns[k];
        bool found = trainSchema.TryGetColumnIndex(name, out int columnIndex);
        if (!found)
        {
            throw Host.ExceptSchemaMismatch(nameof(featureColumns), "feature", name);
        }

        resolvedTypes[k] = trainSchema[columnIndex].Type;
    }
    FeatureColumnTypes = resolvedTypes;

    BindableMapper = ScoreUtils.GetSchemaBindableMapper(Host, model);

    var roleMapped = GetSchema();
    var scorerArgs = new BinaryClassifierScorer.Arguments
    {
        Threshold = _threshold,
        ThresholdColumn = _thresholdColumn
    };
    var emptyInput = new EmptyDataView(Host, trainSchema);
    var boundMapper = BindableMapper.Bind(Host, roleMapped);
    Scorer = new BinaryClassifierScorer(Host, scorerArgs, emptyInput, boundMapper, roleMapped);
}
/// <summary>
/// Re-creates a clustering prediction transformer from a load context and builds its
/// <see cref="ClusteringScorer"/> with default arguments over the training schema.
/// </summary>
/// <param name="env">Host environment; checked for null and used to register this transformer's host.</param>
/// <param name="ctx">Load context forwarded to the base constructor.</param>
internal ClusteringPredictionTransformer(IHostEnvironment env, ModelLoadContext ctx)
    : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ClusteringPredictionTransformer<TModel>)), ctx)
{
    // *** Binary format ***
    // <base info>

    var roleMapped = new RoleMappedSchema(TrainSchema, null, FeatureColumnName);
    var scorerArgs = new ClusteringScorer.Arguments();
    var emptyInput = new EmptyDataView(Host, TrainSchema);
    var boundMapper = BindableMapper.Bind(Host, roleMapped);
    Scorer = new ClusteringScorer(Host, scorerArgs, emptyInput, boundMapper, roleMapped);
}
/// <summary>
/// Builds a transformer from a matrix factorization predictor (model) and the input schema (trainSchema). The created
/// transformer can only transform IDataView objects compatible with the input schema; that is, the IDataView must contain
/// the columns specified by <see cref="MatrixColumnIndexColumnName"/>, <see cref="MatrixColumnIndexColumnType"/>,
/// <see cref="MatrixRowIndexColumnName"/>, and <see cref="MatrixRowIndexColumnType"/>.
/// The output column is "Score" by default, but the caller can append a suffix to it.
/// </summary>
/// <param name="env">Environment object; checked for null and used to register this transformer's host.</param>
/// <param name="model">The model trained by one of the training functions in <see cref="MatrixFactorizationTrainer"/></param>
/// <param name="trainSchema">Schema that must contain the columns named by <paramref name="matrixColumnIndexColumnName"/> and <paramref name="matrixRowIndexColumnName"/></param>
/// <param name="matrixColumnIndexColumnName">The name of the column used as role <see cref="RecommenderUtils.MatrixColumnIndexKind"/> in matrix factorization world</param>
/// <param name="matrixRowIndexColumnName">The name of the column used as role <see cref="RecommenderUtils.MatrixRowIndexKind"/> in matrix factorization world</param>
/// <param name="scoreColumnNameSuffix">A string attached to the output column name of this transformer</param>
public MatrixFactorizationPredictionTransformer(IHostEnvironment env, MatrixFactorizationPredictor model, Schema trainSchema,
    string matrixColumnIndexColumnName, string matrixRowIndexColumnName, string scoreColumnNameSuffix = "")
    : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(MatrixFactorizationPredictionTransformer)), model, trainSchema)
{
    // Validate each name under its own parameter name so a failure points at the right argument.
    Host.CheckNonEmpty(matrixColumnIndexColumnName, nameof(matrixColumnIndexColumnName));
    Host.CheckNonEmpty(matrixRowIndexColumnName, nameof(matrixRowIndexColumnName));

    MatrixColumnIndexColumnName = matrixColumnIndexColumnName;
    MatrixRowIndexColumnName = matrixRowIndexColumnName;

    // Resolve the matrix-column-index column in the training schema.
    if (!trainSchema.TryGetColumnIndex(MatrixColumnIndexColumnName, out int xCol))
    {
        throw Host.ExceptSchemaMismatch(nameof(MatrixColumnIndexColumnName), RecommenderUtils.MatrixColumnIndexKind.Value, MatrixColumnIndexColumnName);
    }
    MatrixColumnIndexColumnType = trainSchema.GetColumnType(xCol);

    // Resolve the matrix-row-index column in the training schema.
    if (!trainSchema.TryGetColumnIndex(MatrixRowIndexColumnName, out int yCol))
    {
        throw Host.ExceptSchemaMismatch(nameof(MatrixRowIndexColumnName), RecommenderUtils.MatrixRowIndexKind.Value, MatrixRowIndexColumnName);
    }
    // Record the row-index column type as well, matching what the load constructor restores.
    MatrixRowIndexColumnType = trainSchema.GetColumnType(yCol);

    BindableMapper = ScoreUtils.GetSchemaBindableMapper(Host, model);

    var schema = GetSchema();
    var args = new GenericScorer.Arguments { Suffix = scoreColumnNameSuffix };
    Scorer = new GenericScorer(Host, args, new EmptyDataView(Host, trainSchema), BindableMapper.Bind(Host, schema), schema);
}