/// <summary>
/// Binds the parent's generic mapper to <paramref name="schema"/> and prepares the output schema, which is the
/// generic mapper's output schema zipped with a single feature-contributions column.
/// </summary>
/// <param name="env">Host environment; stored and used for the assertions in this constructor.</param>
/// <param name="parent">Bindable mapper providing <c>GenericMapper</c> and the <c>Stringify</c> flag.</param>
/// <param name="schema">Role-mapped input schema; asserted to have a feature column.</param>
public RowMapper(IHostEnvironment env, BindableMapper parent, RoleMappedSchema schema)
            {
                Contracts.AssertValue(env);
                _env = env;
                _env.AssertValue(schema);
                _env.AssertValue(parent);
                _env.Assert(schema.Feature.HasValue);

                _parent = parent;
                InputRoleMappedSchema = schema;

                // NOTE(review): the 'as' cast leaves _genericRowMapper null when the bound mapper is not an
                // ISchemaBoundRowMapper; it is dereferenced without a check at the end of this constructor.
                _genericRowMapper = parent.GenericMapper.Bind(_env, schema) as ISchemaBoundRowMapper;
                var slotCount = FeatureColumn.Type.GetVectorSize();

                if (!parent.Stringify)
                {
                    // Vector output: a Single vector shaped like the feature column, carrying the feature
                    // column's slot names as annotations when it has them.
                    var annotations = new DataViewSchema.Annotations.Builder();
                    if (InputSchema[FeatureColumn.Index].HasSlotNames(slotCount))
                    {
                        annotations.AddSlotNames(slotCount, (ref VBuffer<ReadOnlyMemory<char>> names) =>
                        {
                            FeatureColumn.Annotations.GetValue(AnnotationUtils.Kinds.SlotNames, ref names);
                        });
                    }

                    var vectorSchema     = new DataViewSchema.Builder();
                    var contributionType = new VectorType(NumberDataViewType.Single, FeatureColumn.Type as VectorType);
                    vectorSchema.AddColumn(DefaultColumnNames.FeatureContributions, contributionType, annotations.ToAnnotations());
                    _outputSchema = vectorSchema.ToSchema();
                }
                else
                {
                    // Text output: a single text column with no annotations.
                    var textSchema = new DataViewSchema.Builder();
                    textSchema.AddColumn(DefaultColumnNames.FeatureContributions, TextDataViewType.Instance, null);
                    _outputSchema = textSchema.ToSchema();

                    // Cache the slot names when the feature column has them; otherwise use an empty buffer of the same length.
                    if (!FeatureColumn.HasSlotNames(slotCount))
                    {
                        _slotNames = VBufferUtils.CreateEmpty<ReadOnlyMemory<char>>(slotCount);
                    }
                    else
                    {
                        FeatureColumn.Annotations.GetValue(AnnotationUtils.Kinds.SlotNames, ref _slotNames);
                    }
                }

                _outputGenericSchema = _genericRowMapper.OutputSchema;

                var zippedSchemas = new DataViewSchema[] { _outputGenericSchema, _outputSchema, };
                OutputSchema = new ZipBinding(zippedSchemas).OutputSchema;
            }
            /// <summary>
            /// Binds the parent's generic mapper to <paramref name="schema"/> and prepares the output schema, which is
            /// the generic mapper's output schema composed with a single feature-contributions column.
            /// </summary>
            /// <param name="env">Host environment; stored and used for the assertions in this constructor.</param>
            /// <param name="parent">Bindable mapper providing <c>GenericMapper</c> and the <c>Stringify</c> flag.</param>
            /// <param name="schema">Role-mapped input schema; its feature column is asserted to be non-null.</param>
            public RowMapper(IHostEnvironment env, BindableMapper parent, RoleMappedSchema schema)
            {
                Contracts.AssertValue(env);
                _env = env;
                _env.AssertValue(schema);
                _env.AssertValue(parent);
                _env.AssertValue(schema.Feature);

                _parent = parent;
                InputRoleMappedSchema = schema;

                // NOTE(review): the 'as' cast leaves _genericRowMapper null when the bound mapper is not an
                // ISchemaBoundRowMapper; it is dereferenced without a check at the end of this constructor.
                _genericRowMapper = parent.GenericMapper.Bind(_env, schema) as ISchemaBoundRowMapper;

                if (!parent.Stringify)
                {
                    // Vector output: an R4 vector shaped like the feature column.
                    var contributionType = new VectorType(NumberType.R4, schema.Feature.Type as VectorType);
                    var contributionSchema = new FeatureContributionSchema(_env, DefaultColumnNames.FeatureContributions,
                                                                           contributionType, InputSchema,
                                                                           InputRoleMappedSchema.Feature.Index);
                    _outputSchema = Schema.Create(contributionSchema);
                }
                else
                {
                    // Text output: a single text column.
                    var textSchema = new SchemaBuilder();
                    textSchema.AddColumn(DefaultColumnNames.FeatureContributions, TextType.Instance, null);
                    _outputSchema = textSchema.GetSchema();

                    // Cache the slot names when the feature column has them; otherwise use an empty buffer of the same length.
                    var featureCol = InputRoleMappedSchema.Feature;
                    if (!InputSchema.HasSlotNames(featureCol.Index, featureCol.Type.VectorSize))
                    {
                        _slotNames = VBufferUtils.CreateEmpty<ReadOnlyMemory<char>>(InputRoleMappedSchema.Feature.Type.VectorSize);
                    }
                    else
                    {
                        InputSchema.GetMetadata(MetadataUtils.Kinds.SlotNames, InputRoleMappedSchema.Feature.Index, ref _slotNames);
                    }
                }

                _outputGenericSchema = _genericRowMapper.OutputSchema;

                var composedSchemas = new Schema[] { _outputGenericSchema, _outputSchema, };
                OutputSchema = new CompositeSchema(composedSchemas).AsSchema;
            }
Exemplo n.º 3
0
        /// <summary>
        /// Creates a <see cref="GenericScorer"/> with default arguments, bound to a role-mapped view of
        /// <c>TrainSchema</c> over an empty data view.
        /// </summary>
        /// <returns>The newly constructed scorer.</returns>
        private protected GenericScorer GetGenericScorer()
        {
            var roleMapped  = new RoleMappedSchema(TrainSchema, null, FeatureColumnName);
            var scorerArgs  = new GenericScorer.Arguments();
            var emptyInput  = new EmptyDataView(Host, TrainSchema);
            var boundMapper = BindableMapper.Bind(Host, roleMapped);

            return new GenericScorer(Host, scorerArgs, emptyInput, boundMapper, roleMapped);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Assigns <c>Scorer</c> a <see cref="MultiClassClassifierScorer"/> with default arguments, bound to a
        /// role-mapped view of <c>TrainSchema</c> built from the train label column and the feature column.
        /// </summary>
        private void SetScorer()
        {
            var roleMapped  = new RoleMappedSchema(TrainSchema, _trainLabelColumn, FeatureColumn);
            var scorerArgs  = new MultiClassClassifierScorer.Arguments();
            var emptyInput  = new EmptyDataView(Host, TrainSchema);
            var boundMapper = BindableMapper.Bind(Host, roleMapped);

            Scorer = new MultiClassClassifierScorer(Host, scorerArgs, emptyInput, boundMapper, roleMapped);
        }
Exemplo n.º 5
0
        /// <summary>
        /// Restores the transformer from <paramref name="ctx"/>: reads the feature column names, the scorer threshold
        /// and the threshold column, then rebuilds the bindable mapper and the binary classifier scorer.
        /// </summary>
        /// <param name="host">Host environment; checked for null and used to register this component's host.</param>
        /// <param name="ctx">Model load context positioned after the base class information.</param>
        public FieldAwareFactorizationMachinePredictionTransformer(IHostEnvironment host, ModelLoadContext ctx)
            : base(Contracts.CheckRef(host, nameof(host)).Register(nameof(FieldAwareFactorizationMachinePredictionTransformer)), ctx)
        {
            // *** Binary format ***
            // <base info>
            // ids of strings: feature columns.
            // float: scorer threshold
            // id of string: scorer threshold column

            // One feature column is stored per model field; FAFM uses more than one.
            int fieldCount = Model.FieldCount;

            FeatureColumns     = new string[fieldCount];
            FeatureColumnTypes = new ColumnType[fieldCount];

            for (int idx = 0; idx < fieldCount; ++idx)
            {
                var columnName = ctx.LoadString();
                FeatureColumns[idx] = columnName;

                // Every saved feature column must still be present in the train schema.
                if (TrainSchema.TryGetColumnIndex(columnName, out int columnIndex))
                {
                    FeatureColumnTypes[idx] = TrainSchema.GetColumnType(columnIndex);
                    continue;
                }

                throw Host.ExceptSchemaMismatch(nameof(FeatureColumns), RoleMappedSchema.ColumnRole.Feature.Value, columnName);
            }

            _threshold       = ctx.Reader.ReadSingle();
            _thresholdColumn = ctx.LoadString();

            BindableMapper = ScoreUtils.GetSchemaBindableMapper(Host, Model);

            var roleMapped = GetSchema();
            var scorerArgs = new BinaryClassifierScorer.Arguments();
            scorerArgs.Threshold       = _threshold;
            scorerArgs.ThresholdColumn = _thresholdColumn;

            var emptyInput  = new EmptyDataView(Host, TrainSchema);
            var boundMapper = BindableMapper.Bind(Host, roleMapped);

            Scorer = new BinaryClassifierScorer(Host, scorerArgs, emptyInput, boundMapper, roleMapped);
        }
Exemplo n.º 6
0
        /// <summary>
        /// Builds the transformer from a trained <paramref name="model"/> and the schema it was trained on, resolving
        /// each feature column's type and creating the binary classifier scorer.
        /// </summary>
        /// <param name="host">Host environment; checked for null and used to register this component's host.</param>
        /// <param name="model">The trained field-aware factorization machine predictor.</param>
        /// <param name="trainSchema">Schema in which every name in <paramref name="featureColumns"/> must exist.</param>
        /// <param name="featureColumns">Names of the feature columns; must be non-null and contain at least one name.</param>
        /// <param name="threshold">Threshold passed to the scorer arguments.</param>
        /// <param name="thresholdColumn">Threshold column passed to the scorer arguments; must be non-empty.</param>
        public FieldAwareFactorizationMachinePredictionTransformer(IHostEnvironment host, FieldAwareFactorizationMachinePredictor model, Schema trainSchema,
                                                                   string[] featureColumns, float threshold = 0f, string thresholdColumn = DefaultColumnNames.Score)
            : base(Contracts.CheckRef(host, nameof(host)).Register(nameof(FieldAwareFactorizationMachinePredictionTransformer)), model, trainSchema)
        {
            Host.CheckNonEmpty(thresholdColumn, nameof(thresholdColumn));
            _threshold       = threshold;
            _thresholdColumn = thresholdColumn;

            Host.CheckValue(featureColumns, nameof(featureColumns));
            int featCount = featureColumns.Length;

            // An array length is never negative, so the check must be strict to actually reject an empty list.
            Host.Check(featCount > 0, "Empty features column.");

            FeatureColumns     = featureColumns;
            FeatureColumnTypes = new ColumnType[featCount];

            int i = 0;

            foreach (var feat in featureColumns)
            {
                // Every requested feature column must be present in the train schema.
                if (!trainSchema.TryGetColumnIndex(feat, out int col))
                {
                    throw Host.ExceptSchemaMismatch(nameof(featureColumns), RoleMappedSchema.ColumnRole.Feature.Value, feat);
                }
                FeatureColumnTypes[i++] = trainSchema.GetColumnType(col);
            }

            BindableMapper = ScoreUtils.GetSchemaBindableMapper(Host, model);

            var schema = GetSchema();
            var args   = new BinaryClassifierScorer.Arguments {
                Threshold = _threshold, ThresholdColumn = _thresholdColumn
            };

            Scorer = new BinaryClassifierScorer(Host, args, new EmptyDataView(Host, trainSchema), BindableMapper.Bind(Host, schema), schema);
        }
        /// <summary>
        /// Re-creates a <see cref="MatrixFactorizationPredictionTransformer"/> from the context in which the original
        /// transformer was saved.
        /// </summary>
        /// <param name="host">Host environment; checked for null and used to register this component's host.</param>
        /// <param name="ctx">Model load context positioned after the base class information.</param>
        public MatrixFactorizationPredictionTransformer(IHostEnvironment host, ModelLoadContext ctx)
            : base(Contracts.CheckRef(host, nameof(host)).Register(nameof(MatrixFactorizationPredictionTransformer)), ctx)
        {
            // *** Binary format ***
            // <base info>
            // string: the column name of matrix's column ids.
            // string: the column name of matrix's row ids.

            MatrixColumnIndexColumnName = ctx.LoadString();
            MatrixRowIndexColumnName    = ctx.LoadString();

            // Both index columns must exist in the train schema; their types are taken from there.
            bool hasColumnIndex = TrainSchema.TryGetColumnIndex(MatrixColumnIndexColumnName, out int columnIndexCol);
            if (!hasColumnIndex)
            {
                throw Host.ExceptSchemaMismatch(nameof(MatrixColumnIndexColumnName), RecommenderUtils.MatrixColumnIndexKind.Value, MatrixColumnIndexColumnName);
            }
            MatrixColumnIndexColumnType = TrainSchema.GetColumnType(columnIndexCol);

            bool hasRowIndex = TrainSchema.TryGetColumnIndex(MatrixRowIndexColumnName, out int rowIndexCol);
            if (!hasRowIndex)
            {
                throw Host.ExceptSchemaMismatch(nameof(MatrixRowIndexColumnName), RecommenderUtils.MatrixRowIndexKind.Value, MatrixRowIndexColumnName);
            }
            MatrixRowIndexColumnType = TrainSchema.GetColumnType(rowIndexCol);

            BindableMapper = ScoreUtils.GetSchemaBindableMapper(Host, Model);

            var roleMapped = GetSchema();
            var scorerArgs = new GenericScorer.Arguments();
            scorerArgs.Suffix = "";

            var emptyInput  = new EmptyDataView(Host, TrainSchema);
            var boundMapper = BindableMapper.Bind(Host, roleMapped);

            Scorer = new GenericScorer(Host, scorerArgs, emptyInput, boundMapper, roleMapped);
        }
Exemplo n.º 8
0
        /// <summary>
        /// Builds the transformer from a trained <paramref name="model"/> and creates a
        /// <see cref="ClusteringScorer"/> with default arguments over <paramref name="inputSchema"/>.
        /// </summary>
        /// <param name="env">Host environment; checked for null and used to register this component's host.</param>
        /// <param name="model">The trained model.</param>
        /// <param name="inputSchema">Schema used for the role mapping and the empty data view.</param>
        /// <param name="featureColumn">Name of the feature column.</param>
        /// <param name="threshold">Not read by this constructor body.</param>
        /// <param name="thresholdColumn">Validated as non-empty; not otherwise used by this constructor body.</param>
        internal ClusteringPredictionTransformer(IHostEnvironment env, TModel model, DataViewSchema inputSchema, string featureColumn,
                                                 float threshold = 0f, string thresholdColumn = DefaultColumnNames.Score)
            : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ClusteringPredictionTransformer<TModel>)), model, inputSchema, featureColumn)
        {
            Host.CheckNonEmpty(thresholdColumn, nameof(thresholdColumn));

            var roleMapped  = new RoleMappedSchema(inputSchema, null, featureColumn);
            var scorerArgs  = new ClusteringScorer.Arguments();
            var emptyInput  = new EmptyDataView(Host, inputSchema);
            var boundMapper = BindableMapper.Bind(Host, roleMapped);

            Scorer = new ClusteringScorer(Host, scorerArgs, emptyInput, boundMapper, roleMapped);
        }
Exemplo n.º 9
0
        /// <summary>
        /// Restores the transformer from <paramref name="ctx"/> and creates a <see cref="GenericScorer"/> with
        /// default arguments over <c>TrainSchema</c>.
        /// </summary>
        /// <param name="env">Host environment; checked for null and used to register this component's host.</param>
        /// <param name="ctx">Model load context handed to the base constructor.</param>
        internal RankingPredictionTransformer(IHostEnvironment env, ModelLoadContext ctx)
            : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(RankingPredictionTransformer<TModel>)), ctx)
        {
            var roleMapped  = new RoleMappedSchema(TrainSchema, null, FeatureColumn);
            var scorerArgs  = new GenericScorer.Arguments();
            var emptyInput  = new EmptyDataView(Host, TrainSchema);
            var boundMapper = BindableMapper.Bind(Host, roleMapped);

            _scorer = new GenericScorer(Host, scorerArgs, emptyInput, boundMapper, roleMapped);
        }
Exemplo n.º 10
0
        /// <summary>
        /// Builds the transformer from a trained <paramref name="model"/> and creates a <see cref="GenericScorer"/>
        /// with default arguments over <paramref name="inputSchema"/>.
        /// </summary>
        /// <param name="env">Host environment; checked for null and used to register this component's host.</param>
        /// <param name="model">The trained model.</param>
        /// <param name="inputSchema">Schema used for the role mapping and the empty data view.</param>
        /// <param name="featureColumn">Name of the feature column.</param>
        public RankingPredictionTransformer(IHostEnvironment env, TModel model, ISchema inputSchema, string featureColumn)
            : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(RankingPredictionTransformer<TModel>)), model, inputSchema, featureColumn)
        {
            var roleMapped  = new RoleMappedSchema(inputSchema, null, featureColumn);
            var scorerArgs  = new GenericScorer.Arguments();
            var emptyInput  = new EmptyDataView(Host, inputSchema);
            var boundMapper = BindableMapper.Bind(Host, roleMapped);

            _scorer = new GenericScorer(Host, scorerArgs, emptyInput, boundMapper, roleMapped);
        }
Exemplo n.º 11
0
        /// <summary>
        /// Restores the transformer from <paramref name="ctx"/>: reads the (possibly null) train label column name
        /// and creates a <see cref="MultiClassClassifierScorer"/> with default arguments over <c>TrainSchema</c>.
        /// </summary>
        /// <param name="env">Host environment; checked for null and used to register this component's host.</param>
        /// <param name="ctx">Model load context positioned after the base class information.</param>
        public MulticlassPredictionTransformer(IHostEnvironment env, ModelLoadContext ctx)
            : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(MulticlassPredictionTransformer<TModel>)), ctx)
        {
            // *** Binary format ***
            // <base info>
            // id of string: train label column

            _trainLabelColumn = ctx.LoadStringOrNull();

            var roleMapped  = new RoleMappedSchema(TrainSchema, _trainLabelColumn, FeatureColumn);
            var scorerArgs  = new MultiClassClassifierScorer.Arguments();
            var emptyInput  = new EmptyDataView(Host, TrainSchema);
            var boundMapper = BindableMapper.Bind(Host, roleMapped);

            _scorer = new MultiClassClassifierScorer(Host, scorerArgs, emptyInput, boundMapper, roleMapped);
        }
Exemplo n.º 12
0
        /// <summary>
        /// Builds the transformer from a trained <paramref name="model"/> and creates a
        /// <see cref="MultiClassClassifierScorer"/> with default arguments over <paramref name="inputSchema"/>.
        /// </summary>
        /// <param name="env">Host environment; checked for null and used to register this component's host.</param>
        /// <param name="model">The trained model.</param>
        /// <param name="inputSchema">Schema used for the role mapping and the empty data view.</param>
        /// <param name="featureColumn">Name of the feature column.</param>
        /// <param name="labelColumn">Name of the label column; may be null. Stored as the train label column.</param>
        public MulticlassPredictionTransformer(IHostEnvironment env, TModel model, ISchema inputSchema, string featureColumn, string labelColumn)
            : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(MulticlassPredictionTransformer<TModel>)), model, inputSchema, featureColumn)
        {
            Host.CheckValueOrNull(labelColumn);
            _trainLabelColumn = labelColumn;

            var roleMapped  = new RoleMappedSchema(inputSchema, labelColumn, featureColumn);
            var scorerArgs  = new MultiClassClassifierScorer.Arguments();
            var emptyInput  = new EmptyDataView(Host, inputSchema);
            var boundMapper = BindableMapper.Bind(Host, roleMapped);

            _scorer = new MultiClassClassifierScorer(Host, scorerArgs, emptyInput, boundMapper, roleMapped);
        }
Exemplo n.º 13
0
        /// <summary>
        /// Restores the transformer from <paramref name="ctx"/>: reads the scorer threshold and threshold column,
        /// then creates a <see cref="BinaryClassifierScorer"/> configured with them over <c>TrainSchema</c>.
        /// </summary>
        /// <param name="env">Host environment; checked for null and used to register this component's host.</param>
        /// <param name="ctx">Model load context positioned after the base class information.</param>
        public BinaryPredictionTransformer(IHostEnvironment env, ModelLoadContext ctx)
            : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(BinaryPredictionTransformer<TModel>)), ctx)
        {
            // *** Binary format ***
            // <base info>
            // float: scorer threshold
            // id of string: scorer threshold column

            Threshold       = ctx.Reader.ReadSingle();
            ThresholdColumn = ctx.LoadString();

            var roleMapped = new RoleMappedSchema(TrainSchema, null, FeatureColumn);

            var scorerArgs = new BinaryClassifierScorer.Arguments();
            scorerArgs.Threshold       = Threshold;
            scorerArgs.ThresholdColumn = ThresholdColumn;

            var emptyInput  = new EmptyDataView(Host, TrainSchema);
            var boundMapper = BindableMapper.Bind(Host, roleMapped);

            _scorer = new BinaryClassifierScorer(Host, scorerArgs, emptyInput, boundMapper, roleMapped);
        }
Exemplo n.º 14
0
        /// <summary>
        /// Builds the transformer from a trained <paramref name="model"/> and creates a
        /// <see cref="BinaryClassifierScorer"/> configured with the given threshold settings over
        /// <paramref name="inputSchema"/>.
        /// </summary>
        /// <param name="env">Host environment; checked for null and used to register this component's host.</param>
        /// <param name="model">The trained model.</param>
        /// <param name="inputSchema">Schema used for the role mapping and the empty data view.</param>
        /// <param name="featureColumn">Name of the feature column.</param>
        /// <param name="threshold">Threshold stored on the transformer and passed to the scorer arguments.</param>
        /// <param name="thresholdColumn">Threshold column stored on the transformer and passed to the scorer arguments; must be non-empty.</param>
        public BinaryPredictionTransformer(IHostEnvironment env, TModel model, ISchema inputSchema, string featureColumn,
                                           float threshold = 0f, string thresholdColumn = DefaultColumnNames.Score)
            : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(BinaryPredictionTransformer<TModel>)), model, inputSchema, featureColumn)
        {
            Host.CheckNonEmpty(thresholdColumn, nameof(thresholdColumn));

            var roleMapped = new RoleMappedSchema(inputSchema, null, featureColumn);

            Threshold       = threshold;
            ThresholdColumn = thresholdColumn;

            var scorerArgs = new BinaryClassifierScorer.Arguments();
            scorerArgs.Threshold       = Threshold;
            scorerArgs.ThresholdColumn = ThresholdColumn;

            var emptyInput  = new EmptyDataView(Host, inputSchema);
            var boundMapper = BindableMapper.Bind(Host, roleMapped);

            _scorer = new BinaryClassifierScorer(Host, scorerArgs, emptyInput, boundMapper, roleMapped);
        }
Exemplo n.º 15
0
        /// <summary>
        /// Assigns <c>Scorer</c> a <see cref="BinaryClassifierScorer"/> configured with the current threshold
        /// settings, bound to a role-mapped view of <c>TrainSchema</c>.
        /// </summary>
        private void SetScorer()
        {
            var roleMapped = new RoleMappedSchema(TrainSchema, LabelColumnName, FeatureColumnName);

            var scorerArgs = new BinaryClassifierScorer.Arguments();
            scorerArgs.Threshold       = Threshold;
            scorerArgs.ThresholdColumn = ThresholdColumn;

            var emptyInput  = new EmptyDataView(Host, TrainSchema);
            var boundMapper = BindableMapper.Bind(Host, roleMapped);

            Scorer = new BinaryClassifierScorer(Host, scorerArgs, emptyInput, boundMapper, roleMapped);
        }
Exemplo n.º 16
0
        /// <summary>
        /// Assigns <c>Scorer</c> a <see cref="MulticlassClassificationScorer"/> configured with the score and
        /// predicted-label column names, bound to a role-mapped view of <c>TrainSchema</c>.
        /// </summary>
        private void SetScorer()
        {
            var roleMapped = new RoleMappedSchema(TrainSchema, _trainLabelColumn, FeatureColumnName);

            var scorerArgs = new MulticlassClassificationScorer.Arguments();
            scorerArgs.ScoreColumnName          = _scoreColumn;
            scorerArgs.PredictedLabelColumnName = _predictedLabelColumn;

            var emptyInput  = new EmptyDataView(Host, TrainSchema);
            var boundMapper = BindableMapper.Bind(Host, roleMapped);

            Scorer = new MulticlassClassificationScorer(Host, scorerArgs, emptyInput, boundMapper, roleMapped);
        }
        /// <summary>
        /// Builds the transformer from trained model parameters and the schema they were trained on, resolving each
        /// feature column's type and creating the binary classifier scorer.
        /// </summary>
        /// <param name="host">Host environment; checked for null and used to register this component's host.</param>
        /// <param name="model">The trained field-aware factorization machine model parameters.</param>
        /// <param name="trainSchema">Schema in which every name in <paramref name="featureColumns"/> must exist.</param>
        /// <param name="featureColumns">Names of the feature columns; checked to be non-empty.</param>
        /// <param name="threshold">Threshold passed to the scorer arguments.</param>
        /// <param name="thresholdColumn">Threshold column passed to the scorer arguments; must be non-empty.</param>
        internal FieldAwareFactorizationMachinePredictionTransformer(IHostEnvironment host, FieldAwareFactorizationMachineModelParameters model, DataViewSchema trainSchema,
                                                                     string[] featureColumns, float threshold = 0f, string thresholdColumn = DefaultColumnNames.Score)
            : base(Contracts.CheckRef(host, nameof(host)).Register(nameof(FieldAwareFactorizationMachinePredictionTransformer)), model, trainSchema)
        {
            Host.CheckNonEmpty(thresholdColumn, nameof(thresholdColumn));
            Host.CheckNonEmpty(featureColumns, nameof(featureColumns));

            _threshold       = threshold;
            _thresholdColumn = thresholdColumn;
            FeatureColumns   = featureColumns;

            // Resolve the type of every feature column; a name missing from the train schema is a schema mismatch.
            var resolvedTypes = new DataViewType[featureColumns.Length];
            for (int idx = 0; idx < featureColumns.Length; idx++)
            {
                var columnName = featureColumns[idx];
                if (!trainSchema.TryGetColumnIndex(columnName, out int columnIndex))
                {
                    throw Host.ExceptSchemaMismatch(nameof(featureColumns), "feature", columnName);
                }
                resolvedTypes[idx] = trainSchema[columnIndex].Type;
            }
            FeatureColumnTypes = resolvedTypes;

            BindableMapper = ScoreUtils.GetSchemaBindableMapper(Host, model);

            var roleMapped = GetSchema();
            var scorerArgs = new BinaryClassifierScorer.Arguments();
            scorerArgs.Threshold       = _threshold;
            scorerArgs.ThresholdColumn = _thresholdColumn;

            var emptyInput  = new EmptyDataView(Host, trainSchema);
            var boundMapper = BindableMapper.Bind(Host, roleMapped);

            Scorer = new BinaryClassifierScorer(Host, scorerArgs, emptyInput, boundMapper, roleMapped);
        }
Exemplo n.º 18
0
        /// <summary>
        /// Restores the transformer from <paramref name="ctx"/> and creates a <see cref="ClusteringScorer"/> with
        /// default arguments over <c>TrainSchema</c>.
        /// </summary>
        /// <param name="env">Host environment; checked for null and used to register this component's host.</param>
        /// <param name="ctx">Model load context handed to the base constructor.</param>
        internal ClusteringPredictionTransformer(IHostEnvironment env, ModelLoadContext ctx)
            : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ClusteringPredictionTransformer<TModel>)), ctx)
        {
            // *** Binary format ***
            // <base info>

            var roleMapped  = new RoleMappedSchema(TrainSchema, null, FeatureColumnName);
            var scorerArgs  = new ClusteringScorer.Arguments();
            var emptyInput  = new EmptyDataView(Host, TrainSchema);
            var boundMapper = BindableMapper.Bind(Host, roleMapped);

            Scorer = new ClusteringScorer(Host, scorerArgs, emptyInput, boundMapper, roleMapped);
        }
        /// <summary>
        /// Build a transformer based on a matrix factorization predictor (model) and the input schema (trainSchema). The created
        /// transformer can only transform IDataView objects compatible with the input schema; that is, the IDataView must contain
        /// columns specified by <see cref="MatrixColumnIndexColumnName"/>, <see cref="MatrixColumnIndexColumnType"/>, <see cref="MatrixRowIndexColumnName"/>, and <see cref="MatrixRowIndexColumnType"></see>.
        /// The output column is "Score" by default but the user can append a string to it.
        /// </summary>
        /// <param name="env">Environment object for showing information</param>
        /// <param name="model">The model trained by one of the training functions in <see cref="MatrixFactorizationTrainer"/></param>
        /// <param name="trainSchema">Targeted schema that contains the columns named by <paramref name="matrixColumnIndexColumnName"/> and <paramref name="matrixRowIndexColumnName"/></param>
        /// <param name="matrixColumnIndexColumnName">The name of the column used as role <see cref="RecommenderUtils.MatrixColumnIndexKind"/> in matrix factorization world</param>
        /// <param name="matrixRowIndexColumnName">The name of the column used as role <see cref="RecommenderUtils.MatrixRowIndexKind"/> in matrix factorization world</param>
        /// <param name="scoreColumnNameSuffix">A string attached to the output column name of this transformer</param>
        public MatrixFactorizationPredictionTransformer(IHostEnvironment env, MatrixFactorizationPredictor model, Schema trainSchema,
                                                        string matrixColumnIndexColumnName, string matrixRowIndexColumnName, string scoreColumnNameSuffix = "")
            : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(MatrixFactorizationPredictionTransformer)), model, trainSchema)
        {
            // Validate each name against its own parameter so a failure reports the argument that is actually wrong.
            Host.CheckNonEmpty(matrixColumnIndexColumnName, nameof(matrixColumnIndexColumnName));
            Host.CheckNonEmpty(matrixRowIndexColumnName, nameof(matrixRowIndexColumnName));

            MatrixColumnIndexColumnName = matrixColumnIndexColumnName;
            MatrixRowIndexColumnName    = matrixRowIndexColumnName;

            // Both index columns must exist in the train schema; their types are taken from there.
            if (!trainSchema.TryGetColumnIndex(MatrixColumnIndexColumnName, out int xCol))
            {
                throw Host.ExceptSchemaMismatch(nameof(MatrixColumnIndexColumnName), RecommenderUtils.MatrixColumnIndexKind.Value, MatrixColumnIndexColumnName);
            }
            MatrixColumnIndexColumnType = trainSchema.GetColumnType(xCol);

            if (!trainSchema.TryGetColumnIndex(MatrixRowIndexColumnName, out int yCol))
            {
                throw Host.ExceptSchemaMismatch(nameof(MatrixRowIndexColumnName), RecommenderUtils.MatrixRowIndexKind.Value, MatrixRowIndexColumnName);
            }
            // The row-index type must be recorded as well, mirroring the column-index handling above.
            MatrixRowIndexColumnType = trainSchema.GetColumnType(yCol);

            BindableMapper = ScoreUtils.GetSchemaBindableMapper(Host, model);

            var schema = GetSchema();
            var args   = new GenericScorer.Arguments {
                Suffix = scoreColumnNameSuffix
            };

            Scorer = new GenericScorer(Host, args, new EmptyDataView(Host, trainSchema), BindableMapper.Bind(Host, schema), schema);
        }