/// <summary>
/// Checks the schema built by <c>ScoreSchemaFactory.CreateSequencePredictionSchema</c> when a non-empty
/// set of key names is supplied: one predicted-label key column carrying three metadata entries
/// (key values, score column kind and score value kind).
/// </summary>
public void SequencePredictorSchemaTest()
        {
            int keyCount = 10;
            var expectedScoreColumnType = new KeyType(DataKind.U4, 0, keyCount);
            VBuffer<ReadOnlyMemory<char>> keyNames = GenerateKeyNames(keyCount);

            var sequenceSchema = ScoreSchemaFactory.CreateSequencePredictionSchema(
                expectedScoreColumnType,
                MetadataUtils.Const.ScoreColumnKind.SequenceClassification,
                keyNames);

            // Output schema should only contain one column, which is the predicted label.
            Assert.Single(sequenceSchema);
            var scoreColumn = sequenceSchema[0];

            // Check score column name.
            Assert.Equal(MetadataUtils.Const.ScoreValueKind.PredictedLabel, scoreColumn.Name);

            // Check score column type. The cast is done once and asserted non-null so that an
            // unexpected type fails with an assertion rather than a NullReferenceException.
            Assert.True(scoreColumn.Type.IsKey);
            var actualScoreColumnType = scoreColumn.Type as KeyType;
            Assert.NotNull(actualScoreColumnType);
            Assert.Equal(expectedScoreColumnType.Min, actualScoreColumnType.Min);
            Assert.Equal(expectedScoreColumnType.Count, actualScoreColumnType.Count);
            Assert.Equal(expectedScoreColumnType.RawKind, actualScoreColumnType.RawKind);
            Assert.Equal(expectedScoreColumnType.Contiguous, actualScoreColumnType.Contiguous);

            // Check metadata. Because keyNames is not empty, there should be three metadata fields.
            var scoreMetadata = scoreColumn.Metadata;

            Assert.Equal(3, scoreMetadata.Schema.Count);

            // Check metadata columns' names.
            Assert.Equal(MetadataUtils.Kinds.KeyValues, scoreMetadata.Schema[0].Name);
            Assert.Equal(MetadataUtils.Kinds.ScoreColumnKind, scoreMetadata.Schema[1].Name);
            Assert.Equal(MetadataUtils.Kinds.ScoreValueKind, scoreMetadata.Schema[2].Name);

            // Check metadata columns' types. As above, the vector cast is checked before use.
            Assert.True(scoreMetadata.Schema[0].Type.IsVector);
            var keyValuesType = scoreMetadata.Schema[0].Type as VectorType;
            Assert.NotNull(keyValuesType);
            Assert.Equal(keyNames.Length, keyValuesType.VectorSize);
            Assert.Equal(TextType.Instance, keyValuesType.ItemType);
            Assert.Equal(TextType.Instance, scoreMetadata.Schema[1].Type);
            Assert.Equal(TextType.Instance, scoreMetadata.Schema[2].Type);

            // Check metadata columns' values.
            var keyNamesGetter = scoreMetadata.GetGetter<VBuffer<ReadOnlyMemory<char>>>(0);
            var actualKeyNames = new VBuffer<ReadOnlyMemory<char>>();

            keyNamesGetter(ref actualKeyNames);
            Assert.Equal(keyNames.Length, actualKeyNames.Length);
            Assert.Equal(keyNames.DenseValues(), actualKeyNames.DenseValues());

            // ReadOnlyMemory<char> is a struct, so the buffers start from its default value.
            var scoreColumnKindGetter = scoreMetadata.GetGetter<ReadOnlyMemory<char>>(1);
            var scoreColumnKindValue = default(ReadOnlyMemory<char>);

            scoreColumnKindGetter(ref scoreColumnKindValue);
            Assert.Equal(MetadataUtils.Const.ScoreColumnKind.SequenceClassification, scoreColumnKindValue.ToString());

            var scoreValueKindGetter = scoreMetadata.GetGetter<ReadOnlyMemory<char>>(2);
            var scoreValueKindValue = default(ReadOnlyMemory<char>);

            scoreValueKindGetter(ref scoreValueKindValue);
            Assert.Equal(MetadataUtils.Const.ScoreValueKind.PredictedLabel, scoreValueKindValue.ToString());
        }
Example #2
0
        /// <summary>
        /// Checks the schema built by <c>ScoreSchemaFactory.CreateSequencePredictionSchema</c> when a non-empty
        /// set of key names is supplied: one predicted-label key column carrying three annotations
        /// (key values, score column kind and score value kind).
        /// </summary>
        public void SequencePredictorSchemaTest()
        {
            int classCount = 10;
            var expectedKeyType = new KeyDataViewType(typeof(uint), classCount);
            VBuffer<ReadOnlyMemory<char>> expectedKeyNames = GenerateKeyNames(classCount);

            var schema = ScoreSchemaFactory.CreateSequencePredictionSchema(
                expectedKeyType,
                AnnotationUtils.Const.ScoreColumnKind.SequenceClassification,
                expectedKeyNames);

            // The predicted label must be the one and only output column.
            Assert.Single(schema);
            var predictedLabelColumn = schema[0];

            // Column name.
            Assert.Equal(AnnotationUtils.Const.ScoreValueKind.PredictedLabel, predictedLabelColumn.Name);

            // Column type: a key type with the expected count and raw type.
            var actualKeyType = predictedLabelColumn.Type as KeyDataViewType;

            Assert.NotNull(actualKeyType);
            Assert.Equal(expectedKeyType.Count, actualKeyType.Count);
            Assert.Equal(expectedKeyType.RawType, actualKeyType.RawType);

            // Key names were supplied, so three annotations are expected.
            var annotations = predictedLabelColumn.Annotations;

            Assert.Equal(3, annotations.Schema.Count);

            var keyValuesColumn = annotations.Schema[0];
            var scoreColumnKindColumn = annotations.Schema[1];
            var scoreValueKindColumn = annotations.Schema[2];

            // Annotation names.
            Assert.Equal(AnnotationUtils.Kinds.KeyValues, keyValuesColumn.Name);
            Assert.Equal(AnnotationUtils.Kinds.ScoreColumnKind, scoreColumnKindColumn.Name);
            Assert.Equal(AnnotationUtils.Kinds.ScoreValueKind, scoreValueKindColumn.Name);

            // Annotation types: a text vector sized like the key names, followed by two text scalars.
            Assert.True(keyValuesColumn.Type is VectorDataViewType);
            var keyValuesType = (VectorDataViewType)keyValuesColumn.Type;
            Assert.Equal(expectedKeyNames.Length, keyValuesType.Size);
            Assert.Equal(TextDataViewType.Instance, keyValuesType.ItemType);
            Assert.Equal(TextDataViewType.Instance, scoreColumnKindColumn.Type);
            Assert.Equal(TextDataViewType.Instance, scoreValueKindColumn.Type);

            // Annotation values.
            var readKeyNames = annotations.GetGetter<VBuffer<ReadOnlyMemory<char>>>(keyValuesColumn);
            var actualKeyNames = new VBuffer<ReadOnlyMemory<char>>();

            readKeyNames(ref actualKeyNames);
            Assert.Equal(expectedKeyNames.Length, actualKeyNames.Length);
            Assert.Equal(expectedKeyNames.DenseValues(), actualKeyNames.DenseValues());

            var readScoreColumnKind = annotations.GetGetter<ReadOnlyMemory<char>>(scoreColumnKindColumn);
            var actualScoreColumnKind = default(ReadOnlyMemory<char>);

            readScoreColumnKind(ref actualScoreColumnKind);
            Assert.Equal(AnnotationUtils.Const.ScoreColumnKind.SequenceClassification, actualScoreColumnKind.ToString());

            var readScoreValueKind = annotations.GetGetter<ReadOnlyMemory<char>>(scoreValueKindColumn);
            var actualScoreValueKind = default(ReadOnlyMemory<char>);

            readScoreValueKind(ref actualScoreValueKind);
            Assert.Equal(AnnotationUtils.Const.ScoreValueKind.PredictedLabel, actualScoreValueKind.ToString());
        }
            /// <summary>
            /// Builds the bound state for an ensemble: records the indices of the parent's input columns
            /// in <paramref name="schema"/>, then, for every predictor model, binds a row mapper, records
            /// the index of its score column and creates the row-to-row mapper for its transform pipeline.
            /// </summary>
            /// <param name="parent">The ensemble whose predictor models are being bound.</param>
            /// <param name="schema">The role-mapped input schema to bind against.</param>
            public BoundBase(SchemaBindablePipelineEnsembleBase parent, RoleMappedSchema schema)
            {
                Parent = parent;
                InputRoleMappedSchema = schema;
                OutputSchema = ScoreSchemaFactory.Create(Parent.ScoreType, Parent._scoreColumnKind);

                var host = Parent.Host;

                // Every input column named by the parent must exist in the input schema.
                _inputColIndices = new HashSet<int>();
                foreach (var columnName in Parent._inputCols)
                {
                    var inputColumn = InputRoleMappedSchema.Schema.GetColumnOrNull(columnName);
                    if (!inputColumn.HasValue)
                    {
                        throw host.ExceptSchemaMismatch(nameof(InputRoleMappedSchema), "input", columnName);
                    }
                    _inputColIndices.Add(inputColumn.Value.Index);
                }

                var models = Parent.PredictorModels;
                int modelCount = models.Length;

                Mappers = new ISchemaBoundRowMapper[modelCount];
                BoundPipelines = new IRowToRowMapper[modelCount];
                ScoreCols = new int[modelCount];

                for (int modelIndex = 0; modelIndex < modelCount; modelIndex++)
                {
                    var model = models[modelIndex];

                    // Obtain the RoleMappedSchema to hand to the predictor. The predictor returned through
                    // the out parameter is not used below; the model's Predictor property is read instead.
                    var schemaOnlyView = new EmptyDataView(host, schema.Schema);
                    model.PrepareData(host, schemaOnlyView, out RoleMappedData preparedData, out IPredictor predictor);

                    // Wrap the predictor as a bindable mapper and bind it to that schema.
                    var bindableMapper = ScoreUtils.GetSchemaBindableMapper(host, model.Predictor);
                    var rowMapper = bindableMapper.Bind(host, preparedData.Schema) as ISchemaBoundRowMapper;
                    Mappers[modelIndex] = rowMapper;
                    if (rowMapper == null)
                    {
                        throw host.Except("Predictor {0} is not a row to row mapper", modelIndex);
                    }

                    // A score column is required; keep its index for later lookups.
                    var scoreColumn = rowMapper.OutputSchema.GetColumnOrNull(MetadataUtils.Const.ScoreValueKind.Score);
                    if (!scoreColumn.HasValue)
                    {
                        throw host.Except("Predictor {0} does not contain a score column", modelIndex);
                    }
                    ScoreCols[modelIndex] = scoreColumn.Value.Index;

                    // Apply the model's transform pipeline to an empty view over the input schema and
                    // expose the result as a row-to-row mapper.
                    var pipelineInput = new EmptyDataView(host, schema.Schema);
                    var inputModel = new TransformModelImpl(host, pipelineInput, pipelineInput);
                    var appliedPipeline = model.TransformModel.Apply(host, inputModel);
                    BoundPipelines[modelIndex] = appliedPipeline.AsRowToRowMapper(host);
                    if (BoundPipelines[modelIndex] == null)
                    {
                        throw host.Except("Transform pipeline {0} contains transforms that do not implement IRowToRowMapper", modelIndex);
                    }
                }
            }
Example #4
0
        /// <summary>
        /// Checks the schema built by <c>ScoreSchemaFactory.CreateSequencePredictionSchema</c> when the key
        /// names are generated with a count of zero: one predicted-label key column carrying only two
        /// metadata entries (score column kind and score value kind).
        /// </summary>
        public void SequencePredictorSchemaWithoutKeyNamesMetadataTest()
        {
            int classCount = 10;
            var expectedKeyType = new KeyType(DataKind.U4, 0, classCount);
            VBuffer<ReadOnlyMemory<char>> noKeyNames = GenerateKeyNames(0);

            var schema = ScoreSchemaFactory.CreateSequencePredictionSchema(
                expectedKeyType,
                MetadataUtils.Const.ScoreColumnKind.SequenceClassification,
                noKeyNames);

            // The predicted label must be the one and only output column.
            Assert.Single(schema);
            var predictedLabelColumn = schema[0];

            // Column name.
            Assert.Equal(MetadataUtils.Const.ScoreValueKind.PredictedLabel, predictedLabelColumn.Name);

            // Column type: a key type matching the expected one property by property.
            var actualKeyType = predictedLabelColumn.Type as KeyType;

            Assert.NotNull(actualKeyType);
            Assert.Equal(expectedKeyType.Min, actualKeyType.Min);
            Assert.Equal(expectedKeyType.Count, actualKeyType.Count);
            Assert.Equal(expectedKeyType.RawType, actualKeyType.RawType);
            Assert.Equal(expectedKeyType.Contiguous, actualKeyType.Contiguous);

            // No key names were generated, so only two metadata fields are expected.
            var metadata = predictedLabelColumn.Metadata;

            Assert.Equal(2, metadata.Schema.Count);

            var scoreColumnKindColumn = metadata.Schema[0];
            var scoreValueKindColumn = metadata.Schema[1];

            // Metadata names.
            Assert.Equal(MetadataUtils.Kinds.ScoreColumnKind, scoreColumnKindColumn.Name);
            Assert.Equal(MetadataUtils.Kinds.ScoreValueKind, scoreValueKindColumn.Name);

            // Metadata types: both entries are text scalars.
            Assert.Equal(TextType.Instance, scoreColumnKindColumn.Type);
            Assert.Equal(TextType.Instance, scoreValueKindColumn.Type);

            // Metadata values.
            var readScoreColumnKind = metadata.GetGetter<ReadOnlyMemory<char>>(0);
            var actualScoreColumnKind = default(ReadOnlyMemory<char>);

            readScoreColumnKind(ref actualScoreColumnKind);
            Assert.Equal(MetadataUtils.Const.ScoreColumnKind.SequenceClassification, actualScoreColumnKind.ToString());

            var readScoreValueKind = metadata.GetGetter<ReadOnlyMemory<char>>(1);
            var actualScoreValueKind = default(ReadOnlyMemory<char>);

            readScoreValueKind(ref actualScoreValueKind);
            Assert.Equal(MetadataUtils.Const.ScoreValueKind.PredictedLabel, actualScoreValueKind.ToString());
        }
 /// <summary>
 /// Creates a <c>FieldAwareFactorizationMachineScalarRowMapper</c> for this instance over the given
 /// role-mapped schema, using the binary classification score schema as its output schema.
 /// </summary>
 /// <param name="env">The host environment passed to the row mapper.</param>
 /// <param name="schema">The role-mapped input schema passed to the row mapper.</param>
 /// <returns>The newly constructed row mapper.</returns>
 ISchemaBoundMapper ISchemaBindableMapper.Bind(IHostEnvironment env, RoleMappedSchema schema)
 {
     var outputSchema = ScoreSchemaFactory.CreateBinaryClassificationSchema();
     return new FieldAwareFactorizationMachineScalarRowMapper(env, schema, outputSchema, this);
 }