/// <summary>
/// Creates a sequence-prediction score schema from a ten-value key type plus key names and
/// verifies the single output column: its name, its key type, and the names, types and
/// values of its three metadata fields.
/// </summary>
public void SequencePredictorSchemaTest()
{
    int keyCount = 10;
    var expectedType = new KeyType(DataKind.U4, 0, keyCount);
    // GenerateKeyNames is defined outside this block; the assertions below rely on it
    // returning one name per key value.
    VBuffer<ReadOnlyMemory<char>> keyNames = GenerateKeyNames(keyCount);

    var schema = ScoreSchemaFactory.CreateSequencePredictionSchema(
        expectedType,
        MetadataUtils.Const.ScoreColumnKind.SequenceClassification,
        keyNames);

    // The predicted label is expected to be the one and only output column.
    Assert.Single(schema);
    var scoreColumn = schema[0];

    // Column name.
    Assert.Equal(MetadataUtils.Const.ScoreValueKind.PredictedLabel, scoreColumn.Name);

    // Column type: must be a key type that matches the requested one property by property.
    Assert.True(scoreColumn.Type.IsKey);
    var actualType = scoreColumn.Type as KeyType;
    Assert.Equal(expectedType.Min, actualType.Min);
    Assert.Equal(expectedType.Count, actualType.Count);
    Assert.Equal(expectedType.RawKind, actualType.RawKind);
    Assert.Equal(expectedType.Contiguous, actualType.Contiguous);

    // Metadata: key names were supplied, so three fields are expected.
    var metadata = scoreColumn.Metadata;
    Assert.Equal(3, metadata.Schema.Count);

    var keyValuesColumn = metadata.Schema[0];
    var columnKindColumn = metadata.Schema[1];
    var valueKindColumn = metadata.Schema[2];

    // Metadata field names, in their expected order.
    Assert.Equal(MetadataUtils.Kinds.KeyValues, keyValuesColumn.Name);
    Assert.Equal(MetadataUtils.Kinds.ScoreColumnKind, columnKindColumn.Name);
    Assert.Equal(MetadataUtils.Kinds.ScoreValueKind, valueKindColumn.Name);

    // Metadata field types: a text vector sized like keyNames, followed by two text scalars.
    Assert.True(keyValuesColumn.Type.IsVector);
    var keyValuesType = keyValuesColumn.Type as VectorType;
    Assert.Equal(keyNames.Length, keyValuesType.VectorSize);
    Assert.Equal(TextType.Instance, keyValuesType.ItemType);
    Assert.Equal(TextType.Instance, columnKindColumn.Type);
    Assert.Equal(TextType.Instance, valueKindColumn.Type);

    // Metadata field values: the key names must round-trip unchanged.
    var readKeyNames = metadata.GetGetter<VBuffer<ReadOnlyMemory<char>>>(0);
    var actualKeyNames = new VBuffer<ReadOnlyMemory<char>>();
    readKeyNames(ref actualKeyNames);
    Assert.Equal(keyNames.Length, actualKeyNames.Length);
    Assert.Equal(keyNames.DenseValues(), actualKeyNames.DenseValues());

    // Reads the text metadata field at the given index and returns it as a string.
    string ReadTextMetadata(int index)
    {
        var getter = metadata.GetGetter<ReadOnlyMemory<char>>(index);
        ReadOnlyMemory<char> text = default(ReadOnlyMemory<char>);
        getter(ref text);
        return text.ToString();
    }

    Assert.Equal(MetadataUtils.Const.ScoreColumnKind.SequenceClassification, ReadTextMetadata(1));
    Assert.Equal(MetadataUtils.Const.ScoreValueKind.PredictedLabel, ReadTextMetadata(2));
}
/// <summary>
/// Creates a sequence-prediction score schema from a ten-value key type plus key names and
/// verifies the single output column: its name, its key type, and the names, types and
/// values of its three annotation fields.
/// </summary>
public void SequencePredictorSchemaTest()
{
    int keyCount = 10;
    var expectedType = new KeyDataViewType(typeof(uint), keyCount);
    // GenerateKeyNames is defined outside this block; the assertions below rely on it
    // returning one name per key value.
    VBuffer<ReadOnlyMemory<char>> keyNames = GenerateKeyNames(keyCount);

    var schema = ScoreSchemaFactory.CreateSequencePredictionSchema(
        expectedType,
        AnnotationUtils.Const.ScoreColumnKind.SequenceClassification,
        keyNames);

    // The predicted label is expected to be the one and only output column.
    Assert.Single(schema);
    var scoreColumn = schema[0];

    // Column name.
    Assert.Equal(AnnotationUtils.Const.ScoreValueKind.PredictedLabel, scoreColumn.Name);

    // Column type: must be a key type with the same count and raw type as requested.
    var actualType = scoreColumn.Type as KeyDataViewType;
    Assert.NotNull(actualType);
    Assert.Equal(expectedType.Count, actualType.Count);
    Assert.Equal(expectedType.RawType, actualType.RawType);

    // Annotations: key names were supplied, so three fields are expected.
    var annotations = scoreColumn.Annotations;
    Assert.Equal(3, annotations.Schema.Count);

    var keyValuesColumn = annotations.Schema[0];
    var columnKindColumn = annotations.Schema[1];
    var valueKindColumn = annotations.Schema[2];

    // Annotation field names, in their expected order.
    Assert.Equal(AnnotationUtils.Kinds.KeyValues, keyValuesColumn.Name);
    Assert.Equal(AnnotationUtils.Kinds.ScoreColumnKind, columnKindColumn.Name);
    Assert.Equal(AnnotationUtils.Kinds.ScoreValueKind, valueKindColumn.Name);

    // Annotation field types: a text vector sized like keyNames, followed by two text scalars.
    Assert.True(keyValuesColumn.Type is VectorDataViewType);
    var keyValuesType = keyValuesColumn.Type as VectorDataViewType;
    Assert.Equal(keyNames.Length, keyValuesType.Size);
    Assert.Equal(TextDataViewType.Instance, keyValuesType.ItemType);
    Assert.Equal(TextDataViewType.Instance, columnKindColumn.Type);
    Assert.Equal(TextDataViewType.Instance, valueKindColumn.Type);

    // Annotation field values: the key names must round-trip unchanged.
    var readKeyNames = annotations.GetGetter<VBuffer<ReadOnlyMemory<char>>>(annotations.Schema[0]);
    var actualKeyNames = new VBuffer<ReadOnlyMemory<char>>();
    readKeyNames(ref actualKeyNames);
    Assert.Equal(keyNames.Length, actualKeyNames.Length);
    Assert.Equal(keyNames.DenseValues(), actualKeyNames.DenseValues());

    // Reads the text annotation stored in the field at the given index and returns it as a string.
    string ReadTextAnnotation(int index)
    {
        var getter = annotations.GetGetter<ReadOnlyMemory<char>>(annotations.Schema[index]);
        ReadOnlyMemory<char> text = default(ReadOnlyMemory<char>);
        getter(ref text);
        return text.ToString();
    }

    Assert.Equal(AnnotationUtils.Const.ScoreColumnKind.SequenceClassification, ReadTextAnnotation(1));
    Assert.Equal(AnnotationUtils.Const.ScoreValueKind.PredictedLabel, ReadTextAnnotation(2));
}
/// <summary>
/// Binds every predictor model of <paramref name="parent"/> to <paramref name="schema"/>.
/// For each model this records its bound row mapper, the index of its score column, and its
/// transform pipeline as a row-to-row mapper. It also collects the indices of the parent's
/// input columns within the input schema.
/// </summary>
/// <param name="parent">The ensemble that owns the predictor models, host and input column names.</param>
/// <param name="schema">The role-mapped input schema to bind against.</param>
public BoundBase(SchemaBindablePipelineEnsembleBase parent, RoleMappedSchema schema)
{
    Parent = parent;
    InputRoleMappedSchema = schema;
    OutputSchema = ScoreSchemaFactory.Create(Parent.ScoreType, Parent._scoreColumnKind);

    var host = Parent.Host;

    // Resolve every required input column by name; a missing column is a schema mismatch.
    _inputColIndices = new HashSet<int>();
    foreach (var inputName in Parent._inputCols)
    {
        var inputCol = InputRoleMappedSchema.Schema.GetColumnOrNull(inputName);
        if (!inputCol.HasValue)
        {
            throw host.ExceptSchemaMismatch(nameof(InputRoleMappedSchema), "input", inputName);
        }

        _inputColIndices.Add(inputCol.Value.Index);
    }

    var models = Parent.PredictorModels;
    int modelCount = models.Length;
    Mappers = new ISchemaBoundRowMapper[modelCount];
    BoundPipelines = new IRowToRowMapper[modelCount];
    ScoreCols = new int[modelCount];

    for (int m = 0; m < modelCount; m++)
    {
        var model = models[m];

        // Run PrepareData over an empty view of the input schema to obtain the
        // RoleMappedSchema to hand to the predictor. The predictor it returns is not used
        // here; the bindable mapper below is built from the model's Predictor property.
        var schemaOnlyView = new EmptyDataView(host, schema.Schema);
        model.PrepareData(host, schemaOnlyView, out RoleMappedData roleMapped, out IPredictor _);

        // Wrap the predictor as a bindable mapper and bind it to the schema found above.
        var bindableMapper = ScoreUtils.GetSchemaBindableMapper(host, model.Predictor);
        var rowMapper = bindableMapper.Bind(host, roleMapped.Schema) as ISchemaBoundRowMapper;
        Mappers[m] = rowMapper;
        if (rowMapper == null)
        {
            throw host.Except("Predictor {0} is not a row to row mapper", m);
        }

        // The mapper must expose a score column; remember where it is.
        var scoreColumn = rowMapper.OutputSchema.GetColumnOrNull(MetadataUtils.Const.ScoreValueKind.Score);
        if (!scoreColumn.HasValue)
        {
            throw host.Except("Predictor {0} does not contain a score column", m);
        }

        ScoreCols[m] = scoreColumn.Value.Index;

        // Apply the model's transform pipeline to an empty view of the input schema and
        // require the result to be usable as a row-to-row mapper.
        var pipelineInput = new EmptyDataView(host, schema.Schema);
        var inputModel = new TransformModelImpl(host, pipelineInput, pipelineInput);
        var appliedPipeline = model.TransformModel.Apply(host, inputModel);
        var pipelineMapper = appliedPipeline.AsRowToRowMapper(host);
        BoundPipelines[m] = pipelineMapper;
        if (pipelineMapper == null)
        {
            throw host.Except("Transform pipeline {0} contains transforms that do not implement IRowToRowMapper", m);
        }
    }
}
/// <summary>
/// Creates a sequence-prediction score schema from a ten-value key type and the result of
/// <c>GenerateKeyNames(0)</c>, then verifies the single output column: its name, its key type,
/// and that it carries exactly two metadata fields (score column kind and score value kind).
/// </summary>
public void SequencePredictorSchemaWithoutKeyNamesMetadataTest()
{
    int keyCount = 10;
    var expectedType = new KeyType(DataKind.U4, 0, keyCount);
    // GenerateKeyNames is defined outside this block; requesting zero names is presumably
    // what yields an empty buffer and therefore no key-values metadata.
    VBuffer<ReadOnlyMemory<char>> keyNames = GenerateKeyNames(0);

    var schema = ScoreSchemaFactory.CreateSequencePredictionSchema(
        expectedType,
        MetadataUtils.Const.ScoreColumnKind.SequenceClassification,
        keyNames);

    // The predicted label is expected to be the one and only output column.
    Assert.Single(schema);
    var scoreColumn = schema[0];

    // Column name.
    Assert.Equal(MetadataUtils.Const.ScoreValueKind.PredictedLabel, scoreColumn.Name);

    // Column type: must be a key type that matches the requested one property by property.
    var actualType = scoreColumn.Type as KeyType;
    Assert.NotNull(actualType);
    Assert.Equal(expectedType.Min, actualType.Min);
    Assert.Equal(expectedType.Count, actualType.Count);
    Assert.Equal(expectedType.RawType, actualType.RawType);
    Assert.Equal(expectedType.Contiguous, actualType.Contiguous);

    // Metadata: only two fields are expected here; no key-values field is asserted.
    var metadata = scoreColumn.Metadata;
    Assert.Equal(2, metadata.Schema.Count);

    var columnKindColumn = metadata.Schema[0];
    var valueKindColumn = metadata.Schema[1];

    // Metadata field names, in their expected order.
    Assert.Equal(MetadataUtils.Kinds.ScoreColumnKind, columnKindColumn.Name);
    Assert.Equal(MetadataUtils.Kinds.ScoreValueKind, valueKindColumn.Name);

    // Metadata field types: both are text scalars.
    Assert.Equal(TextType.Instance, columnKindColumn.Type);
    Assert.Equal(TextType.Instance, valueKindColumn.Type);

    // Reads the text metadata field at the given index and returns it as a string.
    string ReadTextMetadata(int index)
    {
        var getter = metadata.GetGetter<ReadOnlyMemory<char>>(index);
        ReadOnlyMemory<char> text = default(ReadOnlyMemory<char>);
        getter(ref text);
        return text.ToString();
    }

    // Metadata field values.
    Assert.Equal(MetadataUtils.Const.ScoreColumnKind.SequenceClassification, ReadTextMetadata(0));
    Assert.Equal(MetadataUtils.Const.ScoreValueKind.PredictedLabel, ReadTextMetadata(1));
}
/// <summary>
/// Explicit <see cref="ISchemaBindableMapper"/> implementation. Wraps this instance in a
/// <see cref="FieldAwareFactorizationMachineScalarRowMapper"/> bound to <paramref name="schema"/>,
/// using the binary-classification score schema as its output schema.
/// </summary>
/// <param name="env">The host environment passed through to the row mapper.</param>
/// <param name="schema">The role-mapped input schema to bind to.</param>
/// <returns>The newly created bound mapper.</returns>
ISchemaBoundMapper ISchemaBindableMapper.Bind(IHostEnvironment env, RoleMappedSchema schema)
{
    var outputSchema = ScoreSchemaFactory.CreateBinaryClassificationSchema();
    return new FieldAwareFactorizationMachineScalarRowMapper(env, schema, outputSchema, this);
}