/// <summary>
/// Checks purpose inference on a data view that contains two columns named
/// 'Features': the hidden one is expected to be inferred as Ignore, the
/// visible one as NumericFeature.
/// </summary>
public void PurposeInferenceHiddenColumnsTest()
{
    var mlContext = new MLContext();

    // Start from a two-column view: a Boolean label and a Single-typed 'Features' column.
    var builder = new DataViewSchema.Builder();
    builder.AddColumn(DefaultColumnNames.Label, BooleanDataViewType.Instance);
    builder.AddColumn(DefaultColumnNames.Features, NumberDataViewType.Single);
    IDataView dataView = DataViewTestFixture.BuildDummyDataView(builder.ToSchema());

    // Normalizing 'Features' leaves two columns with that name in the view;
    // the first of them ends up marked as hidden.
    var minMaxEstimator = mlContext.Transforms.NormalizeMinMax(DefaultColumnNames.Features);
    var fittedNormalizer = minMaxEstimator.Fit(dataView);
    dataView = fittedNormalizer.Transform(dataView);

    var inferred = PurposeInference.InferPurposes(mlContext, dataView, new ColumnInformation());

    Assert.Equal(3, inferred.Count());
    Assert.Equal(ColumnPurpose.Label, inferred[0].Purpose);
    // Hidden 'Features' column: must be ignored.
    Assert.Equal(ColumnPurpose.Ignore, inferred[1].Purpose);
    // Visible 'Features' column: must be treated as a numeric feature.
    Assert.Equal(ColumnPurpose.NumericFeature, inferred[2].Purpose);
}
/// <summary>
/// Validation must reject training data built without any rows by throwing an
/// <see cref="ArgumentException"/> whose message starts with
/// "Training data has 0 rows".
/// </summary>
public void ValidateEmptyTrainingDataThrows()
{
    var builder = new DataViewSchema.Builder();
    builder.AddColumn("Number", NumberDataViewType.Single);
    builder.AddColumn(DefaultColumnNames.Label, NumberDataViewType.Single);

    // createDummyRow: false requests a view without the dummy row.
    var emptyTrainingData = DataViewTestFixture.BuildDummyDataView(builder.ToSchema(), createDummyRow: false);

    Action validate = () => UserInputValidationUtil.ValidateExperimentExecuteArgs(
        emptyTrainingData,
        new ColumnInformation(),
        null,
        TaskKind.Regression);

    var thrown = Assert.Throws<ArgumentException>(validate);
    Assert.StartsWith("Training data has 0 rows", thrown.Message);
}
/// <summary>
/// A 'Features' column declared as Double must be rejected with an
/// <see cref="ArgumentException"/> whose message starts with
/// "Features column must be of data type Single".
/// </summary>
public void ValidateFeaturesColInvalidType()
{
    var builder = new DataViewSchema.Builder();
    // Double is deliberately the wrong type for the Features column.
    builder.AddColumn(DefaultColumnNames.Features, NumberDataViewType.Double);
    builder.AddColumn(DefaultColumnNames.Label, NumberDataViewType.Single);
    var trainingData = DataViewTestFixture.BuildDummyDataView(builder.ToSchema());

    Action validate = () => UserInputValidationUtil.ValidateExperimentExecuteArgs(
        trainingData,
        new ColumnInformation(),
        null,
        TaskKind.Regression);

    var thrown = Assert.Throws<ArgumentException>(validate);
    Assert.StartsWith("Features column must be of data type Single", thrown.Message);
}
/// <summary>
/// Naming a categorical column that is absent from the data view's schema
/// must cause an <see cref="ArgumentException"/>.
/// </summary>
public void ValidateColumnNotContainedInData()
{
    var builder = new DataViewSchema.Builder();
    builder.AddColumn(DefaultColumnNames.Features, NumberDataViewType.Single);
    builder.AddColumn(DefaultColumnNames.Label, NumberDataViewType.Single);
    var trainingData = DataViewTestFixture.BuildDummyDataView(builder.ToSchema());

    // "Categorical" is not one of the columns added to the schema above.
    var columnInformation = new ColumnInformation();
    columnInformation.CategoricalColumnNames.Add("Categorical");

    Assert.Throws<ArgumentException>(
        () => ColumnInferenceValidationUtil.ValidateSpecifiedColumnsExist(columnInformation, trainingData));
}
/// <summary>
/// A column of type UInt64 must be rejected with an
/// <see cref="ArgumentException"/> whose message names the supported feature
/// column types and the offending column.
/// </summary>
public void ValidateProhibitedFeatureColumnType()
{
    var builder = new DataViewSchema.Builder();
    builder.AddColumn("UInt64", NumberDataViewType.UInt64);
    builder.AddColumn(DefaultColumnNames.Label, NumberDataViewType.Single);
    var trainingData = DataViewTestFixture.BuildDummyDataView(builder.ToSchema());

    Action validate = () => UserInputValidationUtil.ValidateExperimentExecuteArgs(
        trainingData,
        new ColumnInformation(),
        null,
        TaskKind.Regression);

    var thrown = Assert.Throws<ArgumentException>(validate);
    Assert.StartsWith(
        "Only supported feature column types are Boolean, Single, and String. Please change the feature column UInt64 of type UInt64 to one of the supported types.",
        thrown.Message);
}
/// <summary>
/// Applies each suggested transform's estimator, in order, to a dummy data
/// view built from <paramref name="columns"/>, then asserts that the result
/// has a vector-typed 'Features' column whose item type is Single.
/// </summary>
/// <param name="transforms">Suggested transforms whose estimators are fit and applied in sequence.</param>
/// <param name="columns">Column descriptions used to build the dummy input data view.</param>
private static void TestApplyTransformsToRealDataView(IEnumerable<SuggestedTransform> transforms, IEnumerable<DatasetColumnInfo> columns)
{
    var current = DataViewTestFixture.BuildDummyDataView(columns);

    // Each estimator is fit on, and then applied to, the output of the previous one.
    foreach (var suggested in transforms)
    {
        var estimator = suggested.Estimator;
        current = estimator.Fit(current).Transform(current);
    }

    var maybeFeatures = current.Schema.GetColumnOrNull(DefaultColumnNames.Features);
    Assert.NotNull(maybeFeatures);

    var featuresType = maybeFeatures.Value.Type;
    Assert.True(featuresType.IsVector());
    Assert.Equal(NumberDataViewType.Single, featuresType.GetItemType());
}
/// <summary>
/// Declaring a Single-typed column as a text column must be rejected with an
/// <see cref="ArgumentException"/> carrying the exact expected message.
/// </summary>
public void ValidateTextColumnNotText()
{
    const string TextPurposeColName = "TextColumn";

    var builder = new DataViewSchema.Builder();
    builder.AddColumn(DefaultColumnNames.Features, NumberDataViewType.Single);
    builder.AddColumn(DefaultColumnNames.Label, NumberDataViewType.Single);
    // The column later declared as text is actually of type Single.
    builder.AddColumn(TextPurposeColName, NumberDataViewType.Single);
    var trainingData = DataViewTestFixture.BuildDummyDataView(builder.ToSchema());

    var columnInformation = new ColumnInformation();
    columnInformation.TextColumnNames.Add(TextPurposeColName);

    Action validate = () => UserInputValidationUtil.ValidateExperimentExecuteArgs(
        trainingData,
        columnInformation,
        null,
        TaskKind.Regression);

    var thrown = Assert.Throws<ArgumentException>(validate);
    Assert.Equal(
        "Provided text column 'TextColumn' was of type Single, but only type String is allowed.",
        thrown.Message);
}
/// <summary>
/// With populated training data, validation data built without any rows must
/// be rejected with an <see cref="ArgumentException"/> whose message starts
/// with "Validation data has 0 rows".
/// </summary>
public void ValidateEmptyValidationDataThrows()
{
    // Training data: one value per column.
    var trainingBuilder = new ArrayDataViewBuilder(new MLContext());
    trainingBuilder.AddColumn("Number", NumberDataViewType.Single, 0f);
    trainingBuilder.AddColumn(DefaultColumnNames.Label, NumberDataViewType.Single, 0f);
    var populatedTrainingData = trainingBuilder.GetDataView();

    // Validation data: same column names and types, built without the dummy row.
    var validationSchemaBuilder = new DataViewSchema.Builder();
    validationSchemaBuilder.AddColumn("Number", NumberDataViewType.Single);
    validationSchemaBuilder.AddColumn(DefaultColumnNames.Label, NumberDataViewType.Single);
    var emptyValidationData = DataViewTestFixture.BuildDummyDataView(
        validationSchemaBuilder.ToSchema(),
        createDummyRow: false);

    Action validate = () => UserInputValidationUtil.ValidateExperimentExecuteArgs(
        populatedTrainingData,
        new ColumnInformation(),
        emptyValidationData,
        TaskKind.Regression);

    var thrown = Assert.Throws<ArgumentException>(validate);
    Assert.StartsWith("Validation data has 0 rows", thrown.Message);
}