public void PurposeInferenceHiddenColumnsTest()
        {
            // Checks that purpose inference reports a hidden column as Ignore while the
            // visible column of the same name is still inferred as a numeric feature.
            var mlContext = new MLContext();

            // Arrange: a dummy data view with a Boolean label and a Single 'Features' column.
            var builder = new DataViewSchema.Builder();
            builder.AddColumn(DefaultColumnNames.Label, BooleanDataViewType.Instance);
            builder.AddColumn(DefaultColumnNames.Features, NumberDataViewType.Single);
            IDataView rawData = DataViewTestFixture.BuildDummyDataView(builder.ToSchema());

            // Normalizing 'Features' in place leaves two columns named 'Features' in the
            // resulting data view; the first (original) one gets marked as 'Hidden'.
            var minMaxEstimator = mlContext.Transforms.NormalizeMinMax(DefaultColumnNames.Features);
            var fittedNormalizer = minMaxEstimator.Fit(rawData);
            IDataView normalizedData = fittedNormalizer.Transform(rawData);

            // Act
            var inferred = PurposeInference.InferPurposes(mlContext, normalizedData, new ColumnInformation());

            // Assert: Label, hidden 'Features' (Ignore), visible 'Features' (NumericFeature).
            Assert.Equal(3, inferred.Count());
            Assert.Equal(ColumnPurpose.Label, inferred[0].Purpose);
            Assert.Equal(ColumnPurpose.Ignore, inferred[1].Purpose);
            Assert.Equal(ColumnPurpose.NumericFeature, inferred[2].Purpose);
        }
        // Checks that validating experiment arguments throws an ArgumentException whose
        // message starts with "Training data has 0 rows" when the training data has no rows.
        public void ValidateEmptyTrainingDataThrows()
        {
            // Arrange: schema only; createDummyRow: false asks the fixture not to add a row.
            var builder = new DataViewSchema.Builder();
            builder.AddColumn("Number", NumberDataViewType.Single);
            builder.AddColumn(DefaultColumnNames.Label, NumberDataViewType.Single);
            var emptyTrainingData = DataViewTestFixture.BuildDummyDataView(builder.ToSchema(), createDummyRow: false);

            // Act
            var thrown = Assert.Throws<ArgumentException>(
                () => UserInputValidationUtil.ValidateExperimentExecuteArgs(
                    emptyTrainingData,
                    new ColumnInformation(),
                    null,
                    TaskKind.Regression));

            // Assert
            Assert.StartsWith("Training data has 0 rows", thrown.Message);
        }
        // Checks that a 'Features' column declared as Double is rejected with an
        // ArgumentException stating that the Features column must be of type Single.
        public void ValidateFeaturesColInvalidType()
        {
            // Arrange: 'Features' is Double, the label is Single.
            var builder = new DataViewSchema.Builder();
            builder.AddColumn(DefaultColumnNames.Features, NumberDataViewType.Double);
            builder.AddColumn(DefaultColumnNames.Label, NumberDataViewType.Single);
            var trainingData = DataViewTestFixture.BuildDummyDataView(builder.ToSchema());

            // Act
            var thrown = Assert.Throws<ArgumentException>(
                () => UserInputValidationUtil.ValidateExperimentExecuteArgs(
                    trainingData,
                    new ColumnInformation(),
                    null,
                    TaskKind.Regression));

            // Assert
            Assert.StartsWith("Features column must be of data type Single", thrown.Message);
        }
        // Checks that naming a categorical column that is absent from the data view's
        // schema makes ValidateSpecifiedColumnsExist throw an ArgumentException.
        public void ValidateColumnNotContainedInData()
        {
            // Arrange: the data only has 'Features' and the label column.
            var builder = new DataViewSchema.Builder();
            builder.AddColumn(DefaultColumnNames.Features, NumberDataViewType.Single);
            builder.AddColumn(DefaultColumnNames.Label, NumberDataViewType.Single);
            var data = DataViewTestFixture.BuildDummyDataView(builder.ToSchema());

            // "Categorical" was never added to the schema above.
            var columnInformation = new ColumnInformation();
            columnInformation.CategoricalColumnNames.Add("Categorical");

            // Act + Assert
            Assert.Throws<ArgumentException>(
                () => ColumnInferenceValidationUtil.ValidateSpecifiedColumnsExist(columnInformation, data));
        }
        // Checks that a feature column of type UInt64 is rejected with an ArgumentException
        // listing the supported feature column types.
        public void ValidateProhibitedFeatureColumnType()
        {
            // Arrange: a single UInt64 column next to a Single label.
            var builder = new DataViewSchema.Builder();
            builder.AddColumn("UInt64", NumberDataViewType.UInt64);
            builder.AddColumn(DefaultColumnNames.Label, NumberDataViewType.Single);
            var trainingData = DataViewTestFixture.BuildDummyDataView(builder.ToSchema());

            // Act
            var thrown = Assert.Throws<ArgumentException>(
                () => UserInputValidationUtil.ValidateExperimentExecuteArgs(
                    trainingData,
                    new ColumnInformation(),
                    null,
                    TaskKind.Regression));

            // Assert
            Assert.StartsWith("Only supported feature column types are Boolean, Single, and String. Please change the feature column UInt64 of type UInt64 to one of the supported types.", thrown.Message);
        }
        // Helper: fits and applies each suggested transform's estimator, in order, to a
        // dummy data view built from the given columns, then asserts that the result has
        // a 'Features' column that is a vector whose item type is Single.
        private static void TestApplyTransformsToRealDataView(IEnumerable <SuggestedTransform> transforms,
                                                              IEnumerable <DatasetColumnInfo> columns)
        {
            // Start from a dummy data view matching the input columns.
            var current = DataViewTestFixture.BuildDummyDataView(columns);

            // Chain the transforms: each one is fit on, and applied to, the output of the previous.
            foreach (var suggested in transforms)
            {
                var estimator = suggested.Estimator;
                current = estimator.Fit(current).Transform(current);
            }

            // The pipeline must have produced a 'Features' column...
            var featuresColumn = current.Schema.GetColumnOrNull(DefaultColumnNames.Features);
            Assert.NotNull(featuresColumn);

            // ...that is a vector of Single values.
            var featuresType = featuresColumn.Value.Type;
            Assert.True(featuresType.IsVector());
            Assert.Equal(NumberDataViewType.Single, featuresType.GetItemType());
        }
        // Example no. 7
        // 0
        // Checks that declaring a Single-typed column as a text column is rejected with an
        // ArgumentException carrying the exact "only type String is allowed" message.
        public void ValidateTextColumnNotText()
        {
            const string textColumnName = "TextColumn";

            // Arrange: every column, including the one declared as text, is of type Single.
            var builder = new DataViewSchema.Builder();
            builder.AddColumn(DefaultColumnNames.Features, NumberDataViewType.Single);
            builder.AddColumn(DefaultColumnNames.Label, NumberDataViewType.Single);
            builder.AddColumn(textColumnName, NumberDataViewType.Single);
            var trainingData = DataViewTestFixture.BuildDummyDataView(builder.ToSchema());

            var columnInformation = new ColumnInformation();
            columnInformation.TextColumnNames.Add(textColumnName);

            // Act
            var thrown = Assert.Throws<ArgumentException>(
                () => UserInputValidationUtil.ValidateExperimentExecuteArgs(
                    trainingData,
                    columnInformation,
                    null,
                    TaskKind.Regression));

            // Assert
            Assert.Equal("Provided text column 'TextColumn' was of type Single, but only type String is allowed.", thrown.Message);
        }
        // Example no. 8
        // 0
        // Checks that validating experiment arguments throws an ArgumentException whose
        // message starts with "Validation data has 0 rows" when training data has a row
        // but the validation data was built without one.
        public void ValidateEmptyValidationDataThrows()
        {
            // Arrange (training data): one value per column via ArrayDataViewBuilder.
            var trainingBuilder = new ArrayDataViewBuilder(new MLContext());
            trainingBuilder.AddColumn("Number", NumberDataViewType.Single, 0f);
            trainingBuilder.AddColumn(DefaultColumnNames.Label, NumberDataViewType.Single, 0f);
            var nonEmptyTrainingData = trainingBuilder.GetDataView();

            // Arrange (validation data): same columns, but createDummyRow: false asks the
            // fixture not to add a row.
            var validationSchemaBuilder = new DataViewSchema.Builder();
            validationSchemaBuilder.AddColumn("Number", NumberDataViewType.Single);
            validationSchemaBuilder.AddColumn(DefaultColumnNames.Label, NumberDataViewType.Single);
            var emptyValidationData = DataViewTestFixture.BuildDummyDataView(validationSchemaBuilder.ToSchema(), createDummyRow: false);

            // Act
            var thrown = Assert.Throws<ArgumentException>(
                () => UserInputValidationUtil.ValidateExperimentExecuteArgs(
                    nonEmptyTrainingData,
                    new ColumnInformation(),
                    emptyValidationData,
                    TaskKind.Regression));

            // Assert
            Assert.StartsWith("Validation data has 0 rows", thrown.Message);
        }