// Runs the shared estimator workout (TestEstimatorCore) against ColumnSelectingEstimator
// in three configurations: keeping columns, dropping columns, and keeping columns with
// mismatches ignored.
void TestSelectWorkout()
{
    var validRows = new[]
    {
        new TestClass { A = 1, B = 2, C = 3 },
        new TestClass { A = 4, B = 5, C = 6 },
    };
    var mismatchedRows = new[]
    {
        new TestClass2 { D = 3, E = 5 },
    };

    var validView = ComponentCreation.CreateDataView(Env, validRows);
    var mismatchedView = ComponentCreation.CreateDataView(Env, mismatchedRows);

    // Workout on keep columns
    var keepEstimator = new ColumnSelectingEstimator(Env, new string[] { "A", "B" }, null, true, false);
    TestEstimatorCore(keepEstimator, validFitInput: validView, invalidInput: mismatchedView);

    // Workout on drop columns
    var dropEstimator = new ColumnSelectingEstimator(Env, null, new string[] { "A", "B" }, true, false);
    TestEstimatorCore(dropEstimator, validFitInput: validView, invalidInput: mismatchedView);

    // Workout on keep columns with ignore mismatch -- using invalid data set.
    // The data that lacks columns A and B is passed as the *valid* fit input here.
    var lenientEstimator = new ColumnSelectingEstimator(Env, new string[] { "A", "B" }, null, true, true);
    TestEstimatorCore(lenientEstimator, validFitInput: mismatchedView);
}
// Checks that KeepColumns emits the kept columns in the order they were requested:
// asking for "C" then "A" must yield C at index 0 and A at index 1, with B absent.
void TestSelectKeepWithOrder()
{
    var rows = new[]
    {
        new TestClass { A = 1, B = 2, C = 3 },
        new TestClass { A = 4, B = 5, C = 6 },
    };
    var input = ComponentCreation.CreateDataView(Env, rows);

    // Expected output will be CA
    var output = ColumnSelectingEstimator.KeepColumns(Env, "C", "A")
        .Fit(input)
        .Transform(input);
    var schema = output.Schema;

    var hasA = schema.TryGetColumnIndex("A", out int indexOfA);
    var hasB = schema.TryGetColumnIndex("B", out _);
    var hasC = schema.TryGetColumnIndex("C", out int indexOfC);

    Assert.True(hasA);
    Assert.Equal(1, indexOfA);
    Assert.False(hasB);
    Assert.True(hasC);
    Assert.Equal(0, indexOfC);
}
// Fitting a keep-columns estimator that names columns absent from the input ("D", "G")
// is expected to throw ArgumentOutOfRangeException.
void TestSelectColumnsWithMissing()
{
    var rows = new[]
    {
        new TestClass { A = 1, B = 2, C = 3 },
        new TestClass { A = 4, B = 5, C = 6 },
    };
    var input = ComponentCreation.CreateDataView(Env, rows);

    // Neither "D" nor "G" is a member initialised on TestClass above.
    var estimator = ColumnSelectingEstimator.KeepColumns(Env, "D", "G");

    Assert.Throws<ArgumentOutOfRangeException>(() => estimator.Fit(input));
}
// Fitting a keep-columns estimator that names columns absent from the input ("D", "G")
// is expected to throw ArgumentOutOfRangeException.
public void TestSelectColumnsWithMissing()
{
    var rows = new[]
    {
        new TestClass { A = 1, B = 2, C = 3 },
        new TestClass { A = 4, B = 5, C = 6 },
    };
    var input = ML.Data.LoadFromEnumerable(rows);

    // Neither "D" nor "G" is a member initialised on TestClass above.
    var estimator = ColumnSelectingEstimator.KeepColumns(Env, "D", "G");

    Assert.Throws<ArgumentOutOfRangeException>(() => estimator.Fit(input));
}
// Checks that DropColumns removes the named columns ("A", "C") and leaves the
// remaining column "B" as the only one, at index 0.
void TestSelectDrop()
{
    var rows = new[]
    {
        new TestClass { A = 1, B = 2, C = 3 },
        new TestClass { A = 4, B = 5, C = 6 },
    };
    var input = ML.Data.LoadFromEnumerable(rows);

    var output = ColumnSelectingEstimator.DropColumns(Env, "A", "C")
        .Fit(input)
        .Transform(input);
    var schema = output.Schema;

    // Only B's index is inspected; the other two lookups are expected to fail.
    var hasA = schema.TryGetColumnIndex("A", out _);
    var hasB = schema.TryGetColumnIndex("B", out int indexOfB);
    var hasC = schema.TryGetColumnIndex("C", out _);

    Assert.False(hasA);
    Assert.True(hasB);
    Assert.Equal(0, indexOfB);
    Assert.False(hasC);
}
// Checks that a ColumnSelectingEstimator built with a drop list ("A", "C") removes
// those columns and leaves "B" as the only remaining column, at index 0.
void TestSelectDrop()
{
    var data = new[]
    {
        new TestClass() { A = 1, B = 2, C = 3, },
        new TestClass() { A = 4, B = 5, C = 6 },
    };
    var dataView = ComponentCreation.CreateDataView(Env, data);

    // Second argument (keep list) is null; third argument is the list of columns to drop.
    var est = new ColumnSelectingEstimator(Env, null, new string[] { "A", "C" });
    var transformer = est.Fit(dataView);
    var result = transformer.Transform(dataView);

    // The out index is only meaningful when the lookup succeeds, so for the dropped
    // columns only the boolean result is checked. Asserting on the leftover out value
    // of a failed lookup would pin an incidental detail of TryGetColumnIndex rather
    // than the behaviour under test.
    var foundColumnA = result.Schema.TryGetColumnIndex("A", out _);
    var foundColumnB = result.Schema.TryGetColumnIndex("B", out int bIdx);
    var foundColumnC = result.Schema.TryGetColumnIndex("C", out _);

    Assert.False(foundColumnA);
    Assert.True(foundColumnB);
    Assert.Equal(0, bIdx);
    Assert.False(foundColumnC);
}
/// <summary>
/// Creates a <see cref="ColumnSelectingEstimator"/> that drops the listed columns from a given input.
/// Any column that is not listed is maintained in the output schema.
/// </summary>
/// <param name="catalog">The transform's catalog.</param>
/// <param name="columnsToDrop">The array of column names to drop.</param>
/// <returns>The estimator produced by <c>ColumnSelectingEstimator.DropColumns</c> for the catalog's environment.</returns>
public static ColumnSelectingEstimator DropColumns(this TransformsCatalog catalog, params string[] columnsToDrop)
{
    // Resolve the environment from the catalog, then delegate to the estimator's own factory.
    var env = CatalogUtils.GetEnvironment(catalog);
    return ColumnSelectingEstimator.DropColumns(env, columnsToDrop);
}