// Trains a small pipeline (concat -> Scaler -> PassThrough -> SDCA regressor) on iris
// and verifies both the prediction frame shape and that PassThrough persisted its
// intermediate view to disk.
public void TestEP_PassThroughTransform()
{
    var methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;
    var iris = FileHelper.GetTestFile("iris.txt");
    var outPass = FileHelper.GetOutputFile("data.idv", methodName);
    var df = DataFrameIO.ReadCsv(iris, sep: '\t', dtypes: new ColumnType[] { NumberType.R4 });
    var importData = df.EPTextLoader(iris, sep: '\t', header: true);
    var learningPipeline = new GenericLearningPipeline(conc: 1);
    learningPipeline.Add(importData);
    learningPipeline.Add(new Legacy.Transforms.ColumnConcatenator("Features", "Sepal_length", "Sepal_width"));
    learningPipeline.Add(new Scikit.ML.EntryPoints.Scaler("Features"));
    // SaveOnDisk makes the transform dump its input view to outPass during training.
    learningPipeline.Add(new Scikit.ML.EntryPoints.PassThrough() { Filename = outPass, SaveOnDisk = true });
    learningPipeline.Add(new Legacy.Trainers.StochasticDualCoordinateAscentRegressor());
    var predictor = learningPipeline.Train();
    var predictions = predictor.Predict(df);
    var dfout = DataFrameIO.ReadView(predictions);
    // MSTest convention: expected value first, actual second (fixes misleading failure messages).
    Assert.AreEqual(new Tuple<int, int>(150, 8), dfout.Shape);
    Assert.IsTrue(File.Exists(outPass));
}
// Trains a NearestNeighborsMultiClass pipeline on iris inside a scoped test
// environment and checks the shape of the prediction frame.
public void TestEP_NearestNeighborsLPMc()
{
    using (var env = EnvHelper.NewTestEnvironment(conc: 1))
    {
        var iris = FileHelper.GetTestFile("iris.txt");
        var df = DataFrameIO.ReadCsv(iris, sep: '\t', dtypes: new ColumnType[] { NumberType.R4 });
        var importData = df.EPTextLoader(iris, sep: '\t', header: true);
        var learningPipeline = new GenericLearningPipeline(conc: 1);
        learningPipeline.Add(importData);
        learningPipeline.Add(new Legacy.Transforms.ColumnConcatenator("Features", "Sepal_length", "Sepal_width"));
        learningPipeline.Add(new Scikit.ML.EntryPoints.NearestNeighborsMultiClass());
        var predictor = learningPipeline.Train();
        var predictions = predictor.Predict(df);
        var dfout = DataFrameIO.ReadView(predictions);
        // MSTest convention: expected value first, actual second.
        Assert.AreEqual(new Tuple<int, int>(150, 11), dfout.Shape);
    }
}
// Trains a KMeans++ clustering pipeline on iris through the entry-point API
// and verifies the shape of the prediction frame.
public void TestEP_Q_KMeansEntryPointAPIWithDataFrame()
{
    var iris = FileHelper.GetTestFile("iris.txt");
    // Use the short type name like the sibling tests (the namespace is already in scope).
    var df = DataFrameIO.ReadCsv(iris, sep: '\t', dtypes: new ColumnType[] { NumberType.R4 });
    var importData = df.EPTextLoader(iris, sep: '\t', header: true);
    var learningPipeline = new GenericLearningPipeline(conc: 1);
    learningPipeline.Add(importData);
    learningPipeline.Add(new Legacy.Transforms.ColumnConcatenator("Features", "Sepal_length", "Sepal_width"));
    learningPipeline.Add(new Legacy.Trainers.KMeansPlusPlusClusterer());
    var predictor = learningPipeline.Train();
    var predictions = predictor.Predict(df);
    var dfout = DataFrameIO.ReadView(predictions);
    // MSTest convention: expected value first, actual second.
    Assert.AreEqual(new Tuple<int, int>(150, 13), dfout.Shape);
}
// Trains a pipeline with the Scaler transform followed by an SDCA regressor on iris
// and checks the shape of the resulting prediction frame.
public void TestEP_ScalerTransform()
{
    var iris = FileHelper.GetTestFile("iris.txt");
    var df = DataFrameIO.ReadCsv(iris, sep: '\t', dtypes: new ColumnType[] { NumberType.R4 });
    var importData = df.EPTextLoader(iris, sep: '\t', header: true);
    var learningPipeline = new GenericLearningPipeline(conc: 1);
    learningPipeline.Add(importData);
    learningPipeline.Add(new Legacy.Transforms.ColumnConcatenator("Features", "Sepal_length", "Sepal_width"));
    learningPipeline.Add(new Scikit.ML.EntryPoints.Scaler("Features"));
    learningPipeline.Add(new Legacy.Trainers.StochasticDualCoordinateAscentRegressor());
    var predictor = learningPipeline.Train();
    var predictions = predictor.Predict(df);
    var dfout = DataFrameIO.ReadView(predictions);
    // MSTest convention: expected value first, actual second.
    Assert.AreEqual(new Tuple<int, int>(150, 8), dfout.Shape);
}
// Builds an XGBoost binary-classification pipeline on iris_binary. The training/scoring
// part is currently skipped (see the flag below) because the entry point fails with a
// missing variable; the test still exercises pipeline construction.
public void TestEntryPointXGBoostBinary()
{
    // Dispose the environment like the sibling tests (it was previously leaked).
    using (var env = EnvHelper.NewTestEnvironment(conc: 1))
    {
        var iris = FileHelper.GetTestFile("iris_binary.txt");
        var df = DataFrame.ReadCsv(iris, sep: '\t', dtypes: new DataKind?[] { DataKind.R4 });
        var importData = df.EPTextLoader(iris, sep: '\t', header: true);
        var learningPipeline = new GenericLearningPipeline(conc: 1);
        learningPipeline.Add(importData);
        learningPipeline.Add(new ColumnConcatenator("Features", "Sepal_length", "Sepal_width"));
        learningPipeline.Add(new Scikit.ML.XGBoostWrapper.XGBoostBinary());
        // Fails here due to missing variable. Non-const flag keeps the tail compiled
        // (no CS0162 unreachable-code warning) while still skipping it at runtime.
        bool skipTraining = true;
        if (skipTraining)
            return;
        var predictor = learningPipeline.Train();
        var predictions = predictor.Predict(df);
        var dfout = DataFrame.ReadView(predictions);
        // MSTest convention: expected value first, actual second.
        Assert.AreEqual(new Tuple<int, int>(150, 9), dfout.Shape);
    }
}
// Trains an SDCA regressor (capped at 2 iterations for speed) on iris, verifies the
// prediction frame shape, then round-trips the output to CSV and checks the file exists.
public void TestTreePathNewAPI()
{
    using (var env = EnvHelper.NewTestEnvironment(conc: 1))
    {
        var methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;
        var iris = FileHelper.GetTestFile("iris.txt");
        var df = DataFrameIO.ReadCsv(iris, sep: '\t', dtypes: new ColumnType[] { NumberType.R4 });
        var importData = df.EPTextLoader(iris, sep: '\t', header: true);
        var learningPipeline = new GenericLearningPipeline();
        learningPipeline.Add(importData);
        learningPipeline.Add(new Legacy.Transforms.ColumnConcatenator("Features", "Sepal_length", "Sepal_width"));
        // 2 iterations is enough for the shape checks below and keeps the test fast.
        learningPipeline.Add(new Legacy.Trainers.StochasticDualCoordinateAscentRegressor() { MaxIterations = 2 });
        var predictor = learningPipeline.Train();
        var predictions = predictor.Predict(df);
        var dfout = DataFrameIO.ReadView(predictions);
        // MSTest convention: expected value first, actual second.
        Assert.AreEqual(new Tuple<int, int>(150, 8), dfout.Shape);
        var outfile = FileHelper.GetOutputFile("iris_path.txt", methodName);
        dfout.ToCsv(outfile);
        Assert.IsTrue(File.Exists(outfile));
    }
}