Example #1
0
        /// <summary>
        /// Trains a pipeline (text loader, column concatenation, scaler, pass-through,
        /// SDCA regressor) on iris.txt, then checks the shape of the predictions and that
        /// the file configured on the pass-through step exists.
        /// </summary>
        public void TestEP_PassThroughTransform()
        {
            var methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;
            var iris       = FileHelper.GetTestFile("iris.txt");
            var outPass    = FileHelper.GetOutputFile("data.idv", methodName);
            var df         = DataFrameIO.ReadCsv(iris, sep: '\t', dtypes: new ColumnType[] { NumberType.R4 });

            // Remove any artifact left by a previous run; otherwise the File.Exists check
            // at the end could pass without this run having written anything.
            if (File.Exists(outPass))
            {
                File.Delete(outPass);
            }

            var importData       = df.EPTextLoader(iris, sep: '\t', header: true);
            var learningPipeline = new GenericLearningPipeline(conc: 1);

            learningPipeline.Add(importData);
            learningPipeline.Add(new Legacy.Transforms.ColumnConcatenator("Features", "Sepal_length", "Sepal_width"));
            learningPipeline.Add(new Scikit.ML.EntryPoints.Scaler("Features"));
            learningPipeline.Add(new Scikit.ML.EntryPoints.PassThrough()
            {
                Filename = outPass, SaveOnDisk = true
            });
            learningPipeline.Add(new Legacy.Trainers.StochasticDualCoordinateAscentRegressor());
            var predictor   = learningPipeline.Train();
            var predictions = predictor.Predict(df);
            var dfout       = DataFrameIO.ReadView(predictions);

            // Expected value goes first so that a failure message labels both values correctly.
            Assert.AreEqual(Tuple.Create(150, 8), dfout.Shape);
            Assert.IsTrue(File.Exists(outPass));
        }
Example #2
0
 /// <summary>
 /// Trains a pipeline (text loader, column concatenation, nearest-neighbors multi-class
 /// entry point) on iris.txt inside a test environment and checks the shape of the
 /// predictions.
 /// </summary>
 public void TestEP_NearestNeighborsLPMc()
 {
     using (var env = EnvHelper.NewTestEnvironment(conc: 1))
     {
         var iris       = FileHelper.GetTestFile("iris.txt");
         var df         = DataFrameIO.ReadCsv(iris, sep: '\t', dtypes: new ColumnType[] { NumberType.R4 });
         var importData = df.EPTextLoader(iris, sep: '\t', header: true);

         var learningPipeline = new GenericLearningPipeline(conc: 1);
         learningPipeline.Add(importData);
         learningPipeline.Add(new Legacy.Transforms.ColumnConcatenator("Features", "Sepal_length", "Sepal_width"));
         learningPipeline.Add(new Scikit.ML.EntryPoints.NearestNeighborsMultiClass());

         var predictor   = learningPipeline.Train();
         var predictions = predictor.Predict(df);
         var dfout       = DataFrameIO.ReadView(predictions);

         // Expected value goes first so that a failure message labels both values correctly.
         Assert.AreEqual(Tuple.Create(150, 11), dfout.Shape);
     }
 }
Example #3
0
        /// <summary>
        /// Trains a pipeline (text loader, column concatenation, KMeans++ clusterer) on
        /// iris.txt and checks the shape of the predictions read back as a data frame.
        /// </summary>
        public void TestEP_Q_KMeansEntryPointAPIWithDataFrame()
        {
            var iris = FileHelper.GetTestFile("iris.txt");
            var df   = Scikit.ML.DataManipulation.DataFrameIO.ReadCsv(iris, sep: '\t', dtypes: new ColumnType[] { NumberType.R4 });

            var importData       = df.EPTextLoader(iris, sep: '\t', header: true);
            var learningPipeline = new GenericLearningPipeline(conc: 1);

            learningPipeline.Add(importData);
            learningPipeline.Add(new Legacy.Transforms.ColumnConcatenator("Features", "Sepal_length", "Sepal_width"));
            learningPipeline.Add(new Legacy.Trainers.KMeansPlusPlusClusterer());

            var predictor   = learningPipeline.Train();
            var predictions = predictor.Predict(df);
            var dfout       = Scikit.ML.DataManipulation.DataFrameIO.ReadView(predictions);

            // Expected value goes first so that a failure message labels both values correctly.
            Assert.AreEqual(Tuple.Create(150, 13), dfout.Shape);
        }
        /// <summary>
        /// Trains a pipeline (text loader, column concatenation, scaler, SDCA regressor)
        /// on iris.txt and checks the shape of the predictions.
        /// </summary>
        public void TestEP_ScalerTransform()
        {
            var iris = FileHelper.GetTestFile("iris.txt");
            var df   = DataFrameIO.ReadCsv(iris, sep: '\t', dtypes: new ColumnType[] { NumberType.R4 });

            var importData       = df.EPTextLoader(iris, sep: '\t', header: true);
            var learningPipeline = new GenericLearningPipeline(conc: 1);

            learningPipeline.Add(importData);
            learningPipeline.Add(new Legacy.Transforms.ColumnConcatenator("Features", "Sepal_length", "Sepal_width"));
            learningPipeline.Add(new Scikit.ML.EntryPoints.Scaler("Features"));
            learningPipeline.Add(new Legacy.Trainers.StochasticDualCoordinateAscentRegressor());

            var predictor   = learningPipeline.Train();
            var predictions = predictor.Predict(df);
            var dfout       = DataFrameIO.ReadView(predictions);

            // Expected value goes first so that a failure message labels both values correctly.
            Assert.AreEqual(Tuple.Create(150, 8), dfout.Shape);
        }
Example #5
0
        /// <summary>
        /// Assembles a pipeline (text loader, column concatenation, XGBoost binary learner)
        /// on iris_binary.txt. The test currently returns right after the pipeline is
        /// assembled, so the training, prediction and shape check below the early return
        /// are never executed.
        /// </summary>
        public void TestEntryPointXGBoostBinary()
        {
            // Scope the test environment so it is disposed even on the early return.
            using (var env = EnvHelper.NewTestEnvironment(conc: 1))
            {
                var iris = FileHelper.GetTestFile("iris_binary.txt");
                var df   = DataFrame.ReadCsv(iris, sep: '\t', dtypes: new DataKind?[] { DataKind.R4 });

                var importData       = df.EPTextLoader(iris, sep: '\t', header: true);
                var learningPipeline = new GenericLearningPipeline(conc: 1);

                learningPipeline.Add(importData);
                learningPipeline.Add(new ColumnConcatenator("Features", "Sepal_length", "Sepal_width"));
                learningPipeline.Add(new Scikit.ML.XGBoostWrapper.XGBoostBinary());
                // Fails here due to missing variable.
                // TODO: remove this early return once the failure above is resolved; the
                // statements below are kept as the intended remainder of the test.
                return;

                var predictor   = learningPipeline.Train();
                var predictions = predictor.Predict(df);
                var dfout       = DataFrame.ReadView(predictions);

                // Expected value goes first so that a failure message labels both values correctly.
                Assert.AreEqual(Tuple.Create(150, 9), dfout.Shape);
            }
        }
 /// <summary>
 /// Trains a pipeline (text loader, column concatenation, SDCA regressor limited to
 /// two iterations) on iris.txt inside a test environment, checks the shape of the
 /// predictions, writes them to a CSV file and checks that the file exists.
 /// </summary>
 public void TestTreePathNewAPI()
 {
     using (var env = EnvHelper.NewTestEnvironment(conc: 1))
     {
         var methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;
         var iris       = FileHelper.GetTestFile("iris.txt");
         var df         = DataFrameIO.ReadCsv(iris, sep: '\t', dtypes: new ColumnType[] { NumberType.R4 });
         var importData = df.EPTextLoader(iris, sep: '\t', header: true);

         var learningPipeline = new GenericLearningPipeline();
         learningPipeline.Add(importData);
         learningPipeline.Add(new Legacy.Transforms.ColumnConcatenator("Features", "Sepal_length", "Sepal_width"));
         learningPipeline.Add(new Legacy.Trainers.StochasticDualCoordinateAscentRegressor()
         {
             MaxIterations = 2
         });

         var predictor   = learningPipeline.Train();
         var predictions = predictor.Predict(df);
         var dfout       = DataFrameIO.ReadView(predictions);

         // Expected value goes first so that a failure message labels both values correctly.
         Assert.AreEqual(Tuple.Create(150, 8), dfout.Shape);

         var outfile = FileHelper.GetOutputFile("iris_path.txt", methodName);

         // Remove any artifact left by a previous run; otherwise the File.Exists check
         // below could pass without this run having written anything.
         if (File.Exists(outfile))
         {
             File.Delete(outfile);
         }

         dfout.ToCsv(outfile);
         Assert.IsTrue(File.Exists(outfile));
     }
 }