Example #1
        public void TestEP_PassThroughTransform()
        {
            var methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;
            var iris       = FileHelper.GetTestFile("iris.txt");
            var outPass    = FileHelper.GetOutputFile("data.idv", methodName);
            var df         = DataFrameIO.ReadCsv(iris, sep: '\t', dtypes: new ColumnType[] { NumberType.R4 });

            var importData       = df.EPTextLoader(iris, sep: '\t', header: true);
            var learningPipeline = new GenericLearningPipeline(conc: 1);

            learningPipeline.Add(importData);
            learningPipeline.Add(new Legacy.Transforms.ColumnConcatenator("Features", "Sepal_length", "Sepal_width"));
            learningPipeline.Add(new Scikit.ML.EntryPoints.Scaler("Features"));
            learningPipeline.Add(new Scikit.ML.EntryPoints.PassThrough()
            {
                Filename = outPass, SaveOnDisk = true
            });
            learningPipeline.Add(new Legacy.Trainers.StochasticDualCoordinateAscentRegressor());
            var predictor   = learningPipeline.Train();
            var predictions = predictor.Predict(df);
            var dfout       = DataFrameIO.ReadView(predictions);

            Assert.AreEqual(dfout.Shape, new Tuple<int, int>(150, 8));
            Assert.IsTrue(File.Exists(outPass));
        }
Example #2
        /// <summary>
        /// Reads a file as a <see cref="DataFrame"/>.
        /// Follows pandas API.
        /// </summary>
        public static DataFrame ReadCsv(string filename, char sep = ',', bool header = true,
                                        string[] names = null, int[] dtypes = null,
                                        int nrows = -1, int guess_rows = 10, string encoding = null,
                                        bool index = false)
        {
            var kinds = IntToColumnTypes(dtypes);

            return DataFrameIO.ReadCsv(filename, sep, header, names, kinds, nrows, guess_rows,
                                       encoding == null ? null : Encoding.GetEncoding(encoding),
                                       index: index);
        }
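A minimal usage sketch of the ReadCsv overload above (the file name and separator are illustrative assumptions, not taken from the examples on this page); dtypes is left null, so the column types are guessed from the first guess_rows rows:

        // Hypothetical call, assuming the static ReadCsv above is in scope:
        // read a tab-separated file with a header row and inferred column types.
        var df = ReadCsv("iris.txt", sep: '\t', header: true);
        Console.WriteLine(df.Shape[0]);   // number of rows loaded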
Example #3
 public void Train()
 {
     using (var env = new ConsoleEnvironment())
     {
         var df = DataFrameIO.ReadCsv(_dataset, sep: ',',
                                      dtypes: new ColumnType[] { NumberType.R4 });
         // Concatenate the ten input columns F0..F9 into a single vector column "Features".
         var concat = "Concat{col=Features:F0,F1,F2,F3,F4,F5,F6,F7,F8,F9}";
         // Two-step pipeline: the concatenation followed by the "ftr" learner with 10 iterations.
         var pipe   = new ScikitPipeline(new[] { concat }, "ftr{iter=10}");
         pipe.Train(df, "Features", "Label");
         _pipeline = pipe;
     }
 }
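The Train() method above stores the fitted pipeline in the _pipeline field but does not show how it is used afterwards. A hypothetical companion sketch (the Score name and its placement are assumptions; the Predict(df, ref pred) call follows the pattern of the other examples on this page):

 // Hypothetical companion method; _pipeline is the ScikitPipeline filled by Train() above.
 public DataFrame Score(DataFrame df)
 {
     DataFrame pred = null;
     // ScikitPipeline.Predict fills the output frame by reference.
     _pipeline.Predict(df, ref pred);
     return pred;
 }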
Example #4
        public void TestPipelineIris()
        {
            var iris = FileHelper.GetTestFile("iris_data_id.txt");
            var df   = DataFrameIO.ReadCsv(iris, sep: ',', dtypes: new[] { NumberType.R4, NumberType.R4, NumberType.R4 });
            var env3 = PyEnvHelper.CreateStoreEnvironment();
            var pipe = PyPipelineHelper.CreateScikitPipeline(new string[] { "Concat{col=Features:Slength,Swidth}" },
                                                             "mlr", env3.Item1);

            pipe.Train(df, "Features", "Label");
            var sout = env3.Item2.ToString();

            Assert.IsNotNull(sout);
        }
Example #5
        public void TestScikitAPI_TrainingDiabete()
        {
            var diab     = FileHelper.GetTestFile("diabete.csv");
            var cols     = Enumerable.Range(0, 10).Select(c => NumberType.R4).ToArray();
            var colsName = string.Join(',', Enumerable.Range(0, 10).Select(c => $"F{c}"));
            var df       = DataFrameIO.ReadCsv(diab, sep: ',', dtypes: cols);
            var pipe     = new ScikitPipeline(new string[] { $"Concat{{col=Features:{colsName}}}" }, "ols");

            pipe.Train(df, "Features", "Label");
            DataFrame pred = null;

            pipe.Predict(df, ref pred);
            Assert.AreEqual(pred.Shape, new ShapeType(83, 13));
        }
Example #6
        public void TestScikitAPI_TrainingWithIris()
        {
            var iris = FileHelper.GetTestFile("iris.txt");
            var df   = DataFrameIO.ReadCsv(iris, sep: '\t');

            df.AddColumn("LabelI", df["Label"].AsType(NumberType.R4));
            var pipe = new ScikitPipeline(new[] { $"Concat{{col=Features:{df.Columns[1]},{df.Columns[2]}}}" }, "mlr");

            pipe.Train(df, "Features", "LabelI");
            DataFrame pred = null;

            pipe.Predict(df, ref pred);
            Assert.AreEqual(pred.Shape, new ShapeType(150, 9));
        }
Example #7
 public void TestEP_NearestNeighborsLPMc()
 {
     using (var env = EnvHelper.NewTestEnvironment(conc: 1))
     {
         var iris             = FileHelper.GetTestFile("iris.txt");
         var df               = DataFrameIO.ReadCsv(iris, sep: '\t', dtypes: new ColumnType[] { NumberType.R4 });
         var importData       = df.EPTextLoader(iris, sep: '\t', header: true);
         var learningPipeline = new GenericLearningPipeline(conc: 1);
         learningPipeline.Add(importData);
         learningPipeline.Add(new Legacy.Transforms.ColumnConcatenator("Features", "Sepal_length", "Sepal_width"));
         learningPipeline.Add(new Scikit.ML.EntryPoints.NearestNeighborsMultiClass());
         var predictor   = learningPipeline.Train();
         var predictions = predictor.Predict(df);
         var dfout       = DataFrameIO.ReadView(predictions);
         Assert.AreEqual(dfout.Shape, new Tuple<int, int>(150, 11));
     }
 }
Example #8
        public void TestPipelineDiabete()
        {
            var diab     = FileHelper.GetTestFile("diabete.csv");
            var cols     = Enumerable.Range(0, 10).Select(c => NumberType.R4).ToArray();
            var colsName = string.Join(',', Enumerable.Range(0, 10).Select(c => $"F{c}"));
            var df       = DataFrameIO.ReadCsv(diab, sep: ',', dtypes: cols);
            var env3     = PyEnvHelper.CreateStoreEnvironment();
            var pipe     = PyPipelineHelper.CreateScikitPipeline(new string[] { $"Concat{{col=Features:{colsName}}}" }, "ols", env3.Item1);

            pipe.Train(df, "Features", "Label");
            var sout = env3.Item2.ToString();

            Assert.IsNotNull(sout);
            DataFrame pred = PyPipelineHelper.FastPredictOrTransform(pipe, df);

            Assert.IsTrue(pred.Shape[0] > 0);
        }
Example #9
        public void TestEP_ScalerTransform()
        {
            var iris = FileHelper.GetTestFile("iris.txt");
            var df   = DataFrameIO.ReadCsv(iris, sep: '\t', dtypes: new ColumnType[] { NumberType.R4 });

            var importData       = df.EPTextLoader(iris, sep: '\t', header: true);
            var learningPipeline = new GenericLearningPipeline(conc: 1);

            learningPipeline.Add(importData);
            learningPipeline.Add(new Legacy.Transforms.ColumnConcatenator("Features", "Sepal_length", "Sepal_width"));
            learningPipeline.Add(new Scikit.ML.EntryPoints.Scaler("Features"));
            learningPipeline.Add(new Legacy.Trainers.StochasticDualCoordinateAscentRegressor());
            var predictor   = learningPipeline.Train();
            var predictions = predictor.Predict(df);
            var dfout       = DataFrameIO.ReadView(predictions);

            Assert.AreEqual(dfout.Shape, new Tuple<int, int>(150, 8));
        }
Example #10
 public void TestScikitAPI_MKL_TrainingDiabete()
 {
     try
     {
         var diab     = FileHelper.GetTestFile("diabete.csv");
         var cols     = Enumerable.Range(0, 10).Select(c => NumberDataViewType.Single).ToArray();
         var colsName = string.Join(',', Enumerable.Range(0, 10).Select(c => $"F{c}"));
         var df       = DataFrameIO.ReadCsv(diab, sep: ',', dtypes: cols);
         var pipe     = new ScikitPipeline(new string[] { $"Concat{{col=Features:{colsName}}}" }, "ols");
         pipe.Train(df, "Features", "Label");
         DataFrame pred = null;
         pipe.Predict(df, ref pred);
         Assert.AreEqual(pred.Shape, new ShapeType(83, 13));
     }
     catch (DllNotFoundException e)
     {
         var os = Environment.OSVersion;
         if (os.Platform == PlatformID.Unix)
         {
             Console.WriteLine("FAIL(1): TestScikitAPI_MKL due to {0}", e.ToString());
             return;
         }
         else
         {
             Console.WriteLine("FAIL(1): TestScikitAPI_MKL, OS={0}", os.ToString());
              throw;  // rethrow without resetting the stack trace
         }
     }
     catch (NotSupportedException e)
     {
         var os = Environment.OSVersion;
         if (os.Platform == PlatformID.Unix)
         {
             Console.WriteLine("FAIL(2): TestScikitAPI_MKL due to {0}", e.ToString());
             return;
         }
         else
         {
             Console.WriteLine("FAIL(2): TestScikitAPI_MKL, OS={0}", os.ToString());
              throw;  // rethrow without resetting the stack trace
         }
     }
 }
Example #11
 public void TestTreePathInnerAPI()
 {
     using (var env = EnvHelper.NewTestEnvironment(conc: 1))
     {
         var methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;
         var iris       = FileHelper.GetTestFile("iris.txt");
         var df         = DataFrameIO.ReadCsv(iris, sep: '\t', dtypes: new ColumnType[] { NumberType.R4 });
         using (var pipe = new ScikitPipeline(new[] { "Concat{col=Feature:Sepal_length,Sepal_width}",
                                                      "TreeFeat{tr=ft{iter=2} lab=Label feat=Feature}" }))
         {
             pipe.Train(df);
             var scorer = pipe.Predict(df);
             var dfout  = DataFrameIO.ReadView(scorer);
             Assert.AreEqual(dfout.Shape, new Tuple<int, int>(150, 31));
             var outfile = FileHelper.GetOutputFile("iris_path.txt", methodName);
             dfout.ToCsv(outfile);
             Assert.IsTrue(File.Exists(outfile));
         }
     }
 }
Example #12
        public void TestOnnx_TrainingWithIris()
        {
            var methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;

            // direct call
            var iris = FileHelper.GetTestFile("iris.txt");
            var df   = DataFrameIO.ReadCsv(iris, sep: '\t');

            df.AddColumn("LabelI", df["Label"].AsType(NumberDataViewType.Single));
            var pipe = new ScikitPipeline(new[] { $"Concat{{col=Features:{df.Columns[1]},{df.Columns[2]}}}" }, "mlr");

            pipe.Train(df, "Features", "LabelI");
            DataFrame pred = null;

            pipe.Predict(df, ref pred);

            // Onnx Save
            var output = FileHelper.GetOutputFile("model.onnx", methodName);
            var model  = pipe.ToOnnx();

            model.Save(output);
            Assert.IsTrue(File.Exists(output));

            // Onnx save no concat.
            output = FileHelper.GetOutputFile("model_vector.onnx", methodName);
            model  = pipe.ToOnnx(1);
            model.Save(output);
            Assert.IsTrue(File.Exists(output));

            // Onnx Load Not implemented yet.

            /*
             * var restored = new ScikitPipeline(output);
             * DataFrame pred2 = null;
             * restored.Predict(df, ref pred2);
             * pred.AssertAlmostEqual(pred2);
             */
        }
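ScikitPipeline cannot reload the exported ONNX file yet (see the commented block above). A hedged sketch of how the saved model could still be inspected with the standalone Microsoft.ML.OnnxRuntime package (that package reference and this helper are assumptions, not part of the original test):

        // Assumes a reference to the Microsoft.ML.OnnxRuntime NuGet package
        // and "using Microsoft.ML.OnnxRuntime;" at the top of the file.
        public static void ListOnnxInputs(string onnxPath)
        {
            using (var session = new InferenceSession(onnxPath))
            {
                // Print the input names declared by the exported model.
                foreach (var kv in session.InputMetadata)
                    Console.WriteLine(kv.Key);
            }
        }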
Example #13
 public void TestTreePathNewAPI()
 {
     using (var env = EnvHelper.NewTestEnvironment(conc: 1))
     {
         var methodName       = System.Reflection.MethodBase.GetCurrentMethod().Name;
         var iris             = FileHelper.GetTestFile("iris.txt");
         var df               = DataFrameIO.ReadCsv(iris, sep: '\t', dtypes: new ColumnType[] { NumberType.R4 });
         var importData       = df.EPTextLoader(iris, sep: '\t', header: true);
         var learningPipeline = new GenericLearningPipeline();
         learningPipeline.Add(importData);
         learningPipeline.Add(new Legacy.Transforms.ColumnConcatenator("Features", "Sepal_length", "Sepal_width"));
         learningPipeline.Add(new Legacy.Trainers.StochasticDualCoordinateAscentRegressor()
         {
             MaxIterations = 2
         });
         var predictor   = learningPipeline.Train();
         var predictions = predictor.Predict(df);
         var dfout       = DataFrameIO.ReadView(predictions);
         Assert.AreEqual(dfout.Shape, new Tuple<int, int>(150, 8));
         var outfile = FileHelper.GetOutputFile("iris_path.txt", methodName);
         dfout.ToCsv(outfile);
         Assert.IsTrue(File.Exists(outfile));
     }
 }