/// <summary>
/// Trains a pipeline made of a "Concat" transform and the "mlr" trainer on the
/// iris test file, then checks that the string form of the second item returned by
/// <c>PyEnvHelper.CreateStoreEnvironment</c> is not null.
/// </summary>
public void TestPipelineIris()
{
    // Arrange: load the test data, passing three R4 entries as the column types.
    var dataPath = FileHelper.GetTestFile("iris_data_id.txt");
    var columnTypes = new[] { NumberType.R4, NumberType.R4, NumberType.R4 };
    var data = DataFrameIO.ReadCsv(dataPath, sep: ',', dtypes: columnTypes);

    // Item1 is handed to the pipeline as its environment; Item2 is read back below
    // (presumably the captured output of that environment -- confirm against PyEnvHelper).
    var storeEnv = PyEnvHelper.CreateStoreEnvironment();
    var transforms = new[] { "Concat{col=Features:Slength,Swidth}" };
    var pipeline = PyPipelineHelper.CreateScikitPipeline(transforms, "mlr", storeEnv.Item1);

    // Act
    pipeline.Train(data, "Features", "Label");

    // Assert
    var captured = storeEnv.Item2.ToString();
    Assert.IsNotNull(captured);
}
/// <summary>
/// Trains a pipeline ("concat" transform over the four iris measurements followed by
/// the "oova{p=ap}" trainer) on the iris test file and checks that the predicted
/// view, read back as a data frame, has shape (150, 14).
/// </summary>
public void TestTrainIris()
{
    // Arrange: load the data and add an R4 copy of the label column used for training.
    var dataPath = FileHelper.GetTestFile("iris_data_id.txt");
    var data = PyDataFrameHelper.ReadCsv(dataPath);
    data.AddColumn("LabelR4", data["Label"].AsType(NumberType.R4));

    var host = PyEnvHelper.CreateConsoleEnvironment();
    var pipeline = PyPipelineHelper.CreateScikitPipeline(
        new[] { "concat{col=Feat:Slength,Swidth,Plength,Pwidth}" }, "oova{p=ap}", host);

    // Act: predict while the object returned by Train is still alive; it is disposed
    // only after the predictions have been read into a data frame.
    DataFrame predictions;
    using (var trained = pipeline.Train(data, "Feat", "LabelR4"))
    {
        predictions = PyDataFrameHelper.ReadView(pipeline.Predict(data));
    }

    // Assert: expected value goes first so a failure message reports the values correctly.
    Assert.AreEqual(new Tuple<int, int>(150, 14), predictions.Shape);
}
/// <summary>
/// Trains a pipeline ("Concat" transform over columns F0..F9 followed by the "ols"
/// trainer) on the diabete test file, then runs
/// <c>PyPipelineHelper.FastPredictOrTransform</c> and checks that both the input
/// and the prediction data frames report at least one row in <c>Shape[0]</c>.
/// </summary>
public void TestPipelineDiabete()
{
    // Number of entries passed as dtypes and of F{i} columns concatenated into "Features".
    const int featureCount = 10;

    // Arrange
    var dataPath = FileHelper.GetTestFile("diabete.csv");
    var columnTypes = Enumerable.Repeat(NumberType.R4, featureCount).ToArray();
    var featureNames = string.Join(',', Enumerable.Range(0, featureCount).Select(c => $"F{c}"));
    var df = DataFrameIO.ReadCsv(dataPath, sep: ',', dtypes: columnTypes);

    // Item1 is handed to the pipeline as its environment; Item2 is read back below
    // (presumably the captured output of that environment -- confirm against PyEnvHelper).
    var storeEnv = PyEnvHelper.CreateStoreEnvironment();
    var pipe = PyPipelineHelper.CreateScikitPipeline(
        new string[] { $"Concat{{col=Features:{featureNames}}}" }, "ols", storeEnv.Item1);

    // Act
    pipe.Train(df, "Features", "Label");
    var captured = storeEnv.Item2.ToString();
    DataFrame pred = PyPipelineHelper.FastPredictOrTransform(pipe, df);

    // Assert
    Assert.IsNotNull(captured);
    Assert.IsTrue(df.Shape[0] > 0);

    // The original test never inspected the prediction result; verify it here so the
    // predict step is actually covered.
    Assert.IsNotNull(pred);
    Assert.IsTrue(pred.Shape[0] > 0);
}