/// <summary>
/// Calls the ref-based <c>Predict</c> overload of <paramref name="pipe"/> on
/// <paramref name="df"/> and returns the DataFrame it produced.
/// </summary>
/// <param name="pipe">Pipeline whose <c>Predict</c> method is invoked.</param>
/// <param name="df">Input data handed to <c>Predict</c>.</param>
/// <param name="conc">Forwarded unchanged as the third argument of <c>Predict</c> (defaults to 1).</param>
/// <returns>The DataFrame assigned by <c>Predict</c> through its <c>ref</c> argument.</returns>
public static DataFrame FastPredictOrTransform(ScikitPipeline pipe, DataFrame df, int conc = 1)
{
    // Predict receives the output holder by reference; it starts out as null.
    DataFrame output = null;
    pipe.Predict(df, ref output, conc);
    return output;
}
/// <summary>
/// Trains a "poly{col=X}" pipeline, checks its transformed output, saves the pipeline
/// to a zip file, then reloads that file in a second environment and verifies the
/// reloaded pipeline produces the same text output.
/// </summary>
/// <param name="removeFirstTransform">
/// Passed to <c>ScikitPipeline.Save</c>; its value is also embedded in the model file name.
/// </param>
private void ScikitAPI_SimpleTransform_Load(bool removeFirstTransform)
{
    // The current method name is passed to FileHelper.GetOutputFile together with the file name.
    var testName = System.Reflection.MethodBase.GetCurrentMethod().Name;
    var modelPath = FileHelper.GetOutputFile($"model{removeFirstTransform}.zip", testName);

    var trainingRows = new[]
    {
        new ExampleA() { X = new float[] { 1, 10, 100 } },
        new ExampleA() { X = new float[] { 2, 3, 5 } }
    };
    var scoringRows = new[]
    {
        new ExampleA() { X = new float[] { -1, -10, -100 } },
        new ExampleA() { X = new float[] { -2, -3, -5 } }
    };

    // Text rendering produced by the freshly trained pipeline; compared after reload.
    string expected = null;

    // Step 1: train, check the output, save.
    using (var trainEnv = EnvHelper.NewTestEnvironment(conc: 1))
    {
        var trainingView = trainEnv.CreateStreamingDataView(trainingRows);
        using (var trained = new ScikitPipeline(new[] { "poly{col=X}" }, host: trainEnv))
        {
            var predictor = trained.Train(trainingView);
            Assert.IsTrue(predictor != null);

            var scoringView = trainEnv.CreateStreamingDataView(scoringRows);
            var transformed = trained.Transform(scoringView);
            var frame = DataFrameIO.ReadView(transformed);
            Assert.AreEqual(frame.Shape, new Tuple<int, int>(2, 9));

            // Rows are joined with ';' so the whole frame fits in one literal.
            expected = frame.ToString().Replace("\n", ";");
            Assert.AreEqual(expected, "X.0,X.1,X.2,X.3,X.4,X.5,X.6,X.7,X.8;-1,-10,-100,1,10,100,100,1000,10000;-2,-3,-5,4,6,10,9,15,25");

            trained.Save(modelPath, removeFirstTransform);
        }
    }

    // Step 2: reload from disk in a new environment and compare.
    using (var reloadEnv = EnvHelper.NewTestEnvironment(conc: 1))
    {
        var scoringView = reloadEnv.CreateStreamingDataView(scoringRows);
        using (var reloaded = new ScikitPipeline(modelPath, reloadEnv))
        {
            var transformed = reloaded.Transform(scoringView);
            var frame = DataFrameIO.ReadView(transformed);
            Assert.AreEqual(frame.Shape, new Tuple<int, int>(2, 9));

            var actual = frame.ToString().Replace("\n", ";");
            Assert.AreEqual(expected, actual);
        }
    }
}
/// <summary>
/// Trains a pipeline made of a Concat step and a TagTrainScore step configured with a
/// MultiClassClassifierScorer using the "AA" suffix, then checks the shape and the
/// header of the predictions computed on the same loader.
/// </summary>
public void TestTagTrainOrScoreTransformCustomScorer()
{
    var testName = System.Reflection.MethodBase.GetCurrentMethod().Name;
    var irisPath = FileHelper.GetTestFile("mc_iris.txt");

    // These two output paths are requested but not used afterwards in this test.
    var outModelFilePath = FileHelper.GetOutputFile("outModelFilePath.zip", testName);
    var outData = FileHelper.GetOutputFile("outData1.txt", testName);

    using (var env = EnvHelper.NewTestEnvironment())
    {
        var loader = env.CreateLoader(
            "Text{col=Label:R4:0 col=Slength:R4:1 col=Swidth:R4:2 col=Plength:R4:3 col=Pwidth:R4:4 header=-}",
            new MultiFileSource(irisPath));

        var steps = new[]
        {
            "Concat{col=Feature:Slength,Swidth}",
            "TagTrainScore{tr=iova{p=ft{nl=10 iter=1}} lab=Label feat=Feature tag=model scorer=MultiClassClassifierScorer{ex=AA}}"
        };

        using (var pipeline = new ScikitPipeline(steps, host: env))
        {
            pipeline.Train(loader);

            var scored = pipeline.Predict(loader);
            var frame = DataFrameIO.ReadView(scored);
            Assert.AreEqual(frame.Shape, new Tuple<int, int>(150, 11));

            // Only the beginning of the text rendering (the header) is checked.
            var headText = frame.Head().ToString();
            var headerMatches = headText.StartsWith("Label,Slength,Swidth,Plength,Pwidth,Feature.0,Feature.1,PredictedLabelAA,ScoreAA.0,ScoreAA.1,ScoreAA.2");
            Assert.IsTrue(headerMatches);
        }
    }
}
/// <summary>
/// Trains a "poly{col=X}" pipeline inside a <c>DelegateEnvironment</c> created with
/// <c>verbose: 3</c> whose output and error writers append to in-memory lists, then
/// checks that something was written to the output list and nothing to the error list.
/// </summary>
/// <exception cref="Exception">
/// Thrown when the predictor is null, when no output line was captured,
/// or when at least one error line was captured.
/// </exception>
public void TestScikitAPI_DelegateEnvironment()
{
    var inputs = new[]
    {
        new ExampleA() { X = new float[] { 1, 10, 100 } },
        new ExampleA() { X = new float[] { 2, 3, 5 } }
    };

    // Every message sent to the environment's writers is captured here.
    var stdout = new List<string>();
    var stderr = new List<string>();
    ILogWriter logout = new LogWriter((string s) => { stdout.Add(s); });
    ILogWriter logerr = new LogWriter((string s) => { stderr.Add(s); });

    using (var host = new DelegateEnvironment(conc: 1, outWriter: logout, errWriter: logerr, verbose: 3))
    using (var ch = host.Start("Train Pipeline"))
    {
        ComponentHelper.AddStandardComponents(host);
        ch.Info(MessageSensitivity.All, "Polynomial");

        var data = host.CreateStreamingDataView(inputs);
        using (var pipe = new ScikitPipeline(new[] { "poly{col=X}" }, host: host))
        {
            var predictor = pipe.Train(data);
            if (predictor == null)
            {
                throw new Exception("Predictor is null");
            }
        }
    }

    // The lists are inspected only after the environment and channel are disposed.
    if (stdout.Count == 0)
    {
        throw new Exception("stdout is empty.");
    }

    if (stderr.Count != 0)
    {
        throw new Exception($"stderr not empty\n{string.Join("\n", stderr)}");
    }
}
/// <summary>
/// Trains a pipeline with a "poly{col=X}" transform and a "km{k=2}" predictor on four
/// rows, predicts on four other rows and checks the shape and the beginning of the
/// text rendering of the predictions.
/// </summary>
public void TestScikitAPI_SimplePredictor()
{
    var trainingRows = new[]
    {
        new ExampleA() { X = new float[] { 1, 10, 100 } },
        new ExampleA() { X = new float[] { 2, 3, 5 } },
        new ExampleA() { X = new float[] { 2, 4, 5 } },
        new ExampleA() { X = new float[] { 2, 4, 7 } },
    };
    var scoringRows = new[]
    {
        new ExampleA() { X = new float[] { -1, -10, -100 } },
        new ExampleA() { X = new float[] { -2, -3, -5 } },
        new ExampleA() { X = new float[] { 3, 4, 5 } },
        new ExampleA() { X = new float[] { 3, 4, 7 } },
    };

    // The using statement around the environment is commented out in this test,
    // so the environment is created here and not disposed.
    var env = EnvHelper.NewTestEnvironment(conc: 1);

    var trainingView = DataViewConstructionUtils.CreateFromEnumerable(env, trainingRows);
    using (var pipeline = new ScikitPipeline(new[] { "poly{col=X}" }, "km{k=2}", env))
    {
        var predictor = pipeline.Train(trainingView, feature: "X");
        Assert.IsTrue(predictor != null);

        var scoringView = DataViewConstructionUtils.CreateFromEnumerable(env, scoringRows);
        var scoringFrame = new StreamingDataFrame(scoringView);
        var scored = pipeline.Predict(scoringFrame);

        var frame = DataFrameIO.ReadView(scored);
        Assert.AreEqual(frame.Shape, new Tuple<int, int>(4, 12));

        // Rows are joined with ';' and only the header plus the first row are compared.
        var flattened = frame.ToString().Replace("\n", ";");
        var startsAsExpected = flattened.StartsWith("X.0,X.1,X.2,X.3,X.4,X.5,X.6,X.7,X.8,PredictedLabel,Score.0,Score.1;-1,-10,-100,1,10,100,100,1000,10000");
        Assert.IsTrue(startsAsExpected);
    }
}
/// <summary>
/// Reads the CSV file referenced by <c>_dataset</c>, trains a pipeline made of a Concat
/// step over columns F0..F9 and a "ftr{iter=10}" predictor, and stores the trained
/// pipeline in <c>_pipeline</c>.
/// </summary>
public void Train()
{
    // The environment is created and disposed around the training;
    // it is not passed to the pipeline constructor.
    using (var env = new ConsoleEnvironment())
    {
        const string concatStep = "Concat{col=Features:F0,F1,F2,F3,F4,F5,F6,F7,F8,F9}";
        ColumnType[] columnTypes = { NumberType.R4 };

        var table = DataFrameIO.ReadCsv(_dataset, sep: ',', dtypes: columnTypes);
        var trained = new ScikitPipeline(new[] { concatStep }, "ftr{iter=10}");
        trained.Train(table, "Features", "Label");

        // The field is assigned only once training has completed without throwing.
        _pipeline = trained;
    }
}
/// <summary>
/// Trains an "ols" predictor on the diabete.csv test file after concatenating
/// columns F0..F9 into "Features", then checks the shape of the predictions
/// computed on the training data.
/// </summary>
public void TestScikitAPI_TrainingDiabete()
{
    var csvPath = FileHelper.GetTestFile("diabete.csv");

    // Ten column types, all NumberType.R4, given to ReadCsv as dtypes.
    var columnTypes = Enumerable.Range(0, 10)
                                .Select(i => NumberType.R4)
                                .ToArray();

    // "F0,F1,...,F9"
    var featureNames = string.Join(',', Enumerable.Range(0, 10).Select(i => $"F{i}"));

    var table = DataFrameIO.ReadCsv(csvPath, sep: ',', dtypes: columnTypes);

    var steps = new string[] { $"Concat{{col=Features:{featureNames}}}" };
    var pipeline = new ScikitPipeline(steps, "ols");
    pipeline.Train(table, "Features", "Label");

    DataFrame predictions = null;
    pipeline.Predict(table, ref predictions);
    Assert.AreEqual(predictions.Shape, new ShapeType(83, 13));
}
/// <summary>
/// Trains a "mlr" predictor on the iris.txt test file using the columns at index 1
/// and 2 as features and an R4 copy of "Label" as label, then checks the shape of the
/// predictions computed on the training data.
/// </summary>
public void TestScikitAPI_TrainingWithIris()
{
    var irisPath = FileHelper.GetTestFile("iris.txt");
    var table = DataFrameIO.ReadCsv(irisPath, sep: '\t');

    // Add the label converted to NumberType.R4 under a new column name.
    var labelAsR4 = table["Label"].AsType(NumberType.R4);
    table.AddColumn("LabelI", labelAsR4);

    var concatStep = $"Concat{{col=Features:{table.Columns[1]},{table.Columns[2]}}}";
    var pipeline = new ScikitPipeline(new[] { concatStep }, "mlr");
    pipeline.Train(table, "Features", "LabelI");

    DataFrame predictions = null;
    pipeline.Predict(table, ref predictions);
    Assert.AreEqual(predictions.Shape, new ShapeType(150, 9));
}
/// <summary>
/// Tries to build a <c>ScikitPipeline</c> from the model_iris.zip test file. If an
/// exception is raised, its text must contain "because the model is too old".
/// </summary>
public void TestLoadModelFromNimbusML()
{
    var modelPath = FileHelper.GetTestFile("model_iris.zip");

    using (var env = EnvHelper.NewTestEnvironment())
    {
        try
        {
            // Only construction and disposal of the pipeline are exercised.
            using (var loaded = new ScikitPipeline(modelPath, env))
            {
            }
        }
        catch (Exception e)
        {
            // NOTE(review): when no exception is thrown the test passes as well.
            var details = e.ToString();
            Assert.IsTrue(details.Contains("because the model is too old"));
        }
    }
}
/// <summary>
/// Trains a "poly{col=X}" + "km{k=2}" pipeline inside a <c>DelegateEnvironment</c>
/// created with <c>verbose: 0</c> whose writers append to in-memory lists, and checks
/// that neither the output list nor the error list received any message.
/// </summary>
public void TestScikitAPI_DelegateEnvironmentVerbose0()
{
    var inputs = new[]
    {
        new ExampleA() { X = new float[] { 1, 10, 100 } },
        new ExampleA() { X = new float[] { 2, 3, 5 } }
    };

    // Every message sent to the environment's writers is captured here.
    var stdout = new List<string>();
    var stderr = new List<string>();
    ILogWriter logout = new LogWriter(s => stdout.Add(s));
    ILogWriter logerr = new LogWriter(s => stderr.Add(s));

    // The using statement around the environment is commented out in this test,
    // so the environment is created here and not disposed.
    var host = new DelegateEnvironment(seed: 0, outWriter: logout, errWriter: logerr, verbose: 0);
    {
        ComponentHelper.AddStandardComponents(host);
        var data = DataViewConstructionUtils.CreateFromEnumerable(host, inputs);
        using (var pipe = new ScikitPipeline(new[] { "poly{col=X}" }, "km{k=2}", host: host))
        {
            var predictor = pipe.Train(data, feature: "X");
            Assert.IsTrue(predictor != null);
        }
    }

    // Expected value first, actual value second, so failure messages read correctly.
    Assert.AreEqual(0, stdout.Count);
    Assert.AreEqual(0, stderr.Count);
}
/// <summary>
/// Trains an "ols" predictor on the diabete.csv test file after concatenating
/// columns F0..F9 into "Features" and checks the shape of the predictions.
/// A <see cref="DllNotFoundException"/> or <see cref="NotSupportedException"/> is
/// logged and swallowed when <c>Environment.OSVersion.Platform</c> is
/// <c>PlatformID.Unix</c>; on any other platform it is logged and rethrown.
/// </summary>
/// <exception cref="DllNotFoundException">Rethrown on non-Unix platforms.</exception>
/// <exception cref="NotSupportedException">Rethrown on non-Unix platforms.</exception>
public void TestScikitAPI_MKL_TrainingDiabete()
{
    try
    {
        var diab = FileHelper.GetTestFile("diabete.csv");
        var cols = Enumerable.Range(0, 10).Select(c => NumberDataViewType.Single).ToArray();
        var colsName = string.Join(',', Enumerable.Range(0, 10).Select(c => $"F{c}"));
        var df = DataFrameIO.ReadCsv(diab, sep: ',', dtypes: cols);
        var pipe = new ScikitPipeline(new string[] { $"Concat{{col=Features:{colsName}}}" }, "ols");
        pipe.Train(df, "Features", "Label");
        DataFrame pred = null;
        pipe.Predict(df, ref pred);
        Assert.AreEqual(pred.Shape, new ShapeType(83, 13));
    }
    catch (DllNotFoundException e)
    {
        var os = Environment.OSVersion;
        if (os.Platform == PlatformID.Unix)
        {
            Console.WriteLine("FAIL(1): TestScikitAPI_MKL due to {0}", e.ToString());
            return;
        }
        else
        {
            Console.WriteLine("FAIL(1): TestScikitAPI_MKL, OS={0}", os.ToString());
            // Bare rethrow keeps the original stack trace ("throw e;" would reset it).
            throw;
        }
    }
    catch (NotSupportedException e)
    {
        var os = Environment.OSVersion;
        if (os.Platform == PlatformID.Unix)
        {
            Console.WriteLine("FAIL(2): TestScikitAPI_MKL due to {0}", e.ToString());
            return;
        }
        else
        {
            Console.WriteLine("FAIL(2): TestScikitAPI_MKL, OS={0}", os.ToString());
            // Bare rethrow keeps the original stack trace ("throw e;" would reset it).
            throw;
        }
    }
}
/// <summary>
/// Trains a pipeline containing only the "poly{col=X}" transform on two rows,
/// transforms two other rows and compares the full text rendering of the result
/// with the expected nine-column output.
/// </summary>
public void TestScikitAPI_SimpleTransform()
{
    var trainingRows = new[]
    {
        new ExampleA() { X = new float[] { 1, 10, 100 } },
        new ExampleA() { X = new float[] { 2, 3, 5 } }
    };
    var scoringRows = new[]
    {
        new ExampleA() { X = new float[] { -1, -10, -100 } },
        new ExampleA() { X = new float[] { -2, -3, -5 } }
    };

    // The using statement around the environment is commented out in this test,
    // so the environment is created here and not disposed.
    var env = EnvHelper.NewTestEnvironment(conc: 1);

    var trainingView = DataViewConstructionUtils.CreateFromEnumerable(env, trainingRows);
    using (var pipeline = new ScikitPipeline(new[] { "poly{col=X}" }, host: env))
    {
        var predictor = pipeline.Train(trainingView);
        Assert.IsTrue(predictor != null);

        var scoringView = DataViewConstructionUtils.CreateFromEnumerable(env, scoringRows);
        var transformed = pipeline.Transform(scoringView);

        var frame = DataFrameIO.ReadView(transformed);
        Assert.AreEqual(frame.Shape, new Tuple<int, int>(2, 9));

        // Rows are joined with ';' so the whole frame fits in one literal.
        var flattened = frame.ToString().Replace("\n", ";");
        Assert.AreEqual(flattened, "X.0,X.1,X.2,X.3,X.4,X.5,X.6,X.7,X.8;-1,-10,-100,1,10,100,100,1000,10000;-2,-3,-5,4,6,10,9,15,25");
    }
}
/// <summary>
/// Trains a pipeline made of a Concat step and a TreeFeat step on the iris.txt test
/// file, checks the shape of the predictions, writes them to a CSV file and checks
/// that the file exists.
/// </summary>
public void TestTreePathInnerAPI()
{
    // The environment wraps the whole test; it is not passed to the pipeline constructor.
    using (var env = EnvHelper.NewTestEnvironment(conc: 1))
    {
        var testName = System.Reflection.MethodBase.GetCurrentMethod().Name;
        var irisPath = FileHelper.GetTestFile("iris.txt");

        ColumnType[] columnTypes = { NumberType.R4 };
        var table = DataFrameIO.ReadCsv(irisPath, sep: '\t', dtypes: columnTypes);

        var steps = new[]
        {
            "Concat{col=Feature:Sepal_length,Sepal_width}",
            "TreeFeat{tr=ft{iter=2} lab=Label feat=Feature}"
        };

        using (var pipeline = new ScikitPipeline(steps))
        {
            pipeline.Train(table);

            var scored = pipeline.Predict(table);
            var result = DataFrameIO.ReadView(scored);
            Assert.AreEqual(result.Shape, new Tuple<int, int>(150, 31));

            // Dump the predictions and make sure the file was produced.
            var csvPath = FileHelper.GetOutputFile("iris_path.txt", testName);
            result.ToCsv(csvPath);
            Assert.IsTrue(File.Exists(csvPath));
        }
    }
}
/// <summary>
/// Trains a "mlr" predictor on the iris.txt test file, converts the pipeline to ONNX
/// twice (<c>ToOnnx()</c> and <c>ToOnnx(1)</c>), saves both models and checks that
/// the two files exist.
/// </summary>
public void TestOnnx_TrainingWithIris()
{
    var testName = System.Reflection.MethodBase.GetCurrentMethod().Name;

    // Train and predict directly with the pipeline.
    var irisPath = FileHelper.GetTestFile("iris.txt");
    var table = DataFrameIO.ReadCsv(irisPath, sep: '\t');
    var labelAsSingle = table["Label"].AsType(NumberDataViewType.Single);
    table.AddColumn("LabelI", labelAsSingle);

    var concatStep = $"Concat{{col=Features:{table.Columns[1]},{table.Columns[2]}}}";
    var pipeline = new ScikitPipeline(new[] { concatStep }, "mlr");
    pipeline.Train(table, "Features", "LabelI");

    DataFrame predictions = null;
    pipeline.Predict(table, ref predictions);

    // Save the ONNX conversion obtained with the default arguments.
    var onnxPath = FileHelper.GetOutputFile("model.onnx", testName);
    var onnxModel = pipeline.ToOnnx();
    onnxModel.Save(onnxPath);
    Assert.IsTrue(File.Exists(onnxPath));

    // Save the ONNX conversion obtained with ToOnnx(1) (the "no concat" variant).
    onnxPath = FileHelper.GetOutputFile("model_vector.onnx", testName);
    onnxModel = pipeline.ToOnnx(1);
    onnxModel.Save(onnxPath);
    Assert.IsTrue(File.Exists(onnxPath));

    // Loading an ONNX model back is not implemented yet; intended check:
    /*
     * var restored = new ScikitPipeline(output);
     * DataFrame pred2 = null;
     * restored.Predict(df, ref pred2);
     * pred.AssertAlmostEqual(pred2);
     */
}
/// <summary>
/// Materialises the inputs as DataFrames (vectors kept) in a first environment, then
/// trains a "poly{col=X}" + "km{k=2}" pipeline in a second one and checks the
/// ref-based <c>Predict</c>: shape, beginning of the text rendering, and agreement
/// between two predictions computed from two reads of the same scoring data.
/// </summary>
/// <exception cref="Exception">Thrown when the rendered predictions do not start with the expected text.</exception>
public void TestScikitAPI_SimplePredictor_FastValueMapper()
{
    var trainingRows = new[]
    {
        new ExampleA() { X = new float[] { 1, 10, 100 } },
        new ExampleA() { X = new float[] { 2, 3, 5 } },
        new ExampleA() { X = new float[] { 2, 4, 5 } },
        new ExampleA() { X = new float[] { 2, 4, 7 } },
    };
    var scoringRows = new[]
    {
        new ExampleA() { X = new float[] { -1, -10, -100 } },
        new ExampleA() { X = new float[] { -2, -3, -5 } },
        new ExampleA() { X = new float[] { 3, 4, 5 } },
        new ExampleA() { X = new float[] { 3, 4, 7 } },
    };

    DataFrame trainingFrame;
    DataFrame scoringFrame;
    DataFrame scoringFrameAgain;

    // First environment: only used to turn the rows into DataFrames.
    using (var readEnv = EnvHelper.NewTestEnvironment(conc: 1))
    {
        var trainingView = readEnv.CreateStreamingDataView(trainingRows);
        var scoringView = readEnv.CreateStreamingDataView(scoringRows);
        trainingFrame = DataFrameIO.ReadView(trainingView, env: readEnv, keepVectors: true);
        scoringFrame = DataFrameIO.ReadView(scoringView, env: readEnv, keepVectors: true);
        // The scoring view is read a second time into a separate frame.
        scoringFrameAgain = DataFrameIO.ReadView(scoringView, env: readEnv, keepVectors: true);
    }

    // Second environment: training and prediction.
    using (var trainEnv = EnvHelper.NewTestEnvironment(conc: 1))
    {
        using (var pipeline = new ScikitPipeline(new[] { "poly{col=X}" }, "km{k=2}", trainEnv))
        {
            DataFrame firstPrediction = null;
            DataFrame secondPrediction = null;

            var predictor = pipeline.Train(trainingFrame, feature: "X");
            Assert.IsTrue(predictor != null);

            pipeline.Predict(scoringFrame, ref firstPrediction);
            Assert.AreEqual(firstPrediction.Shape, new Tuple<int, int>(4, 3));

            // Rows are joined with ';' and only the header plus the first row are compared.
            var flattened = firstPrediction.ToString().Replace("\n", ";");
            var startsAsExpected = flattened.StartsWith("X.0,X.1,X.2,X.3,X.4,X.5,X.6,X.7,X.8,PredictedLabel,Score.0,Score.1;-1,-10,-100,1,10,100,100,1000,10000");
            if (!startsAsExpected)
            {
                throw new Exception($"Wrong starts\n{flattened}");
            }

            pipeline.Predict(scoringFrameAgain, ref secondPrediction);
            firstPrediction.AssertAlmostEqual(secondPrediction);
        }
    }
}
/// <summary>
/// Replaces <c>_pipeline</c> with a new <c>ScikitPipeline</c> constructed from
/// <paramref name="name"/>.
/// </summary>
/// <param name="name">Value passed to the single-argument <c>ScikitPipeline</c> constructor.</param>
public void Read(string name) => _pipeline = new ScikitPipeline(name);
/// <summary>
/// Trains a "poly{col=X}" + "km{k=2}" pipeline, checks its predictions, saves the
/// pipeline to a zip file, then reloads that file in a second environment and verifies
/// the reloaded pipeline produces the same text output.
/// </summary>
/// <param name="removeFirstTransform">
/// Passed to <c>ScikitPipeline.Save</c>; its value is also embedded in the model file name.
/// </param>
/// <exception cref="Exception">Thrown when the rendered predictions do not start with the expected text.</exception>
private void ScikitAPI_SimplePredictor_Load(bool removeFirstTransform)
{
    // The current method name is passed to FileHelper.GetOutputFile together with the file name.
    var testName = System.Reflection.MethodBase.GetCurrentMethod().Name;
    var modelPath = FileHelper.GetOutputFile($"model{removeFirstTransform}.zip", testName);

    var trainingRows = new[]
    {
        new ExampleA() { X = new float[] { 1, 10, 100 } },
        new ExampleA() { X = new float[] { 2, 3, 5 } },
        new ExampleA() { X = new float[] { 2, 4, 5 } },
        new ExampleA() { X = new float[] { 2, 4, 7 } },
    };
    var scoringRows = new[]
    {
        new ExampleA() { X = new float[] { -1, -10, -100 } },
        new ExampleA() { X = new float[] { -2, -3, -5 } },
        new ExampleA() { X = new float[] { 3, 4, 5 } },
        new ExampleA() { X = new float[] { 3, 4, 7 } },
    };

    // Text rendering produced by the freshly trained pipeline; compared after reload.
    string expected = null;

    // Step 1: train, check the predictions, save.
    using (var trainEnv = EnvHelper.NewTestEnvironment(conc: 1))
    {
        var trainingView = trainEnv.CreateStreamingDataView(trainingRows);
        using (var trained = new ScikitPipeline(new[] { "poly{col=X}" }, "km{k=2}", trainEnv))
        {
            var predictor = trained.Train(trainingView, feature: "X");
            Assert.IsTrue(predictor != null);

            var scoringView = trainEnv.CreateStreamingDataView(scoringRows);
            var scored = trained.Predict(scoringView);
            var frame = DataFrameIO.ReadView(scored);
            Assert.AreEqual(frame.Shape, new Tuple<int, int>(4, 12));

            // Rows are joined with ';' and only the header plus the first row are compared.
            var flattened = frame.ToString().Replace("\n", ";");
            var startsAsExpected = flattened.StartsWith("X.0,X.1,X.2,X.3,X.4,X.5,X.6,X.7,X.8,PredictedLabel,Score.0,Score.1;-1,-10,-100,1,10,100,100,1000,10000");
            if (!startsAsExpected)
            {
                throw new Exception($"Wrong starts\n{flattened}");
            }

            expected = flattened;
            trained.Save(modelPath, removeFirstTransform);
        }
    }

    // Step 2: reload from disk in a new environment and compare.
    using (var reloadEnv = EnvHelper.NewTestEnvironment(conc: 1))
    {
        var scoringView = reloadEnv.CreateStreamingDataView(scoringRows);
        using (var reloaded = new ScikitPipeline(modelPath, reloadEnv))
        {
            var scored = reloaded.Predict(scoringView);
            var frame = DataFrameIO.ReadView(scored);
            Assert.AreEqual(frame.Shape, new Tuple<int, int>(4, 12));

            var actual = frame.ToString().Replace("\n", ";");
            Assert.AreEqual(expected, actual);
        }
    }
}
/// <summary>
/// Trains a "sasdcar{iter=50}" predictor on twelve rows whose features are the
/// concatenation of "time" and "one" and whose label is "X", then checks on the first
/// four predicted rows that Score - X is zero within a precision of 1e-1.
/// </summary>
public void TestTimeSeriesFloatRegression()
{
    // The same four (X, time) pairs repeated three times; "one" is always 1.
    var rows = new[]
    {
        new InputOutput() { X = 5f, time = 0f, one = 1f },
        new InputOutput() { X = 7f, time = 1f, one = 1f },
        new InputOutput() { X = 9f, time = 2f, one = 1f },
        new InputOutput() { X = 11f, time = 3f, one = 1f },
        new InputOutput() { X = 5f, time = 0f, one = 1f },
        new InputOutput() { X = 7f, time = 1f, one = 1f },
        new InputOutput() { X = 9f, time = 2f, one = 1f },
        new InputOutput() { X = 11f, time = 3f, one = 1f },
        new InputOutput() { X = 5f, time = 0f, one = 1f },
        new InputOutput() { X = 7f, time = 1f, one = 1f },
        new InputOutput() { X = 9f, time = 2f, one = 1f },
        new InputOutput() { X = 11f, time = 3f, one = 1f },
    };

    using (var env = EnvHelper.NewTestEnvironment())
    {
        var view = env.CreateStreamingDataView(rows);
        using (var pipeline = new ScikitPipeline(new[] { "concat{col=xt:time,one}" }, "sasdcar{iter=50}", env))
        {
            pipeline.Train(view, feature: "xt", label: "X");

            var scored = pipeline.Predict(view);
            var allRows = DataFrameIO.ReadView(scored);
            var firstRows = allRows.Head(4).Copy();

            // Difference between the prediction and the label for each kept row.
            firstRows["diff"] = firstRows["Score"] - firstRows["X"];

            // One column named "null" holding four zeros, converted to R4 for the comparison.
            var zeros = DataFrameIO.ReadStr("null\n0\n0\n0\n0");
            var expectedDiff = zeros["null"].AsType(NumberType.R4);
            firstRows["diff"].AssertAlmostEqual(expectedDiff, precision: 1e-1);
        }
    }
}
/// <summary>
/// Runs a simple test: trains a "poly{col=X}" + "km{k=2}" pipeline in a console
/// environment, predicts on four other rows and validates the shape and the beginning
/// of the text rendering of the predictions.
/// </summary>
/// <exception cref="Exception">
/// Thrown when no predictor is returned, when the prediction shape is not (4, 12),
/// or when the rendered predictions do not start with the expected text.
/// </exception>
public static void TestScikitAPI()
{
    var trainingRows = new[]
    {
        new ExampleVector() { X = new float[] { 1, 10, 100 } },
        new ExampleVector() { X = new float[] { 2, 3, 5 } },
        new ExampleVector() { X = new float[] { 2, 4, 5 } },
        new ExampleVector() { X = new float[] { 2, 4, 7 } },
    };
    var scoringRows = new[]
    {
        new ExampleVector() { X = new float[] { -1, -10, -100 } },
        new ExampleVector() { X = new float[] { -2, -3, -5 } },
        new ExampleVector() { X = new float[] { 3, 4, 5 } },
        new ExampleVector() { X = new float[] { 3, 4, 7 } },
    };

    using (var env = new ConsoleEnvironment(conc: 1))
    {
        ComponentHelper.AddStandardComponents(env);

        var trainingView = env.CreateStreamingDataView(trainingRows);
        using (var pipeline = new ScikitPipeline(new[] { "poly{col=X}" }, "km{k=2}", env))
        {
            var predictor = pipeline.Train(trainingView, feature: "X");
            if (predictor == null)
            {
                throw new Exception("Test failed: no predictor.");
            }

            var scoringView = env.CreateStreamingDataView(scoringRows);
            var scored = pipeline.Predict(scoringView);
            var frame = DataFrameIO.ReadView(scored);

            // Four rows and twelve columns are expected.
            var shapeIsExpected = frame.Shape.Item1 == 4 && frame.Shape.Item2 == 12;
            if (!shapeIsExpected)
            {
                throw new Exception("Test failed: prediction failed.");
            }

            // Rows are joined with ';' and only the header plus the first row are compared.
            var flattened = frame.ToString().Replace("\n", ";");
            var startsAsExpected = flattened.StartsWith("X.0,X.1,X.2,X.3,X.4,X.5,X.6,X.7,X.8,PredictedLabel,Score.0,Score.1;-1,-10,-100,1,10,100,100,1000,10000");
            if (!startsAsExpected)
            {
                throw new Exception("Test failed: prediction failed (header).");
            }
        }
    }
}