// Saves a small in-memory dataset as CSV, runs a MAML "Train tr=lr" command on it
// and fails if nothing at all was written to the log writers.
public void TestBcLrSameModel()
{
    var methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;
    var output = FileHelper.GetOutputFile("bc-lr.zip", methodName);
    var name = FileHelper.GetOutputFile("bc.txt", methodName);

    var df = DataFrameIO.ReadStr("Label,X1,X2,X3,X4,X5,X6,X7,X8,X9\n" +
                                 "0,0.1,1.1,2.1,3.1,4.1,5.1,6.2,7.4,-5\n" +
                                 "1,1.1,1.1,2.1,3.1,4.1,5.1,6.2,7.4,-5\n" +
                                 "0,2.1,1.1,3.1,3.1,-4.1,5.1,6.2,7.4,-5\n" +
                                 "1,3.1,1.1,4.1,3.1,4.1,-5.1,6.2,7.4,-5\n" +
                                 "0,4.1,1.1,2.1,3.1,4.1,5.1,6.2,-7.4,-5");
    df.ToCsv(name);

    var cmd = string.Format("Train tr=lr data={0} out={1} loader=text{{col=Label:R4:0 col=Features:R4:1-* sep=, header=+}}", name, output);

    // Standard and error output are both appended to the same buffer.
    var stdout = new StringBuilder();
    ILogWriter logout = new LogWriter((string s) => { stdout.Append(s); });
    ILogWriter logerr = new LogWriter((string s) => { stdout.Append(s); });

    // NOTE(review): the environment is never disposed (a 'using' was commented out here).
    // Confirm whether DelegateEnvironment is IDisposable before wrapping it.
    var env = new DelegateEnvironment(seed: 0, verbose: 2, outWriter: logout, errWriter: logerr);
    MamlHelper.MamlScript(cmd, false, env);

    var stout = stdout.ToString();
    if (string.IsNullOrEmpty(stout))
    {
        // The captured text is empty at this point, so it cannot serve as the message;
        // report the command that produced no output instead.
        throw new Exception("The MAML command did not write any output: " + cmd);
    }
}
/// <summary>
/// Reads a string and returns it as a <see cref="DataFrame"/>.
/// The integer codes in <paramref name="dtypes"/> are converted with
/// <c>IntToColumnTypes</c>; every other argument is forwarded unchanged to the
/// <c>DataFrameIO.ReadStr</c> overload which accepts the converted kinds.
/// Follows pandas API.
/// </summary>
public static DataFrame ReadStr(string content, char sep = ',', bool header = true,
                                string[] names = null, int[] dtypes = null,
                                int nrows = -1, int guess_rows = 10, bool index = false)
{
    return DataFrameIO.ReadStr(content, sep, header, names, IntToColumnTypes(dtypes),
                               nrows, guess_rows, index);
}
// Concatenates columns A and B into X, applies "Scaler{col=X scale=minMax}",
// estimates the transform and compares the resulting view with the expected frame.
public void TestI_ScalerTransformNumericValuesMinMax()
{
    using (var host = EnvHelper.NewTestEnvironment(conc: 1))
    {
        var raw = DataFrameIO.ReadStr("A,B\n1.0,2.0\n2.0,3.0\n10.0,11.0");
        raw.SetShuffle(false);
        var loader = host.CreateTransform("concat{col=X:A,B}", raw);
        var data = host.CreateTransform("Scaler{col=X scale=minMax}", loader);

        // Direct cast instead of 'as': if the transform is not trainable the test fails
        // with an InvalidCastException naming the types rather than a NullReferenceException.
        ((ITrainableTransform)data).Estimate();

        var res = DataFrameIO.ReadView(data);

        // The string form was computed but never checked; verify it is not empty.
        var txt = res.ToString();
        Assert.IsFalse(string.IsNullOrEmpty(txt));

        var exp = "A,B,X.0,X.1\n1.0,2.0,0.0,0.0\n2.0,3.0,0.11111111,0.11111111\n10.0,11.0,1.0,1.0";
        var dfexp = DataFrameIO.ReadStr(exp);
        Assert.AreEqual(0, dfexp.AlmostEquals(res, exc: true, printDf: true));
    }
}
// Concatenates columns A and B into X, applies "Poly{col=X}" and compares the view
// with the expected frame (A, B, then A, B, A*A, A*B, B*B for each row).
public void TestI_PolynomialTransformNumericValues()
{
    using (var env = EnvHelper.NewTestEnvironment(conc: 1))
    {
        var input = DataFrameIO.ReadStr("A,B\n1.0,2.0\n2.0,3.0\n10.0,11.0");
        input.SetShuffle(false);

        var concatenated = env.CreateTransform("concat{col=X:A,B}", input);
        var polynomial = env.CreateTransform("Poly{col=X}", concatenated);
        var actual = DataFrameIO.ReadView(polynomial);

        // The string representation of the result must not be empty.
        Assert.IsFalse(string.IsNullOrEmpty(actual.ToString()));

        var expected = DataFrameIO.ReadStr("A,B,X.0,X.1,X.2,X.3,X.4\n1.0,2.0,1.0,2.0,1.0,2.0,4.0\n2.0,3.0,2.0,3.0,4.0,6.0,9.0\n10.0,11.0,10.0,11.0,100.0,110.0,121.0");
        var differences = expected.AlmostEquals(actual, exc: true, printDf: true);
        Assert.AreEqual(0, differences);
    }
}
// Concatenates columns A and B into X, applies "Scaler{col=X}" (no explicit scale option),
// estimates the transform and compares the resulting view, sorted by A, with the expected frame.
public void TestI_ScalerTransformNumericValuesMeanVar()
{
    // The environment is disposed at the end of the test, as in the other tests
    // which create it with EnvHelper.NewTestEnvironment.
    using (var host = EnvHelper.NewTestEnvironment(conc: 1))
    {
        var raw = DataFrameIO.ReadStr("A,B\n1.0,2.0\n2.0,3.0\n10.0,11.0");
        raw.SetShuffle(false);
        var loader = host.CreateTransform("concat{col=X:A,B}", raw);
        var data = host.CreateTransform("Scaler{col=X}", loader);

        // Direct cast instead of 'as': if the transform is not trainable the test fails
        // with an InvalidCastException naming the types rather than a NullReferenceException.
        ((ITrainableTransform)data).Estimate();

        var res = DataFrameIO.ReadView(data);
        var txt = res.ToString();
        Assert.IsNotNull(txt);

        var exp = "A,B,X.0,X.1\n1.0,2.0,-0.827605963,-0.827605963\n2.0,3.0,-0.5793242,-0.5793242\n10.0,11.0,1.40693,1.40693";
        var dfexp = DataFrameIO.ReadStr(exp);
        Assert.AreEqual(0, dfexp.AlmostEquals(res, exc: true, printDf: true, sortBy: "A"));
    }
}
// Trains the "sasdcar{iter=50}" pipeline on a short repeated series, using the
// concatenation of 'time' and 'one' as features and 'X' as label, then checks that
// Score - X is close to zero (precision 1e-1) on the first four predictions.
public void TestTimeSeriesFloatRegression()
{
    // Three repetitions of the same four points: time = 0..3, X = 5, 7, 9, 11, one = 1.
    var targets = new[] { 5f, 7f, 9f, 11f };
    var inputs = new InputOutput[3 * targets.Length];
    for (int i = 0; i < inputs.Length; ++i)
    {
        int step = i % targets.Length;
        inputs[i] = new InputOutput() { X = targets[step], time = (float)step, one = 1f };
    }

    using (var host = EnvHelper.NewTestEnvironment())
    {
        var data = host.CreateStreamingDataView(inputs);
        var transforms = new[] { "concat{col=xt:time,one}" };

        using (var pipe = new ScikitPipeline(transforms, "sasdcar{iter=50}", host))
        {
            pipe.Train(data, feature: "xt", label: "X");

            var predictions = DataFrameIO.ReadView(pipe.Predict(data)).Head(4).Copy();
            predictions["diff"] = predictions["Score"] - predictions["X"];

            // Expected differences: a single column named "null" holding four zeros.
            var zeros = DataFrameIO.ReadStr("null\n0\n0\n0\n0");
            predictions["diff"].AssertAlmostEqual(zeros["null"].AsType(NumberType.R4), precision: 1e-1);
        }
    }
}