static void TrainMultiToBinaryPredictorSparse(bool singleColumn, bool checkError)
{
    var methodName = string.Format("{0}-{1}-V{2}", System.Reflection.MethodBase.GetCurrentMethod().Name,
                                   "lr", singleColumn ? "C" : "Vec");
    var trainFile = FileHelper.GetTestFile("Train-28x28_small.txt");
    var testFile = FileHelper.GetTestFile("Test-28x28_small.txt");
    var outModelFilePath = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);
    var outData = FileHelper.GetOutputFile("outData1.txt", methodName);
    var outData2 = FileHelper.GetOutputFile("outData2.txt", methodName);
    // The environment is intentionally not disposed; a plain scope block
    // bounds its lifetime instead of the commented-out using.
    /*using (*/
    var env = EnvHelper.NewTestEnvironment(conc: 1);
    {
        var loader = env.CreateLoader("Text{col=Label:R4:0 col=Features:R4:1-784}", new MultiFileSource(trainFile));
        var roles = env.CreateExamples(loader, "Features", "Label");
        var df = DataFrameIO.ReadView(roles.Data);
        Assert.IsTrue(df.Shape[0] > 0);
        var iova = string.Format("iova{{p=lr sc={0} nt=1}}", singleColumn ? "+" : "-");
        loader = env.CreateLoader("Text{col=Label:R4:0 col=Features:R4:1-784}", new MultiFileSource(testFile));
        var trainer = env.CreateTrainer(iova);
        using (var ch = env.Start("train"))
        {
            var predictor = trainer.Train(env, ch, roles);
            TestTrainerHelper.FinalizeSerializationTest(env, outModelFilePath, predictor, roles, outData, outData2,
                                                        PredictionKind.MulticlassClassification, checkError, ratio: 0.1f);
        }
    }
}
void FillCacheIfNotFilled()
{
    lock (_lock)
    {
        if (!(_autoView is null))
            return;
        _autoView = DataFrameIO.ReadView(_input, keepVectors: true, numThreads: _numThreads);
        if (_sortColumn >= 0)
        {
            var sortedPosition = new List<KeyValuePair<TValue, long>>();
            long position = 0;
            TValue got = default(TValue);
            // We could use multithreading here but the cost of sorting
            // might be higher than going through an array in memory.
            using (var cursor = _autoView.GetRowCursor(_autoView.Schema.Where(c => c.Index == _sortColumn)))
            {
                var sortColumnGetter = cursor.GetGetter<TValue>(SchemaHelper._dc(_sortColumn, cursor));
                while (cursor.MoveNext())
                {
                    sortColumnGetter(ref got);
                    sortedPosition.Add(new KeyValuePair<TValue, long>(got, position));
                    ++position;
                }
            }
            sortedPosition.Sort(CompareTo);
            _autoView.Order(sortedPosition.Select(c => (int)c.Value).ToArray());
        }
    }
}
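// A minimal sketch of the comparison the Sort call above relies on. The actual
// CompareTo is defined elsewhere in this class; the hypothetical version below
// assumes TValue implements IComparable<TValue>:
//
// private int CompareTo(KeyValuePair<TValue, long> a, KeyValuePair<TValue, long> b)
// {
//     var r = a.Key.CompareTo(b.Key);
//     // Fall back to the original row position so equal keys keep a stable order.
//     return r != 0 ? r : a.Value.CompareTo(b.Value);
// }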
public void TestTagTrainOrScoreTransformCustomScorer()
{
    var methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;
    var dataFilePath = FileHelper.GetTestFile("mc_iris.txt");
    var outModelFilePath = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);
    var outData = FileHelper.GetOutputFile("outData1.txt", methodName);
    using (var env = EnvHelper.NewTestEnvironment())
    {
        var loader = env.CreateLoader("Text{col=Label:R4:0 col=Slength:R4:1 col=Swidth:R4:2 col=Plength:R4:3 col=Pwidth:R4:4 header=-}",
                                      new MultiFileSource(dataFilePath));
        using (var pipe = new ScikitPipeline(new[] {
                "Concat{col=Feature:Slength,Swidth}",
                "TagTrainScore{tr=iova{p=ft{nl=10 iter=1}} lab=Label feat=Feature tag=model scorer=MultiClassClassifierScorer{ex=AA}}" },
                host: env))
        {
            pipe.Train(loader);
            var pred = pipe.Predict(loader);
            var df = DataFrameIO.ReadView(pred);
            Assert.AreEqual(df.Shape, new Tuple<int, int>(150, 11));
            var dfs = df.Head().ToString();
            Assert.IsTrue(dfs.StartsWith("Label,Slength,Swidth,Plength,Pwidth,Feature.0,Feature.1,PredictedLabelAA,ScoreAA.0,ScoreAA.1,ScoreAA.2"));
        }
    }
}
public void TestEP_PassThroughTransform()
{
    var methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;
    var iris = FileHelper.GetTestFile("iris.txt");
    var outPass = FileHelper.GetOutputFile("data.idv", methodName);
    var df = DataFrameIO.ReadCsv(iris, sep: '\t', dtypes: new ColumnType[] { NumberType.R4 });
    var importData = df.EPTextLoader(iris, sep: '\t', header: true);
    var learningPipeline = new GenericLearningPipeline(conc: 1);
    learningPipeline.Add(importData);
    learningPipeline.Add(new Legacy.Transforms.ColumnConcatenator("Features", "Sepal_length", "Sepal_width"));
    learningPipeline.Add(new Scikit.ML.EntryPoints.Scaler("Features"));
    learningPipeline.Add(new Scikit.ML.EntryPoints.PassThrough() { Filename = outPass, SaveOnDisk = true });
    learningPipeline.Add(new Legacy.Trainers.StochasticDualCoordinateAscentRegressor());
    var predictor = learningPipeline.Train();
    var predictions = predictor.Predict(df);
    var dfout = DataFrameIO.ReadView(predictions);
    Assert.AreEqual(dfout.Shape, new Tuple<int, int>(150, 8));
    Assert.IsTrue(File.Exists(outPass));
}
private void ScikitAPI_SimpleTransform_Load(bool removeFirstTransform)
{
    var methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;
    var output = FileHelper.GetOutputFile($"model{removeFirstTransform}.zip", methodName);
    var inputs = new[] {
        new ExampleA() { X = new float[] { 1, 10, 100 } },
        new ExampleA() { X = new float[] { 2, 3, 5 } }
    };
    var inputs2 = new[] {
        new ExampleA() { X = new float[] { -1, -10, -100 } },
        new ExampleA() { X = new float[] { -2, -3, -5 } }
    };
    string expected = null;
    using (var host = EnvHelper.NewTestEnvironment(conc: 1))
    {
        var data = host.CreateStreamingDataView(inputs);
        using (var pipe = new ScikitPipeline(new[] { "poly{col=X}" }, host: host))
        {
            var predictor = pipe.Train(data);
            Assert.IsTrue(predictor != null);
            var data2 = host.CreateStreamingDataView(inputs2);
            var predictions = pipe.Transform(data2);
            var df = DataFrameIO.ReadView(predictions);
            Assert.AreEqual(df.Shape, new Tuple<int, int>(2, 9));
            var dfs = df.ToString();
            var dfs2 = dfs.Replace("\n", ";");
            expected = dfs2;
            Assert.AreEqual(dfs2, "X.0,X.1,X.2,X.3,X.4,X.5,X.6,X.7,X.8;-1,-10,-100,1,10,100,100,1000,10000;-2,-3,-5,4,6,10,9,15,25");
            pipe.Save(output, removeFirstTransform);
        }
    }
    using (var host = EnvHelper.NewTestEnvironment(conc: 1))
    {
        var data2 = host.CreateStreamingDataView(inputs2);
        using (var pipe2 = new ScikitPipeline(output, host))
        {
            var predictions = pipe2.Transform(data2);
            var df = DataFrameIO.ReadView(predictions);
            Assert.AreEqual(df.Shape, new Tuple<int, int>(2, 9));
            var dfs = df.ToString();
            var dfs2 = dfs.Replace("\n", ";");
            Assert.AreEqual(expected, dfs2);
        }
    }
}
public void TestScikitAPI_SimplePredictor()
{
    var inputs = new[] {
        new ExampleA() { X = new float[] { 1, 10, 100 } },
        new ExampleA() { X = new float[] { 2, 3, 5 } },
        new ExampleA() { X = new float[] { 2, 4, 5 } },
        new ExampleA() { X = new float[] { 2, 4, 7 } },
    };
    var inputs2 = new[] {
        new ExampleA() { X = new float[] { -1, -10, -100 } },
        new ExampleA() { X = new float[] { -2, -3, -5 } },
        new ExampleA() { X = new float[] { 3, 4, 5 } },
        new ExampleA() { X = new float[] { 3, 4, 7 } },
    };
    // The environment is intentionally not disposed; a plain scope block
    // bounds its lifetime instead of the commented-out using.
    /*using (*/
    var host = EnvHelper.NewTestEnvironment(conc: 1);
    {
        var data = DataViewConstructionUtils.CreateFromEnumerable(host, inputs);
        using (var pipe = new ScikitPipeline(new[] { "poly{col=X}" }, "km{k=2}", host))
        {
            var predictor = pipe.Train(data, feature: "X");
            Assert.IsTrue(predictor != null);
            var data2 = new StreamingDataFrame(DataViewConstructionUtils.CreateFromEnumerable(host, inputs2));
            var predictions = pipe.Predict(data2);
            var df = DataFrameIO.ReadView(predictions);
            Assert.AreEqual(df.Shape, new Tuple<int, int>(4, 12));
            var dfs = df.ToString();
            var dfs2 = dfs.Replace("\n", ";");
            Assert.IsTrue(dfs2.StartsWith("X.0,X.1,X.2,X.3,X.4,X.5,X.6,X.7,X.8,PredictedLabel,Score.0,Score.1;-1,-10,-100,1,10,100,100,1000,10000"));
        }
    }
}
public void TestI_PolynomialTransformNumericValues()
{
    using (var host = EnvHelper.NewTestEnvironment(conc: 1))
    {
        var raw = DataFrameIO.ReadStr("A,B\n1.0,2.0\n2.0,3.0\n10.0,11.0");
        raw.SetShuffle(false);
        var loader = host.CreateTransform("concat{col=X:A,B}", raw);
        var data = host.CreateTransform("Poly{col=X}", loader);
        var res = DataFrameIO.ReadView(data);
        var txt = res.ToString();
        Assert.IsFalse(string.IsNullOrEmpty(txt));
        var exp = "A,B,X.0,X.1,X.2,X.3,X.4\n1.0,2.0,1.0,2.0,1.0,2.0,4.0\n2.0,3.0,2.0,3.0,4.0,6.0,9.0\n10.0,11.0,10.0,11.0,100.0,110.0,121.0";
        var dfexp = DataFrameIO.ReadStr(exp);
        Assert.AreEqual(0, dfexp.AlmostEquals(res, exc: true, printDf: true));
    }
}
public void TestI_ScalerTransformNumericValuesMinMax()
{
    using (var host = EnvHelper.NewTestEnvironment(conc: 1))
    {
        var raw = DataFrameIO.ReadStr("A,B\n1.0,2.0\n2.0,3.0\n10.0,11.0");
        raw.SetShuffle(false);
        var loader = host.CreateTransform("concat{col=X:A,B}", raw);
        var data = host.CreateTransform("Scaler{col=X scale=minMax}", loader);
        (data as ITrainableTransform).Estimate();
        var res = DataFrameIO.ReadView(data);
        var txt = res.ToString();
        var exp = "A,B,X.0,X.1\n1.0,2.0,0.0,0.0\n2.0,3.0,0.11111111,0.11111111\n10.0,11.0,1.0,1.0";
        var dfexp = DataFrameIO.ReadStr(exp);
        Assert.AreEqual(0, dfexp.AlmostEquals(res, exc: true, printDf: true));
    }
}
public void TestEP_NearestNeighborsLPMc()
{
    using (var env = EnvHelper.NewTestEnvironment(conc: 1))
    {
        var iris = FileHelper.GetTestFile("iris.txt");
        var df = DataFrameIO.ReadCsv(iris, sep: '\t', dtypes: new ColumnType[] { NumberType.R4 });
        var importData = df.EPTextLoader(iris, sep: '\t', header: true);
        var learningPipeline = new GenericLearningPipeline(conc: 1);
        learningPipeline.Add(importData);
        learningPipeline.Add(new Legacy.Transforms.ColumnConcatenator("Features", "Sepal_length", "Sepal_width"));
        learningPipeline.Add(new Scikit.ML.EntryPoints.NearestNeighborsMultiClass());
        var predictor = learningPipeline.Train();
        var predictions = predictor.Predict(df);
        var dfout = DataFrameIO.ReadView(predictions);
        Assert.AreEqual(dfout.Shape, new Tuple<int, int>(150, 11));
    }
}
public void TestEP_ScalerTransform()
{
    var iris = FileHelper.GetTestFile("iris.txt");
    var df = DataFrameIO.ReadCsv(iris, sep: '\t', dtypes: new ColumnType[] { NumberType.R4 });
    var importData = df.EPTextLoader(iris, sep: '\t', header: true);
    var learningPipeline = new GenericLearningPipeline(conc: 1);
    learningPipeline.Add(importData);
    learningPipeline.Add(new Legacy.Transforms.ColumnConcatenator("Features", "Sepal_length", "Sepal_width"));
    learningPipeline.Add(new Scikit.ML.EntryPoints.Scaler("Features"));
    learningPipeline.Add(new Legacy.Trainers.StochasticDualCoordinateAscentRegressor());
    var predictor = learningPipeline.Train();
    var predictions = predictor.Predict(df);
    var dfout = DataFrameIO.ReadView(predictions);
    Assert.AreEqual(dfout.Shape, new Tuple<int, int>(150, 8));
}
public void TestI_ScalerTransformNumericValuesMeanVar()
{
    // The environment is intentionally not disposed; a plain scope block
    // bounds its lifetime instead of the commented-out using.
    /*using (*/
    var host = EnvHelper.NewTestEnvironment(conc: 1);
    {
        var raw = DataFrameIO.ReadStr("A,B\n1.0,2.0\n2.0,3.0\n10.0,11.0");
        raw.SetShuffle(false);
        var loader = host.CreateTransform("concat{col=X:A,B}", raw);
        var data = host.CreateTransform("Scaler{col=X}", loader);
        (data as ITrainableTransform).Estimate();
        var res = DataFrameIO.ReadView(data);
        var txt = res.ToString();
        Assert.IsNotNull(txt);
        var exp = "A,B,X.0,X.1\n1.0,2.0,-0.827605963,-0.827605963\n2.0,3.0,-0.5793242,-0.5793242\n10.0,11.0,1.40693,1.40693";
        var dfexp = DataFrameIO.ReadStr(exp);
        Assert.AreEqual(0, dfexp.AlmostEquals(res, exc: true, printDf: true, sortBy: "A"));
    }
}
public void TestScikitAPI_SimpleTransform()
{
    var inputs = new[] {
        new ExampleA() { X = new float[] { 1, 10, 100 } },
        new ExampleA() { X = new float[] { 2, 3, 5 } }
    };
    var inputs2 = new[] {
        new ExampleA() { X = new float[] { -1, -10, -100 } },
        new ExampleA() { X = new float[] { -2, -3, -5 } }
    };
    // The environment is intentionally not disposed; a plain scope block
    // bounds its lifetime instead of the commented-out using.
    /*using (*/
    var host = EnvHelper.NewTestEnvironment(conc: 1);
    {
        var data = DataViewConstructionUtils.CreateFromEnumerable(host, inputs);
        using (var pipe = new ScikitPipeline(new[] { "poly{col=X}" }, host: host))
        {
            var predictor = pipe.Train(data);
            Assert.IsTrue(predictor != null);
            var data2 = DataViewConstructionUtils.CreateFromEnumerable(host, inputs2);
            var predictions = pipe.Transform(data2);
            var df = DataFrameIO.ReadView(predictions);
            Assert.AreEqual(df.Shape, new Tuple<int, int>(2, 9));
            var dfs = df.ToString();
            var dfs2 = dfs.Replace("\n", ";");
            Assert.AreEqual(dfs2, "X.0,X.1,X.2,X.3,X.4,X.5,X.6,X.7,X.8;-1,-10,-100,1,10,100,100,1000,10000;-2,-3,-5,4,6,10,9,15,25");
        }
    }
}
public void TestTreePathInnerAPI()
{
    using (var env = EnvHelper.NewTestEnvironment(conc: 1))
    {
        var methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;
        var iris = FileHelper.GetTestFile("iris.txt");
        var df = DataFrameIO.ReadCsv(iris, sep: '\t', dtypes: new ColumnType[] { NumberType.R4 });
        using (var pipe = new ScikitPipeline(new[] {
                "Concat{col=Feature:Sepal_length,Sepal_width}",
                "TreeFeat{tr=ft{iter=2} lab=Label feat=Feature}" }))
        {
            pipe.Train(df);
            var scorer = pipe.Predict(df);
            var dfout = DataFrameIO.ReadView(scorer);
            Assert.AreEqual(dfout.Shape, new Tuple<int, int>(150, 31));
            var outfile = FileHelper.GetOutputFile("iris_path.txt", methodName);
            dfout.ToCsv(outfile);
            Assert.IsTrue(File.Exists(outfile));
        }
    }
}
public void TestTreePathNewAPI()
{
    using (var env = EnvHelper.NewTestEnvironment(conc: 1))
    {
        var methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;
        var iris = FileHelper.GetTestFile("iris.txt");
        var df = DataFrameIO.ReadCsv(iris, sep: '\t', dtypes: new ColumnType[] { NumberType.R4 });
        var importData = df.EPTextLoader(iris, sep: '\t', header: true);
        var learningPipeline = new GenericLearningPipeline();
        learningPipeline.Add(importData);
        learningPipeline.Add(new Legacy.Transforms.ColumnConcatenator("Features", "Sepal_length", "Sepal_width"));
        learningPipeline.Add(new Legacy.Trainers.StochasticDualCoordinateAscentRegressor() { MaxIterations = 2 });
        var predictor = learningPipeline.Train();
        var predictions = predictor.Predict(df);
        var dfout = DataFrameIO.ReadView(predictions);
        Assert.AreEqual(dfout.Shape, new Tuple<int, int>(150, 8));
        var outfile = FileHelper.GetOutputFile("iris_path.txt", methodName);
        dfout.ToCsv(outfile);
        Assert.IsTrue(File.Exists(outfile));
    }
}
/// <summary>
/// Trains the pipeline with data coming from an <see cref="IDataView"/>.
/// </summary>
public ScikitPipeline Train(IDataView data, string feature = "Feature", string label = null,
                            string weight = null, string groupId = null)
{
    IDataView trans = data;
    using (var ch = _env.Start("Create transforms"))
    {
        for (int i = 0; i < _transforms.Length; ++i)
        {
            try
            {
                trans = _env.CreateTransform(_transforms[i].transformSettings, trans);
            }
            catch (Exception e)
            {
                if (e.ToString().Contains("Unknown loadable class"))
                {
                    var nn = _env.ComponentCatalog.GetAllClasses().Length;
                    var filt = _env.ComponentCatalog.GetAllClasses()
                                   .Select(c => c.UserName)
                                   .OrderBy(c => c)
                                   .Where(c => c.Trim().Length > 2);
                    var regis = string.Join("\n", filt);
                    throw Contracts.Except(e, $"Unable to create transform '{_transforms[i].transformSettings}', assembly not registered among {nn}\n{regis}");
                }
                throw;  // rethrow without resetting the stack trace
            }
            _transforms[i].transform = trans as IDataTransform;
        }
    }
    if (_predictor != null)
    {
        using (var ch = _env.Start("Create Predictor"))
        {
            _predictor.trainer = TrainerHelper.CreateTrainer(_env, _predictor.trainerSettings);
            _roles = new List<KeyValuePair<RoleMappedSchema.ColumnRole, string>>();
            _roles.Add(new KeyValuePair<RoleMappedSchema.ColumnRole, string>(RoleMappedSchema.ColumnRole.Feature, feature));
            if (!string.IsNullOrEmpty(label))
                _roles.Add(new KeyValuePair<RoleMappedSchema.ColumnRole, string>(RoleMappedSchema.ColumnRole.Label, label));
            if (!string.IsNullOrEmpty(groupId))
                _roles.Add(new KeyValuePair<RoleMappedSchema.ColumnRole, string>(RoleMappedSchema.ColumnRole.Group, groupId));
            if (!string.IsNullOrEmpty(weight))
                _roles.Add(new KeyValuePair<RoleMappedSchema.ColumnRole, string>(RoleMappedSchema.ColumnRole.Weight, weight));
            var roleMap = new RoleMappedData(trans, label, feature, group: groupId, weight: weight);
            _predictor.predictor = _predictor.trainer.Train(_env, ch, roleMap);
            _predictor.roleMapData = roleMap;
        }
    }
    else
    {
        _predictor = new StepPredictor()
        {
            predictor = null,
            trainer = null,
            trainerSettings = null,
            roleMapData = new RoleMappedData(trans)
        };
        // We predict once to make sure everything works fine.
        using (var ch = _env.Start("Compute one prediction."))
        {
            var df = DataFrameIO.ReadView(trans, 1, keepVectors: true, env: _env);
            if (df.Length == 0)
                throw _env.ExceptEmpty("Something went wrong. The pipeline does not produce any output.");
        }
    }
    return this;
}
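// A minimal usage sketch of Train (illustrative only; `host` is assumed to be an
// environment created elsewhere and `data` an IDataView exposing columns "X" and "Y";
// the components mirror those used in the tests in this repository):
//
// using (var pipe = new ScikitPipeline(new[] { "poly{col=X}" }, "sasdcar{iter=50}", host))
// {
//     pipe.Train(data, feature: "X", label: "Y");
//     var predictions = pipe.Predict(data);
//     var df = DataFrameIO.ReadView(predictions);
// }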
/// <summary>
/// Runs a simple test.
/// </summary>
public static void TestScikitAPI()
{
    var inputs = new[] {
        new ExampleVector() { X = new float[] { 1, 10, 100 } },
        new ExampleVector() { X = new float[] { 2, 3, 5 } },
        new ExampleVector() { X = new float[] { 2, 4, 5 } },
        new ExampleVector() { X = new float[] { 2, 4, 7 } },
    };
    var inputs2 = new[] {
        new ExampleVector() { X = new float[] { -1, -10, -100 } },
        new ExampleVector() { X = new float[] { -2, -3, -5 } },
        new ExampleVector() { X = new float[] { 3, 4, 5 } },
        new ExampleVector() { X = new float[] { 3, 4, 7 } },
    };
    using (var host = new ConsoleEnvironment(conc: 1))
    {
        ComponentHelper.AddStandardComponents(host);
        var data = host.CreateStreamingDataView(inputs);
        using (var pipe = new ScikitPipeline(new[] { "poly{col=X}" }, "km{k=2}", host))
        {
            var predictor = pipe.Train(data, feature: "X");
            if (predictor == null)
                throw new Exception("Test failed: no predictor.");
            var data2 = host.CreateStreamingDataView(inputs2);
            var predictions = pipe.Predict(data2);
            var df = DataFrameIO.ReadView(predictions);
            if (df.Shape.Item1 != 4 || df.Shape.Item2 != 12)
                throw new Exception("Test failed: prediction failed.");
            var dfs = df.ToString();
            var dfs2 = dfs.Replace("\n", ";");
            if (!dfs2.StartsWith("X.0,X.1,X.2,X.3,X.4,X.5,X.6,X.7,X.8,PredictedLabel,Score.0,Score.1;-1,-10,-100,1,10,100,100,1000,10000"))
                throw new Exception("Test failed: prediction failed (header).");
        }
    }
}
public void TestTimeSeriesFloatRegression()
{
    var inputs = new[] {
        new InputOutput() { X = 5f, time = 0f, one = 1f },
        new InputOutput() { X = 7f, time = 1f, one = 1f },
        new InputOutput() { X = 9f, time = 2f, one = 1f },
        new InputOutput() { X = 11f, time = 3f, one = 1f },
        new InputOutput() { X = 5f, time = 0f, one = 1f },
        new InputOutput() { X = 7f, time = 1f, one = 1f },
        new InputOutput() { X = 9f, time = 2f, one = 1f },
        new InputOutput() { X = 11f, time = 3f, one = 1f },
        new InputOutput() { X = 5f, time = 0f, one = 1f },
        new InputOutput() { X = 7f, time = 1f, one = 1f },
        new InputOutput() { X = 9f, time = 2f, one = 1f },
        new InputOutput() { X = 11f, time = 3f, one = 1f },
    };
    using (var host = EnvHelper.NewTestEnvironment())
    {
        var data = host.CreateStreamingDataView(inputs);
        using (var pipe = new ScikitPipeline(new[] { "concat{col=xt:time,one}" }, "sasdcar{iter=50}", host))
        {
            pipe.Train(data, feature: "xt", label: "X");
            var view = pipe.Predict(data);
            var df = DataFrameIO.ReadView(view).Head(4).Copy();
            df["diff"] = df["Score"] - df["X"];
            var exp = DataFrameIO.ReadStr("null\n0\n0\n0\n0");
            df["diff"].AssertAlmostEqual(exp["null"].AsType(NumberType.R4), precision: 1e-1);
        }
    }
}
public void TestScikitAPI_SimplePredictor_FastValueMapper()
{
    var inputs = new[] {
        new ExampleA() { X = new float[] { 1, 10, 100 } },
        new ExampleA() { X = new float[] { 2, 3, 5 } },
        new ExampleA() { X = new float[] { 2, 4, 5 } },
        new ExampleA() { X = new float[] { 2, 4, 7 } },
    };
    var inputs2 = new[] {
        new ExampleA() { X = new float[] { -1, -10, -100 } },
        new ExampleA() { X = new float[] { -2, -3, -5 } },
        new ExampleA() { X = new float[] { 3, 4, 5 } },
        new ExampleA() { X = new float[] { 3, 4, 7 } },
    };
    DataFrame df1, df2, df3;
    using (var host = EnvHelper.NewTestEnvironment(conc: 1))
    {
        var data = host.CreateStreamingDataView(inputs);
        var data2 = host.CreateStreamingDataView(inputs2);
        df1 = DataFrameIO.ReadView(data, env: host, keepVectors: true);
        df2 = DataFrameIO.ReadView(data2, env: host, keepVectors: true);
        df3 = DataFrameIO.ReadView(data2, env: host, keepVectors: true);
    }
    using (var host = EnvHelper.NewTestEnvironment(conc: 1))
    {
        using (var pipe = new ScikitPipeline(new[] { "poly{col=X}" }, "km{k=2}", host))
        {
            DataFrame pred = null, pred2 = null;
            var predictor = pipe.Train(df1, feature: "X");
            Assert.IsTrue(predictor != null);
            pipe.Predict(df2, ref pred);
            Assert.AreEqual(pred.Shape, new Tuple<int, int>(4, 3));
            var dfs = pred.ToString();
            var dfs2 = dfs.Replace("\n", ";");
            if (!dfs2.StartsWith("X.0,X.1,X.2,X.3,X.4,X.5,X.6,X.7,X.8,PredictedLabel,Score.0,Score.1;-1,-10,-100,1,10,100,100,1000,10000"))
                throw new Exception($"Wrong starts\n{dfs2}");
            pipe.Predict(df3, ref pred2);
            pred.AssertAlmostEqual(pred2);
        }
    }
}
private void ScikitAPI_SimplePredictor_Load(bool removeFirstTransform)
{
    var methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;
    var output = FileHelper.GetOutputFile($"model{removeFirstTransform}.zip", methodName);
    var inputs = new[] {
        new ExampleA() { X = new float[] { 1, 10, 100 } },
        new ExampleA() { X = new float[] { 2, 3, 5 } },
        new ExampleA() { X = new float[] { 2, 4, 5 } },
        new ExampleA() { X = new float[] { 2, 4, 7 } },
    };
    var inputs2 = new[] {
        new ExampleA() { X = new float[] { -1, -10, -100 } },
        new ExampleA() { X = new float[] { -2, -3, -5 } },
        new ExampleA() { X = new float[] { 3, 4, 5 } },
        new ExampleA() { X = new float[] { 3, 4, 7 } },
    };
    string expected = null;
    using (var host = EnvHelper.NewTestEnvironment(conc: 1))
    {
        var data = host.CreateStreamingDataView(inputs);
        using (var pipe = new ScikitPipeline(new[] { "poly{col=X}" }, "km{k=2}", host))
        {
            var predictor = pipe.Train(data, feature: "X");
            Assert.IsTrue(predictor != null);
            var data2 = host.CreateStreamingDataView(inputs2);
            var predictions = pipe.Predict(data2);
            var df = DataFrameIO.ReadView(predictions);
            Assert.AreEqual(df.Shape, new Tuple<int, int>(4, 12));
            var dfs = df.ToString();
            var dfs2 = dfs.Replace("\n", ";");
            if (!dfs2.StartsWith("X.0,X.1,X.2,X.3,X.4,X.5,X.6,X.7,X.8,PredictedLabel,Score.0,Score.1;-1,-10,-100,1,10,100,100,1000,10000"))
                throw new Exception($"Wrong starts\n{dfs2}");
            expected = dfs2;
            pipe.Save(output, removeFirstTransform);
        }
    }
    using (var host = EnvHelper.NewTestEnvironment(conc: 1))
    {
        var data2 = host.CreateStreamingDataView(inputs2);
        using (var pipe2 = new ScikitPipeline(output, host))
        {
            var predictions = pipe2.Predict(data2);
            var df = DataFrameIO.ReadView(predictions);
            Assert.AreEqual(df.Shape, new Tuple<int, int>(4, 12));
            var dfs = df.ToString();
            var dfs2 = dfs.Replace("\n", ";");
            Assert.AreEqual(expected, dfs2);
        }
    }
}
/// <summary>
/// Creates a <see cref="DataFrame"/> from an <see cref="IDataView"/>.
/// </summary>
public static DataFrame ReadView(IDataView view, int nrows = -1)
{
    return DataFrameIO.ReadView(view, nrows);
}
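// A minimal usage sketch (illustrative only; `view` stands for any IDataView,
// such as the output of pipe.Predict or pipe.Transform above):
//
// var all  = DataFrameIO.ReadView(view);      // nrows = -1: materialize the whole view
// var head = DataFrameIO.ReadView(view, 10);  // presumably stops after the first 10 rows,
//                                             // as the single-row read in Train suggests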